mkl-devel-dpcpp 2025.0.1__py2.py3-none-win_amd64.whl → 2025.2.0__py2.py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mkl-devel-dpcpp might be problematic. Click here for more details.

Files changed (94) hide show
  1. {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl.lib +0 -0
  2. {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_blas_dll.lib +0 -0
  3. mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
  4. {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_dll.lib +0 -0
  5. mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
  6. {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_rng_dll.lib +0 -0
  7. mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
  8. {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/METADATA +4 -3
  9. mkl_devel_dpcpp-2025.2.0.dist-info/RECORD +15 -0
  10. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/bfloat16.hpp +0 -26
  11. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/buffer.hpp +0 -42
  12. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/buffer_decls.hpp +0 -880
  13. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/types.hpp +0 -60
  14. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/usm.hpp +0 -42
  15. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas/usm_decls.hpp +0 -1240
  16. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/blas.hpp +0 -33
  17. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/dft.hpp +0 -253
  18. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/dfti.hpp +0 -22
  19. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/exceptions.hpp +0 -110
  20. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/interpolate.hpp +0 -67
  21. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/spline_and_data_params.hpp +0 -68
  22. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting/splines.hpp +0 -177
  23. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/experimental/data_fitting.hpp +0 -22
  24. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/export.hpp +0 -25
  25. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/concepts.hpp +0 -55
  26. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/exceptions.hpp +0 -75
  27. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/lapack.hpp +0 -1095
  28. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack/scratchpad.hpp +0 -106
  29. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/lapack.hpp +0 -23
  30. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/detail/engine_base.hpp +0 -48
  31. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp +0 -89
  32. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/beta_impl.hpp +0 -464
  33. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/bits_impl.hpp +0 -71
  34. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/distribution_base.hpp +0 -81
  35. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_base.hpp +0 -43
  36. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_helpers_base.hpp +0 -54
  37. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp +0 -116
  38. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp +0 -285
  39. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp +0 -270
  40. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp +0 -105
  41. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_helpers_impl.hpp +0 -117
  42. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp +0 -223
  43. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_helpers_impl.hpp +0 -118
  44. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp +0 -266
  45. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_helpers_impl.hpp +0 -125
  46. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp +0 -385
  47. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp +0 -3668
  48. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_helpers_impl.hpp +0 -141
  49. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp +0 -552
  50. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp +0 -355
  51. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/types.hpp +0 -58
  52. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp +0 -51
  53. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp +0 -289
  54. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp +0 -183
  55. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/distributions.hpp +0 -637
  56. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/engine_helpers.hpp +0 -116
  57. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/engines.hpp +0 -187
  58. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/functions.hpp +0 -59
  59. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device/types.hpp +0 -74
  60. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/device.hpp +0 -29
  61. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/distributions.hpp +0 -1913
  62. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/engines.hpp +0 -788
  63. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng/functions.hpp +0 -163
  64. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/rng.hpp +0 -22
  65. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_auxiliary.hpp +0 -111
  66. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_operations.hpp +0 -446
  67. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas/sparse_structures.hpp +0 -193
  68. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/spblas.hpp +0 -32
  69. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/stats.hpp +0 -356
  70. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/types.hpp +0 -321
  71. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/buffer.hpp +0 -3529
  72. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/decls.hpp +0 -280
  73. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/decls.hpp +0 -81
  74. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/dispatch.hpp +0 -1059
  75. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/ep.hpp +0 -861
  76. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/ha.hpp +0 -860
  77. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/la.hpp +0 -860
  78. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/rts.hpp +0 -4608
  79. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/detail/scalar.hpp +0 -8963
  80. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/device/vm.hpp +0 -460
  81. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/span.hpp +0 -3813
  82. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm/usm.hpp +0 -3581
  83. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl/vm.hpp +0 -30
  84. mkl_devel_dpcpp-2025.0.1.data/data/Library/include/oneapi/mkl.hpp +0 -34
  85. mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
  86. mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
  87. mkl_devel_dpcpp-2025.0.1.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
  88. mkl_devel_dpcpp-2025.0.1.dist-info/RECORD +0 -90
  89. {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_data_fitting_dll.lib +0 -0
  90. {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_stats_dll.lib +0 -0
  91. {mkl_devel_dpcpp-2025.0.1.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_vm_dll.lib +0 -0
  92. {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/LICENSE.txt +0 -0
  93. {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/WHEEL +0 -0
  94. {mkl_devel_dpcpp-2025.0.1.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/top_level.txt +0 -0
@@ -1,141 +0,0 @@
1
- /*******************************************************************************
2
- * Copyright 2023 Intel Corporation
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing,
11
- * software distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- * See the License for the specific language governing permissions
14
- * and limitations under the License.
15
- *
16
- *
17
- * SPDX-License-Identifier: Apache-2.0
18
- *******************************************************************************/
19
-
20
- #ifndef _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
21
- #define _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
22
-
23
- #include "oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp"
24
-
25
- namespace oneapi::mkl::rng::device::detail {
26
- namespace philox4x32x10_impl {
27
-
28
- template <std::int32_t VecSize>
29
- static inline void init(
30
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state, std::size_t id,
31
- const sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write>& accessor) {
32
- std::size_t num_elements_acc =
33
- sizeof(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>) /
34
- sizeof(std::uint32_t);
35
- state.key[0] = accessor[id * num_elements_acc];
36
- state.key[1] = accessor[id * num_elements_acc + 1];
37
- state.counter[0] = accessor[id * num_elements_acc + 2];
38
- state.counter[1] = accessor[id * num_elements_acc + 3];
39
- state.counter[2] = accessor[id * num_elements_acc + 4];
40
- state.counter[3] = accessor[id * num_elements_acc + 5];
41
-
42
- state.part = accessor[id * num_elements_acc + 6];
43
-
44
- state.result[0] = accessor[id * num_elements_acc + 7];
45
- state.result[1] = accessor[id * num_elements_acc + 8];
46
- state.result[2] = accessor[id * num_elements_acc + 9];
47
- state.result[3] = accessor[id * num_elements_acc + 10];
48
- }
49
-
50
- template <std::int32_t VecSize>
51
- static inline void store(
52
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state, std::size_t id,
53
- const sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write>& accessor) {
54
- std::size_t num_elements_acc =
55
- sizeof(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>) /
56
- sizeof(std::uint32_t);
57
- accessor[id * num_elements_acc] = state.key[0];
58
- accessor[id * num_elements_acc + 1] = state.key[1];
59
- accessor[id * num_elements_acc + 2] = state.counter[0];
60
- accessor[id * num_elements_acc + 3] = state.counter[1];
61
- accessor[id * num_elements_acc + 4] = state.counter[2];
62
- accessor[id * num_elements_acc + 5] = state.counter[3];
63
- accessor[id * num_elements_acc + 6] = state.part;
64
- accessor[id * num_elements_acc + 7] = state.result[0];
65
- accessor[id * num_elements_acc + 8] = state.result[1];
66
- accessor[id * num_elements_acc + 9] = state.result[2];
67
- accessor[id * num_elements_acc + 10] = state.result[3];
68
- }
69
-
70
- } // namespace philox4x32x10_impl
71
-
72
- template <std::int32_t VecSize>
73
- class engine_accessor_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
74
- public:
75
- engine_accessor_base(sycl::buffer<std::uint32_t, 1>& state_buf, sycl::handler& cgh)
76
- : states_accessor_(state_buf, cgh) {}
77
-
78
- oneapi::mkl::rng::device::philox4x32x10<VecSize> load(std::size_t id) const {
79
- oneapi::mkl::rng::device::philox4x32x10<VecSize> engine;
80
- philox4x32x10_impl::init(engine.state_, id, states_accessor_);
81
- return engine;
82
- }
83
-
84
- void store(oneapi::mkl::rng::device::philox4x32x10<VecSize>& engine, std::size_t id) const {
85
- philox4x32x10_impl::store(engine.state_, id, states_accessor_);
86
- }
87
-
88
- protected:
89
- sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write> states_accessor_;
90
- };
91
-
92
- template <std::int32_t VecSize>
93
- class engine_descriptor_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
94
- public:
95
- using engine_type = oneapi::mkl::rng::device::philox4x32x10<VecSize>;
96
-
97
- using accessor_type =
98
- oneapi::mkl::rng::device::engine_accessor<oneapi::mkl::rng::device::philox4x32x10<VecSize>>;
99
-
100
- engine_descriptor_base(sycl::queue& queue, sycl::range<1> range, std::uint64_t seed,
101
- std::uint64_t offset)
102
- : states_buffer_(range.get(0) * sizeof(engine_state<engine_type>) /
103
- sizeof(std::uint32_t)) {
104
- queue.submit([&](sycl::handler& cgh) {
105
- accessor_type states_accessor(states_buffer_, cgh);
106
-
107
- cgh.parallel_for<class init_kernel<engine_type>>
108
- (range, [=](sycl::item<1> item) {
109
- std::size_t id = item.get_id(0);
110
- oneapi::mkl::rng::device::philox4x32x10<VecSize> engine(seed, offset* id);
111
- states_accessor.store(engine, id);
112
- });
113
- });
114
- }
115
-
116
- template <typename InitEngineFunc>
117
- engine_descriptor_base(sycl::queue& queue, sycl::range<1> range, InitEngineFunc func)
118
- : states_buffer_(range.get(0) * sizeof(engine_state<engine_type>) /
119
- sizeof(std::uint32_t)) {
120
- queue.submit([&](sycl::handler& cgh) {
121
- accessor_type states_accessor(states_buffer_, cgh);
122
-
123
- cgh.parallel_for<class init_kernel_ex<engine_type>>
124
- (range, [=](sycl::item<1> item) {
125
- std::size_t id = item.get_id(0);
126
- states_accessor.store(func(item), id);
127
- });
128
- });
129
- }
130
-
131
- accessor_type get_access(sycl::handler& cgh) {
132
- return accessor_type{ states_buffer_, cgh };
133
- }
134
-
135
- protected:
136
- sycl::buffer<std::uint32_t, 1> states_buffer_;
137
- };
138
-
139
- } // namespace oneapi::mkl::rng::device::detail
140
-
141
- #endif // _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
@@ -1,552 +0,0 @@
1
- /*******************************************************************************
2
- * Copyright 2020 Intel Corporation
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing,
11
- * software distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- * See the License for the specific language governing permissions
14
- * and limitations under the License.
15
- *
16
- *
17
- * SPDX-License-Identifier: Apache-2.0
18
- *******************************************************************************/
19
-
20
- #ifndef _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
21
- #define _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
22
-
23
- #include <utility> // std::pair
24
-
25
- namespace oneapi::mkl::rng::device {
26
-
27
- template <std::int32_t VecSize = 1>
28
- class philox4x32x10;
29
-
30
- namespace detail {
31
-
32
- template <std::int32_t VecSize>
33
- struct engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
34
- std::uint32_t key[2];
35
- std::uint32_t counter[4];
36
- std::uint32_t part;
37
- std::uint32_t result[4];
38
- };
39
-
40
- namespace philox4x32x10_impl {
41
-
42
/// Adds the 64-bit value `b` to the 128-bit little-endian integer stored in
/// a[0..3] (a[0] is the least-significant 32-bit word). Overflow past 128
/// bits wraps around.
static inline void add128(std::uint32_t* a, std::uint64_t b) {
    const std::uint64_t low_sum = ((static_cast<std::uint64_t>(a[1]) << 32) | a[0]) + b;

    a[0] = static_cast<std::uint32_t>(low_sum);
    a[1] = static_cast<std::uint32_t>(low_sum >> 32);

    // An unsigned sum smaller than one of its operands means the low half wrapped.
    if (low_sum >= b) {
        return;
    }

    const std::uint64_t high_sum = ((static_cast<std::uint64_t>(a[3]) << 32) | a[2]) + 1;

    a[2] = static_cast<std::uint32_t>(high_sum);
    a[3] = static_cast<std::uint32_t>(high_sum >> 32);
}
58
-
59
/// Increments by one the 128-bit little-endian integer stored in a[0..3]
/// (a[0] is the least-significant 32-bit word); wraps to zero on overflow.
static inline void add128_1(std::uint32_t* a) {
    for (int limb = 0; limb < 4; ++limb) {
        // A limb that is non-zero after the increment did not wrap, so the carry stops here.
        if (++a[limb] != 0) {
            return;
        }
    }
}
71
-
72
/// Multiplies two 32-bit values into a full 64-bit product.
///
/// @return pair whose `first` is the low 32 bits and whose `second` is the
///         high 32 bits of a * b
static inline std::pair<std::uint32_t, std::uint32_t> mul_hilo_32(std::uint32_t a,
                                                                  std::uint32_t b) {
    // Widen before multiplying so no bits of the product are lost.
    const std::uint64_t product = std::uint64_t{ a } * std::uint64_t{ b };
    const auto low = static_cast<std::uint32_t>(product);
    const auto high = static_cast<std::uint32_t>(product >> 32);
    return { low, high };
}
78
-
79
/// Applies one Philox-4x32 round to the 4-word block `cnt` using the 2-word
/// round key `k`. `cnt` is updated in place; `k` is only read.
///
/// cnt[0] and cnt[2] are each multiplied by a fixed 32-bit constant into a
/// 64-bit product; the product halves are then mixed with cnt[1], cnt[3] and
/// the key words as shown below.
static inline void round(std::uint32_t* cnt, std::uint32_t* k) {
    // Full 64-bit products of the two multiplier constants with words 0 and 2.
    const std::uint64_t prod0 = static_cast<std::uint64_t>(0xD2511F53) * cnt[0];
    const std::uint64_t prod1 = static_cast<std::uint64_t>(0xCD9E8D57) * cnt[2];

    const auto lo0 = static_cast<std::uint32_t>(prod0);
    const auto hi0 = static_cast<std::uint32_t>(prod0 >> 32);
    const auto lo1 = static_cast<std::uint32_t>(prod1);
    const auto hi1 = static_cast<std::uint32_t>(prod1 >> 32);

    // Write order matters: cnt[1] and cnt[3] are read before they are overwritten.
    cnt[0] = hi1 ^ cnt[1] ^ k[0];
    cnt[1] = lo1;
    cnt[2] = hi0 ^ cnt[3] ^ k[1];
    cnt[3] = lo0;
}
88
-
89
- static inline void round_10(std::uint32_t* cnt, std::uint32_t* k) {
90
- round(cnt, k); // 1
91
- // increasing keys with philox4x32x10 constants
92
- k[0] += 0x9E3779B9;
93
- k[1] += 0xBB67AE85;
94
- round(cnt, k); // 2
95
- k[0] += 0x9E3779B9;
96
- k[1] += 0xBB67AE85;
97
- round(cnt, k); // 3
98
- k[0] += 0x9E3779B9;
99
- k[1] += 0xBB67AE85;
100
- round(cnt, k); // 4
101
- k[0] += 0x9E3779B9;
102
- k[1] += 0xBB67AE85;
103
- round(cnt, k); // 5
104
- k[0] += 0x9E3779B9;
105
- k[1] += 0xBB67AE85;
106
- round(cnt, k); // 6
107
- k[0] += 0x9E3779B9;
108
- k[1] += 0xBB67AE85;
109
- round(cnt, k); // 7
110
- k[0] += 0x9E3779B9;
111
- k[1] += 0xBB67AE85;
112
- round(cnt, k); // 8
113
- k[0] += 0x9E3779B9;
114
- k[1] += 0xBB67AE85;
115
- round(cnt, k); // 9
116
- k[0] += 0x9E3779B9;
117
- k[1] += 0xBB67AE85;
118
- round(cnt, k); // 10
119
- }
120
-
121
- template <std::int32_t VecSize>
122
- static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
123
- std::uint64_t num_to_skip) {
124
- std::uint64_t num_to_skip_tmp = num_to_skip;
125
- std::uint64_t c_inc;
126
- std::uint32_t counter[4];
127
- std::uint32_t key[2];
128
- std::uint64_t tail;
129
- if (num_to_skip_tmp <= state.part) {
130
- state.part -= num_to_skip_tmp;
131
- }
132
- else {
133
- tail = num_to_skip % 4;
134
- if ((tail == 0) && (state.part == 0)) {
135
- add128(state.counter, num_to_skip / 4);
136
- }
137
- else {
138
- num_to_skip_tmp = num_to_skip_tmp - state.part;
139
- state.part = 0;
140
- c_inc = (num_to_skip_tmp - 1) / 4;
141
- state.part = (4 - num_to_skip_tmp % 4) % 4;
142
- add128(state.counter, c_inc);
143
- counter[0] = state.counter[0];
144
- counter[1] = state.counter[1];
145
- counter[2] = state.counter[2];
146
- counter[3] = state.counter[3];
147
- key[0] = state.key[0];
148
- key[1] = state.key[1];
149
- round_10(counter, key);
150
- state.result[0] = counter[0];
151
- state.result[1] = counter[1];
152
- state.result[2] = counter[2];
153
- state.result[3] = counter[3];
154
- add128_1(state.counter);
155
- }
156
- }
157
- }
158
-
159
- template <std::int32_t VecSize>
160
- static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
161
- std::uint64_t n, const std::uint64_t* num_to_skip_ptr) {
162
- constexpr std::uint64_t uint_max = 0xFFFFFFFFFFFFFFFF;
163
- std::uint64_t post_buffer, pre_buffer;
164
- std::int32_t num_elements = 0;
165
- std::int32_t remained_counter;
166
- std::uint64_t tmp_skip_array[3] = { 0, 0, 0 };
167
-
168
- for (std::uint64_t i = 0; (i < 3) && (i < n); i++) {
169
- tmp_skip_array[i] = num_to_skip_ptr[i];
170
- if (tmp_skip_array[i]) {
171
- num_elements = i + 1;
172
- }
173
- }
174
-
175
- if (num_elements == 0) {
176
- return;
177
- }
178
- if ((num_elements == 1) && (tmp_skip_array[0] <= state.part)) {
179
- state.part -= static_cast<std::uint32_t>(tmp_skip_array[0]);
180
- return;
181
- }
182
- std::uint32_t counter[4];
183
- std::uint32_t key[2];
184
-
185
- if ((tmp_skip_array[0] - state.part) <= tmp_skip_array[0]) {
186
- tmp_skip_array[0] = tmp_skip_array[0] - state.part;
187
- }
188
- else if ((num_elements == 2) || (tmp_skip_array[1] - 1 < tmp_skip_array[1])) {
189
- tmp_skip_array[1] = tmp_skip_array[1] - 1;
190
- tmp_skip_array[0] = uint_max - state.part + tmp_skip_array[0];
191
- }
192
- else {
193
- tmp_skip_array[2] = tmp_skip_array[2] - 1;
194
- tmp_skip_array[1] = uint_max - 1;
195
- tmp_skip_array[0] = uint_max - state.part + tmp_skip_array[0];
196
- }
197
-
198
- state.part = 0;
199
-
200
- post_buffer = 0;
201
-
202
- remained_counter = static_cast<std::uint32_t>(tmp_skip_array[0] % 4);
203
-
204
- for (int i = num_elements - 1; i >= 0; i--) {
205
- pre_buffer = (tmp_skip_array[i] << 62);
206
- tmp_skip_array[i] >>= 2;
207
- tmp_skip_array[i] |= post_buffer;
208
- post_buffer = pre_buffer;
209
- }
210
-
211
- state.part = 4 - remained_counter;
212
-
213
- std::uint64_t counter64[] = { state.counter[1], state.counter[3] };
214
- counter64[0] = ((counter64[0] << 32ull) | state.counter[0]);
215
- counter64[1] = ((counter64[1] << 32ull) | state.counter[2]);
216
-
217
- counter64[0] += tmp_skip_array[0];
218
-
219
- if (counter64[0] < tmp_skip_array[0]) {
220
- counter64[1]++;
221
- }
222
-
223
- counter64[1] += tmp_skip_array[1];
224
-
225
- counter[0] = static_cast<std::uint32_t>(counter64[0]);
226
- counter[1] = static_cast<std::uint32_t>(counter64[0] >> 32);
227
- counter[2] = static_cast<std::uint32_t>(counter64[1]);
228
- counter[3] = static_cast<std::uint32_t>(counter64[1] >> 32);
229
-
230
- key[0] = state.key[0];
231
- key[1] = state.key[1];
232
-
233
- round_10(counter, key);
234
-
235
- state.result[0] = counter[0];
236
- state.result[1] = counter[1];
237
- state.result[2] = counter[2];
238
- state.result[3] = counter[3];
239
-
240
- counter64[0]++;
241
-
242
- if (counter64[0] < 1) {
243
- counter64[1]++;
244
- }
245
-
246
- state.counter[0] = static_cast<std::uint32_t>(counter64[0]);
247
- state.counter[1] = static_cast<std::uint32_t>(counter64[0] >> 32);
248
- state.counter[2] = static_cast<std::uint32_t>(counter64[1]);
249
- state.counter[3] = static_cast<std::uint32_t>(counter64[1] >> 32);
250
- }
251
-
252
- template <std::int32_t VecSize>
253
- static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
254
- std::uint64_t n, const std::uint64_t* seed_ptr, std::uint64_t offset) {
255
- state.key[0] = static_cast<std::uint32_t>(seed_ptr[0]);
256
- state.key[1] = static_cast<std::uint32_t>(seed_ptr[0] >> 32);
257
-
258
- state.counter[0] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1]) : 0);
259
- state.counter[1] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1] >> 32) : 0);
260
-
261
- state.counter[2] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2]) : 0);
262
- state.counter[3] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2] >> 32) : 0);
263
-
264
- state.part = 0;
265
- state.result[0] = 0;
266
- state.result[1] = 0;
267
- state.result[2] = 0;
268
- state.result[3] = 0;
269
- skip_ahead(state, offset);
270
- }
271
-
272
- template <std::int32_t VecSize>
273
- static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
274
- std::uint64_t n, const std::uint64_t* seed_ptr, std::uint64_t n_offset,
275
- const std::uint64_t* offset_ptr) {
276
- state.key[0] = static_cast<std::uint32_t>(seed_ptr[0]);
277
- state.key[1] = static_cast<std::uint32_t>(seed_ptr[0] >> 32);
278
-
279
- state.counter[0] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1]) : 0);
280
- state.counter[1] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1] >> 32) : 0);
281
-
282
- state.counter[2] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2]) : 0);
283
- state.counter[3] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2] >> 32) : 0);
284
-
285
- state.part = 0;
286
- state.result[0] = 0;
287
- state.result[1] = 0;
288
- state.result[2] = 0;
289
- state.result[3] = 0;
290
- skip_ahead(state, n_offset, offset_ptr);
291
- }
292
-
293
- // for VecSize > 4
294
- template <std::int32_t VecSize>
295
- __attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> generate_full(
296
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
297
- const std::int32_t num_elements = VecSize;
298
- sycl::vec<std::uint32_t, VecSize> res;
299
-
300
- std::uint32_t counter[4];
301
-
302
- int i = 0;
303
- int part = (int)state.part;
304
- while (part && (i < num_elements)) {
305
- res[i++] = state.result[3 - (--part)];
306
- }
307
- if (i == num_elements) {
308
- skip_ahead(state, num_elements);
309
- return res;
310
- }
311
-
312
- counter[0] = state.counter[0];
313
- counter[1] = state.counter[1];
314
- counter[2] = state.counter[2];
315
- counter[3] = state.counter[3];
316
-
317
- std::uint32_t cntTmp[4];
318
- std::uint32_t keyTmp[2];
319
- for (; i < num_elements; i += 4) {
320
- cntTmp[0] = counter[0];
321
- cntTmp[1] = counter[1];
322
- cntTmp[2] = counter[2];
323
- cntTmp[3] = counter[3];
324
-
325
- keyTmp[0] = state.key[0];
326
- keyTmp[1] = state.key[1];
327
-
328
- round_10(cntTmp, keyTmp);
329
-
330
- if (i + 4 <= num_elements) {
331
- for (int j = 0; j < 4; j++) {
332
- res[i + j] = cntTmp[j];
333
- }
334
- add128_1(counter);
335
- }
336
- else {
337
- // here if last iteration
338
- for (int j = 0; i < num_elements; i++, j++) {
339
- res[i] = cntTmp[j];
340
- }
341
- }
342
- }
343
- skip_ahead(state, num_elements);
344
- return res;
345
- }
346
-
347
- // for VecSize <= 4
348
- template <std::int32_t VecSize>
349
- __attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> generate_small(
350
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
351
- const std::int32_t num_elements = VecSize;
352
- sycl::vec<std::uint32_t, VecSize> res;
353
-
354
- std::uint32_t counter[4];
355
- std::uint32_t key[2];
356
-
357
- int i = 0;
358
- int part = (int)state.part;
359
- while (part && (i < num_elements)) {
360
- res[i++] = state.result[3 - (--part)];
361
- }
362
- if (i == num_elements) {
363
- skip_ahead(state, num_elements);
364
- return res;
365
- }
366
-
367
- counter[0] = state.counter[0];
368
- counter[1] = state.counter[1];
369
- counter[2] = state.counter[2];
370
- counter[3] = state.counter[3];
371
- key[0] = state.key[0];
372
- key[1] = state.key[1];
373
-
374
- round_10(counter, key);
375
-
376
- for (int j = 0; i < num_elements; i++, j++) {
377
- res[i] = counter[j];
378
- }
379
-
380
- skip_ahead(state, num_elements);
381
- return res;
382
- }
383
-
384
- template <int VecSize>
385
- __attribute__((always_inline)) static inline std::uint32_t generate_single(
386
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
387
- std::uint32_t res;
388
-
389
- std::uint32_t counter[4];
390
- std::uint32_t key[2];
391
-
392
- std::int32_t part = static_cast<std::int32_t>(state.part);
393
- if (part != 0) {
394
- res = state.result[3 - (--part)];
395
- skip_ahead(state, 1);
396
- return res;
397
- }
398
- counter[0] = state.counter[0];
399
- counter[1] = state.counter[1];
400
- counter[2] = state.counter[2];
401
- counter[3] = state.counter[3];
402
- key[0] = state.key[0];
403
- key[1] = state.key[1];
404
-
405
- round_10(counter, key);
406
-
407
- res = counter[0];
408
-
409
- skip_ahead(state, 1);
410
- return res;
411
- }
412
-
413
- } // namespace philox4x32x10_impl
414
-
415
- template <std::int32_t VecSize>
416
- class engine_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
417
- protected:
418
- engine_base(std::uint64_t seed, std::uint64_t offset = 0) {
419
- philox4x32x10_impl::init(this->state_, 1, &seed, offset);
420
- }
421
-
422
- engine_base(std::uint64_t n, const std::uint64_t* seed, std::uint64_t offset = 0) {
423
- philox4x32x10_impl::init(this->state_, n, seed, offset);
424
- }
425
-
426
- engine_base(std::uint64_t seed, std::uint64_t n_offset, const std::uint64_t* offset_ptr) {
427
- philox4x32x10_impl::init(this->state_, 1, &seed, n_offset, offset_ptr);
428
- }
429
-
430
- engine_base(std::uint64_t n, const std::uint64_t* seed, std::uint64_t n_offset,
431
- const std::uint64_t* offset_ptr) {
432
- philox4x32x10_impl::init(this->state_, n, seed, n_offset, offset_ptr);
433
- }
434
-
435
- template <typename RealType>
436
- __attribute__((always_inline)) inline auto generate(RealType a, RealType b) ->
437
- typename std::conditional<VecSize == 1, RealType, sycl::vec<RealType, VecSize>>::type {
438
- sycl::vec<RealType, VecSize> res;
439
- sycl::vec<std::uint32_t, VecSize> res_uint;
440
- RealType a1;
441
- RealType c1;
442
-
443
- c1 = (b - a) / (static_cast<RealType>((std::numeric_limits<std::uint32_t>::max)()) + 1);
444
- a1 = (b + a) / static_cast<RealType>(2.0);
445
-
446
- if constexpr (VecSize > 4) {
447
- res_uint = philox4x32x10_impl::generate_full(this->state_);
448
- }
449
- else {
450
- res_uint = philox4x32x10_impl::generate_small(this->state_);
451
- }
452
- for (int i = 0; i < VecSize; i++) {
453
- res[i] = static_cast<RealType>(static_cast<std::int32_t>(res_uint[i])) * c1 + a1;
454
- }
455
- return res;
456
- }
457
-
458
- __attribute__((always_inline)) inline auto generate() ->
459
- typename std::conditional<VecSize == 1, std::uint32_t,
460
- sycl::vec<std::uint32_t, VecSize>>::type {
461
- if constexpr (VecSize > 4) {
462
- return philox4x32x10_impl::generate_full(this->state_);
463
- }
464
- return philox4x32x10_impl::generate_small(this->state_);
465
- }
466
-
467
- template <typename UIntType>
468
- __attribute__((always_inline)) inline auto generate_uniform_bits() ->
469
- typename std::conditional<VecSize == 1, UIntType, sycl::vec<UIntType, VecSize>>::type {
470
- if constexpr (std::is_same<UIntType, std::uint32_t>::value) {
471
- return generate();
472
- }
473
- else {
474
- auto uni_res1 = generate();
475
- auto uni_res2 = generate();
476
-
477
- if constexpr (VecSize == 1) {
478
- return (static_cast<std::uint64_t>(uni_res2) << 32) + uni_res1;
479
- }
480
- else {
481
- sycl::vec<std::uint64_t, VecSize> vec_out;
482
-
483
- if constexpr (VecSize != 3) {
484
- for (int i = 0; i < VecSize / 2; i++) {
485
- vec_out[i] = (static_cast<std::uint64_t>(uni_res1[2 * i + 1]) << 32) +
486
- uni_res1[2 * i];
487
- vec_out[i + VecSize / 2] =
488
- (static_cast<std::uint64_t>(uni_res2[2 * i + 1]) << 32) +
489
- uni_res2[2 * i];
490
- }
491
- }
492
- else {
493
- vec_out[0] = (static_cast<std::uint64_t>(uni_res1[1]) << 32) + uni_res1[0];
494
- vec_out[1] = (static_cast<std::uint64_t>(uni_res2[0]) << 32) + uni_res1[2];
495
- vec_out[2] = (static_cast<std::uint64_t>(uni_res2[2]) << 32) + uni_res2[1];
496
- }
497
-
498
- return vec_out;
499
- }
500
- }
501
- }
502
-
503
- template <typename RealType>
504
- RealType generate_single(RealType a, RealType b) {
505
- RealType res;
506
- std::uint32_t res_uint;
507
- RealType a1;
508
- RealType c1;
509
-
510
- c1 = (b - a) / (static_cast<RealType>((std::numeric_limits<std::uint32_t>::max)()) + 1);
511
- a1 = (b + a) / static_cast<RealType>(2.0);
512
-
513
- res_uint = philox4x32x10_impl::generate_single(this->state_);
514
-
515
- res = static_cast<RealType>(static_cast<std::int32_t>(res_uint)) * c1 + a1;
516
-
517
- return res;
518
- }
519
-
520
- __attribute__((always_inline)) inline std::uint32_t generate_single() {
521
- return philox4x32x10_impl::generate_single(this->state_);
522
- }
523
-
524
- template <typename UIntType>
525
- __attribute__((always_inline)) inline auto generate_single_uniform_bits() {
526
- if constexpr (std::is_same<UIntType, std::uint32_t>::value) {
527
- return philox4x32x10_impl::generate_single(this->state_);
528
- }
529
- else {
530
- auto uni_res1 = philox4x32x10_impl::generate_single(this->state_);
531
- auto uni_res2 = philox4x32x10_impl::generate_single(this->state_);
532
-
533
- return (static_cast<std::uint64_t>(uni_res2) << 32) + uni_res1;
534
- }
535
- }
536
-
537
- void skip_ahead(std::uint64_t num_to_skip) {
538
- detail::philox4x32x10_impl::skip_ahead(this->state_, num_to_skip);
539
- }
540
-
541
- void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) {
542
- detail::philox4x32x10_impl::skip_ahead(this->state_, num_to_skip.size(),
543
- num_to_skip.begin());
544
- }
545
-
546
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>> state_;
547
- };
548
-
549
- } // namespace detail
550
- } // namespace oneapi::mkl::rng::device
551
-
552
- #endif // _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_