mkl-devel-dpcpp 2025.1.0__py2.py3-none-win_amd64.whl → 2025.2.0__py2.py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mkl-devel-dpcpp might be problematic. Click here for more details.

Files changed (103) hide show
  1. {mkl_devel_dpcpp-2025.1.0.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl.lib +0 -0
  2. mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
  3. {mkl_devel_dpcpp-2025.1.0.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_dll.lib +0 -0
  4. mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
  5. {mkl_devel_dpcpp-2025.1.0.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_rng_dll.lib +0 -0
  6. mkl_devel_dpcpp-2025.2.0.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
  7. {mkl_devel_dpcpp-2025.1.0.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/METADATA +4 -3
  8. mkl_devel_dpcpp-2025.2.0.dist-info/RECORD +15 -0
  9. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/bfloat16.hpp +0 -26
  10. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/blas/buffer.hpp +0 -42
  11. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/blas/buffer_decls.hpp +0 -880
  12. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/blas/spec.hpp +0 -42
  13. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/blas/types.hpp +0 -60
  14. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/blas/usm.hpp +0 -42
  15. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/blas/usm_decls.hpp +0 -1240
  16. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/blas.hpp +0 -34
  17. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/dft/spec.hpp +0 -42
  18. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/dft.hpp +0 -254
  19. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/dfti.hpp +0 -22
  20. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/exceptions.hpp +0 -110
  21. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/experimental/data_fitting/interpolate.hpp +0 -67
  22. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/experimental/data_fitting/spline_and_data_params.hpp +0 -68
  23. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/experimental/data_fitting/splines.hpp +0 -177
  24. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/experimental/data_fitting.hpp +0 -22
  25. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/export.hpp +0 -25
  26. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/lapack/concepts.hpp +0 -55
  27. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/lapack/exceptions.hpp +0 -75
  28. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/lapack/lapack.hpp +0 -1096
  29. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/lapack/scratchpad.hpp +0 -107
  30. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/lapack/spec.hpp +0 -42
  31. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/lapack.hpp +0 -24
  32. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/detail/engine_base.hpp +0 -48
  33. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp +0 -89
  34. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/beta_impl.hpp +0 -463
  35. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/bits_impl.hpp +0 -71
  36. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/distribution_base.hpp +0 -85
  37. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_base.hpp +0 -43
  38. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/engine_helpers_base.hpp +0 -54
  39. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp +0 -116
  40. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp +0 -285
  41. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp +0 -270
  42. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/geometric_impl.hpp +0 -99
  43. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp +0 -105
  44. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_helpers_impl.hpp +0 -117
  45. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp +0 -223
  46. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_helpers_impl.hpp +0 -118
  47. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp +0 -266
  48. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_helpers_impl.hpp +0 -125
  49. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp +0 -385
  50. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp +0 -3668
  51. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_helpers_impl.hpp +0 -141
  52. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp +0 -552
  53. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp +0 -328
  54. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/types.hpp +0 -58
  55. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp +0 -51
  56. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp +0 -287
  57. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp +0 -183
  58. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/distributions.hpp +0 -700
  59. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/engine_helpers.hpp +0 -116
  60. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/engines.hpp +0 -187
  61. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/functions.hpp +0 -59
  62. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device/types.hpp +0 -79
  63. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/device.hpp +0 -29
  64. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/distributions.hpp +0 -1913
  65. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/engines.hpp +0 -788
  66. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/functions.hpp +0 -163
  67. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng/spec.hpp +0 -42
  68. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/rng.hpp +0 -23
  69. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/spblas/sparse_auxiliary.hpp +0 -111
  70. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/spblas/sparse_operations.hpp +0 -446
  71. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/spblas/sparse_structures.hpp +0 -193
  72. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/spblas/spec.hpp +0 -42
  73. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/spblas.hpp +0 -33
  74. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/spec.hpp +0 -42
  75. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/stats/spec.hpp +0 -42
  76. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/stats.hpp +0 -357
  77. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/types.hpp +0 -321
  78. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/buffer.hpp +0 -3529
  79. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/decls.hpp +0 -280
  80. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/device/detail/decls.hpp +0 -81
  81. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/device/detail/dispatch.hpp +0 -1059
  82. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/device/detail/ep.hpp +0 -861
  83. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/device/detail/ha.hpp +0 -860
  84. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/device/detail/la.hpp +0 -860
  85. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/device/detail/rts.hpp +0 -4608
  86. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/device/detail/scalar.hpp +0 -8963
  87. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/device/vm.hpp +0 -460
  88. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/span.hpp +0 -3813
  89. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/spec.hpp +0 -42
  90. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm/usm.hpp +0 -3581
  91. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl/vm.hpp +0 -31
  92. mkl_devel_dpcpp-2025.1.0.data/data/Library/include/oneapi/mkl.hpp +0 -35
  93. mkl_devel_dpcpp-2025.1.0.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
  94. mkl_devel_dpcpp-2025.1.0.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
  95. mkl_devel_dpcpp-2025.1.0.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
  96. mkl_devel_dpcpp-2025.1.0.dist-info/RECORD +0 -99
  97. {mkl_devel_dpcpp-2025.1.0.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_blas_dll.lib +0 -0
  98. {mkl_devel_dpcpp-2025.1.0.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_data_fitting_dll.lib +0 -0
  99. {mkl_devel_dpcpp-2025.1.0.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_stats_dll.lib +0 -0
  100. {mkl_devel_dpcpp-2025.1.0.data → mkl_devel_dpcpp-2025.2.0.data}/data/Library/lib/mkl_sycl_vm_dll.lib +0 -0
  101. {mkl_devel_dpcpp-2025.1.0.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/LICENSE.txt +0 -0
  102. {mkl_devel_dpcpp-2025.1.0.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/WHEEL +0 -0
  103. {mkl_devel_dpcpp-2025.1.0.dist-info → mkl_devel_dpcpp-2025.2.0.dist-info}/top_level.txt +0 -0
@@ -1,141 +0,0 @@
1
- /*******************************************************************************
2
- * Copyright (C) 2023 Intel Corporation
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing,
11
- * software distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- * See the License for the specific language governing permissions
14
- * and limitations under the License.
15
- *
16
- *
17
- * SPDX-License-Identifier: Apache-2.0
18
- *******************************************************************************/
19
-
20
- #ifndef _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
21
- #define _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
22
-
23
- #include "oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp"
24
-
25
- namespace oneapi::mkl::rng::device::detail {
26
- namespace philox4x32x10_impl {
27
-
28
- template <std::int32_t VecSize>
29
- static inline void init(
30
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state, std::size_t id,
31
- const sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write>& accessor) {
32
- std::size_t num_elements_acc =
33
- sizeof(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>) /
34
- sizeof(std::uint32_t);
35
- state.key[0] = accessor[id * num_elements_acc];
36
- state.key[1] = accessor[id * num_elements_acc + 1];
37
- state.counter[0] = accessor[id * num_elements_acc + 2];
38
- state.counter[1] = accessor[id * num_elements_acc + 3];
39
- state.counter[2] = accessor[id * num_elements_acc + 4];
40
- state.counter[3] = accessor[id * num_elements_acc + 5];
41
-
42
- state.part = accessor[id * num_elements_acc + 6];
43
-
44
- state.result[0] = accessor[id * num_elements_acc + 7];
45
- state.result[1] = accessor[id * num_elements_acc + 8];
46
- state.result[2] = accessor[id * num_elements_acc + 9];
47
- state.result[3] = accessor[id * num_elements_acc + 10];
48
- }
49
-
50
- template <std::int32_t VecSize>
51
- static inline void store(
52
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state, std::size_t id,
53
- const sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write>& accessor) {
54
- std::size_t num_elements_acc =
55
- sizeof(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>) /
56
- sizeof(std::uint32_t);
57
- accessor[id * num_elements_acc] = state.key[0];
58
- accessor[id * num_elements_acc + 1] = state.key[1];
59
- accessor[id * num_elements_acc + 2] = state.counter[0];
60
- accessor[id * num_elements_acc + 3] = state.counter[1];
61
- accessor[id * num_elements_acc + 4] = state.counter[2];
62
- accessor[id * num_elements_acc + 5] = state.counter[3];
63
- accessor[id * num_elements_acc + 6] = state.part;
64
- accessor[id * num_elements_acc + 7] = state.result[0];
65
- accessor[id * num_elements_acc + 8] = state.result[1];
66
- accessor[id * num_elements_acc + 9] = state.result[2];
67
- accessor[id * num_elements_acc + 10] = state.result[3];
68
- }
69
-
70
- } // namespace philox4x32x10_impl
71
-
72
- template <std::int32_t VecSize>
73
- class engine_accessor_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
74
- public:
75
- engine_accessor_base(sycl::buffer<std::uint32_t, 1>& state_buf, sycl::handler& cgh)
76
- : states_accessor_(state_buf, cgh) {}
77
-
78
- oneapi::mkl::rng::device::philox4x32x10<VecSize> load(std::size_t id) const {
79
- oneapi::mkl::rng::device::philox4x32x10<VecSize> engine;
80
- philox4x32x10_impl::init(engine.state_, id, states_accessor_);
81
- return engine;
82
- }
83
-
84
- void store(oneapi::mkl::rng::device::philox4x32x10<VecSize>& engine, std::size_t id) const {
85
- philox4x32x10_impl::store(engine.state_, id, states_accessor_);
86
- }
87
-
88
- protected:
89
- sycl::accessor<std::uint32_t, 1, sycl::access::mode::read_write> states_accessor_;
90
- };
91
-
92
- template <std::int32_t VecSize>
93
- class engine_descriptor_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
94
- public:
95
- using engine_type = oneapi::mkl::rng::device::philox4x32x10<VecSize>;
96
-
97
- using accessor_type =
98
- oneapi::mkl::rng::device::engine_accessor<oneapi::mkl::rng::device::philox4x32x10<VecSize>>;
99
-
100
- engine_descriptor_base(sycl::queue& queue, sycl::range<1> range, std::uint64_t seed,
101
- std::uint64_t offset)
102
- : states_buffer_(range.get(0) * sizeof(engine_state<engine_type>) /
103
- sizeof(std::uint32_t)) {
104
- queue.submit([&](sycl::handler& cgh) {
105
- accessor_type states_accessor(states_buffer_, cgh);
106
-
107
- cgh.parallel_for<class init_kernel<engine_type>>
108
- (range, [=](sycl::item<1> item) {
109
- std::size_t id = item.get_id(0);
110
- oneapi::mkl::rng::device::philox4x32x10<VecSize> engine(seed, offset* id);
111
- states_accessor.store(engine, id);
112
- });
113
- });
114
- }
115
-
116
- template <typename InitEngineFunc>
117
- engine_descriptor_base(sycl::queue& queue, sycl::range<1> range, InitEngineFunc func)
118
- : states_buffer_(range.get(0) * sizeof(engine_state<engine_type>) /
119
- sizeof(std::uint32_t)) {
120
- queue.submit([&](sycl::handler& cgh) {
121
- accessor_type states_accessor(states_buffer_, cgh);
122
-
123
- cgh.parallel_for<class init_kernel_ex<engine_type>>
124
- (range, [=](sycl::item<1> item) {
125
- std::size_t id = item.get_id(0);
126
- states_accessor.store(func(item), id);
127
- });
128
- });
129
- }
130
-
131
- accessor_type get_access(sycl::handler& cgh) {
132
- return accessor_type{ states_buffer_, cgh };
133
- }
134
-
135
- protected:
136
- sycl::buffer<std::uint32_t, 1> states_buffer_;
137
- };
138
-
139
- } // namespace oneapi::mkl::rng::device::detail
140
-
141
- #endif // _MKL_RNG_DEVICE_PHILOX4X32X10_HELPERS_IMPL_HPP_
@@ -1,552 +0,0 @@
1
- /*******************************************************************************
2
- * Copyright (C) 2020 Intel Corporation
3
- *
4
- * Licensed under the Apache License, Version 2.0 (the "License");
5
- * you may not use this file except in compliance with the License.
6
- * You may obtain a copy of the License at
7
- *
8
- * http://www.apache.org/licenses/LICENSE-2.0
9
- *
10
- * Unless required by applicable law or agreed to in writing,
11
- * software distributed under the License is distributed on an "AS IS" BASIS,
12
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- * See the License for the specific language governing permissions
14
- * and limitations under the License.
15
- *
16
- *
17
- * SPDX-License-Identifier: Apache-2.0
18
- *******************************************************************************/
19
-
20
- #ifndef _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
21
- #define _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_
22
-
23
- #include <utility> // std::pair
24
-
25
- namespace oneapi::mkl::rng::device {
26
-
27
- template <std::int32_t VecSize = 1>
28
- class philox4x32x10;
29
-
30
- namespace detail {
31
-
32
- template <std::int32_t VecSize>
33
- struct engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
34
- std::uint32_t key[2];
35
- std::uint32_t counter[4];
36
- std::uint32_t part;
37
- std::uint32_t result[4];
38
- };
39
-
40
- namespace philox4x32x10_impl {
41
-
42
// Adds the 64-bit value `b` to the 128-bit little-endian integer stored in a[0..3]
// (a[0] is the least significant 32-bit word). Overflow past 128 bits wraps around.
static inline void add128(std::uint32_t* a, std::uint64_t b) {
    const std::uint64_t low_sum = ((static_cast<std::uint64_t>(a[1]) << 32) | a[0]) + b;

    a[0] = static_cast<std::uint32_t>(low_sum);
    a[1] = static_cast<std::uint32_t>(low_sum >> 32);

    // An unsigned sum smaller than one of its operands means the low half wrapped.
    const bool carry = low_sum < b;
    if (!carry) {
        return;
    }

    const std::uint64_t high_sum = ((static_cast<std::uint64_t>(a[3]) << 32) | a[2]) + 1;

    a[2] = static_cast<std::uint32_t>(high_sum);
    a[3] = static_cast<std::uint32_t>(high_sum >> 32);
}
58
-
59
// Increments by one the 128-bit little-endian integer stored in a[0..3]
// (a[0] is the least significant 32-bit word). Wraps to zero on overflow.
static inline void add128_1(std::uint32_t* a) {
    for (int word = 0; word < 4; ++word) {
        // A word that does not wrap to zero absorbs the carry; stop there.
        if (++a[word] != 0) {
            return;
        }
    }
}
71
-
72
// Multiplies two 32-bit values and returns the 64-bit product split into halves:
// `first` holds the low 32 bits, `second` holds the high 32 bits.
static inline std::pair<std::uint32_t, std::uint32_t> mul_hilo_32(std::uint32_t a,
                                                                  std::uint32_t b) {
    const std::uint64_t product = static_cast<std::uint64_t>(a) * b;
    const std::uint32_t low_half = static_cast<std::uint32_t>(product);
    const std::uint32_t high_half = static_cast<std::uint32_t>(product >> 32);
    return std::pair<std::uint32_t, std::uint32_t>(low_half, high_half);
}
78
-
79
- static inline void round(std::uint32_t* cnt, std::uint32_t* k) {
80
- auto [L0, H0] = mul_hilo_32(0xD2511F53, cnt[0]);
81
- auto [L1, H1] = mul_hilo_32(0xCD9E8D57, cnt[2]);
82
-
83
- cnt[0] = H1 ^ cnt[1] ^ k[0];
84
- cnt[1] = L1;
85
- cnt[2] = H0 ^ cnt[3] ^ k[1];
86
- cnt[3] = L0;
87
- }
88
-
89
- static inline void round_10(std::uint32_t* cnt, std::uint32_t* k) {
90
- round(cnt, k); // 1
91
- // increasing keys with philox4x32x10 constants
92
- k[0] += 0x9E3779B9;
93
- k[1] += 0xBB67AE85;
94
- round(cnt, k); // 2
95
- k[0] += 0x9E3779B9;
96
- k[1] += 0xBB67AE85;
97
- round(cnt, k); // 3
98
- k[0] += 0x9E3779B9;
99
- k[1] += 0xBB67AE85;
100
- round(cnt, k); // 4
101
- k[0] += 0x9E3779B9;
102
- k[1] += 0xBB67AE85;
103
- round(cnt, k); // 5
104
- k[0] += 0x9E3779B9;
105
- k[1] += 0xBB67AE85;
106
- round(cnt, k); // 6
107
- k[0] += 0x9E3779B9;
108
- k[1] += 0xBB67AE85;
109
- round(cnt, k); // 7
110
- k[0] += 0x9E3779B9;
111
- k[1] += 0xBB67AE85;
112
- round(cnt, k); // 8
113
- k[0] += 0x9E3779B9;
114
- k[1] += 0xBB67AE85;
115
- round(cnt, k); // 9
116
- k[0] += 0x9E3779B9;
117
- k[1] += 0xBB67AE85;
118
- round(cnt, k); // 10
119
- }
120
-
121
- template <std::int32_t VecSize>
122
- static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
123
- std::uint64_t num_to_skip) {
124
- std::uint64_t num_to_skip_tmp = num_to_skip;
125
- std::uint64_t c_inc;
126
- std::uint32_t counter[4];
127
- std::uint32_t key[2];
128
- std::uint64_t tail;
129
- if (num_to_skip_tmp <= state.part) {
130
- state.part -= num_to_skip_tmp;
131
- }
132
- else {
133
- tail = num_to_skip % 4;
134
- if ((tail == 0) && (state.part == 0)) {
135
- add128(state.counter, num_to_skip / 4);
136
- }
137
- else {
138
- num_to_skip_tmp = num_to_skip_tmp - state.part;
139
- state.part = 0;
140
- c_inc = (num_to_skip_tmp - 1) / 4;
141
- state.part = (4 - num_to_skip_tmp % 4) % 4;
142
- add128(state.counter, c_inc);
143
- counter[0] = state.counter[0];
144
- counter[1] = state.counter[1];
145
- counter[2] = state.counter[2];
146
- counter[3] = state.counter[3];
147
- key[0] = state.key[0];
148
- key[1] = state.key[1];
149
- round_10(counter, key);
150
- state.result[0] = counter[0];
151
- state.result[1] = counter[1];
152
- state.result[2] = counter[2];
153
- state.result[3] = counter[3];
154
- add128_1(state.counter);
155
- }
156
- }
157
- }
158
-
159
- template <std::int32_t VecSize>
160
- static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
161
- std::uint64_t n, const std::uint64_t* num_to_skip_ptr) {
162
- constexpr std::uint64_t uint_max = 0xFFFFFFFFFFFFFFFF;
163
- std::uint64_t post_buffer, pre_buffer;
164
- std::int32_t num_elements = 0;
165
- std::int32_t remained_counter;
166
- std::uint64_t tmp_skip_array[3] = { 0, 0, 0 };
167
-
168
- for (std::uint64_t i = 0; (i < 3) && (i < n); i++) {
169
- tmp_skip_array[i] = num_to_skip_ptr[i];
170
- if (tmp_skip_array[i]) {
171
- num_elements = i + 1;
172
- }
173
- }
174
-
175
- if (num_elements == 0) {
176
- return;
177
- }
178
- if ((num_elements == 1) && (tmp_skip_array[0] <= state.part)) {
179
- state.part -= static_cast<std::uint32_t>(tmp_skip_array[0]);
180
- return;
181
- }
182
- std::uint32_t counter[4];
183
- std::uint32_t key[2];
184
-
185
- if ((tmp_skip_array[0] - state.part) <= tmp_skip_array[0]) {
186
- tmp_skip_array[0] = tmp_skip_array[0] - state.part;
187
- }
188
- else if ((num_elements == 2) || (tmp_skip_array[1] - 1 < tmp_skip_array[1])) {
189
- tmp_skip_array[1] = tmp_skip_array[1] - 1;
190
- tmp_skip_array[0] = uint_max - state.part + tmp_skip_array[0];
191
- }
192
- else {
193
- tmp_skip_array[2] = tmp_skip_array[2] - 1;
194
- tmp_skip_array[1] = uint_max - 1;
195
- tmp_skip_array[0] = uint_max - state.part + tmp_skip_array[0];
196
- }
197
-
198
- state.part = 0;
199
-
200
- post_buffer = 0;
201
-
202
- remained_counter = static_cast<std::uint32_t>(tmp_skip_array[0] % 4);
203
-
204
- for (int i = num_elements - 1; i >= 0; i--) {
205
- pre_buffer = (tmp_skip_array[i] << 62);
206
- tmp_skip_array[i] >>= 2;
207
- tmp_skip_array[i] |= post_buffer;
208
- post_buffer = pre_buffer;
209
- }
210
-
211
- state.part = 4 - remained_counter;
212
-
213
- std::uint64_t counter64[] = { state.counter[1], state.counter[3] };
214
- counter64[0] = ((counter64[0] << 32ull) | state.counter[0]);
215
- counter64[1] = ((counter64[1] << 32ull) | state.counter[2]);
216
-
217
- counter64[0] += tmp_skip_array[0];
218
-
219
- if (counter64[0] < tmp_skip_array[0]) {
220
- counter64[1]++;
221
- }
222
-
223
- counter64[1] += tmp_skip_array[1];
224
-
225
- counter[0] = static_cast<std::uint32_t>(counter64[0]);
226
- counter[1] = static_cast<std::uint32_t>(counter64[0] >> 32);
227
- counter[2] = static_cast<std::uint32_t>(counter64[1]);
228
- counter[3] = static_cast<std::uint32_t>(counter64[1] >> 32);
229
-
230
- key[0] = state.key[0];
231
- key[1] = state.key[1];
232
-
233
- round_10(counter, key);
234
-
235
- state.result[0] = counter[0];
236
- state.result[1] = counter[1];
237
- state.result[2] = counter[2];
238
- state.result[3] = counter[3];
239
-
240
- counter64[0]++;
241
-
242
- if (counter64[0] < 1) {
243
- counter64[1]++;
244
- }
245
-
246
- state.counter[0] = static_cast<std::uint32_t>(counter64[0]);
247
- state.counter[1] = static_cast<std::uint32_t>(counter64[0] >> 32);
248
- state.counter[2] = static_cast<std::uint32_t>(counter64[1]);
249
- state.counter[3] = static_cast<std::uint32_t>(counter64[1] >> 32);
250
- }
251
-
252
- template <std::int32_t VecSize>
253
- static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
254
- std::uint64_t n, const std::uint64_t* seed_ptr, std::uint64_t offset) {
255
- state.key[0] = static_cast<std::uint32_t>(seed_ptr[0]);
256
- state.key[1] = static_cast<std::uint32_t>(seed_ptr[0] >> 32);
257
-
258
- state.counter[0] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1]) : 0);
259
- state.counter[1] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1] >> 32) : 0);
260
-
261
- state.counter[2] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2]) : 0);
262
- state.counter[3] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2] >> 32) : 0);
263
-
264
- state.part = 0;
265
- state.result[0] = 0;
266
- state.result[1] = 0;
267
- state.result[2] = 0;
268
- state.result[3] = 0;
269
- skip_ahead(state, offset);
270
- }
271
-
272
- template <std::int32_t VecSize>
273
- static inline void init(engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state,
274
- std::uint64_t n, const std::uint64_t* seed_ptr, std::uint64_t n_offset,
275
- const std::uint64_t* offset_ptr) {
276
- state.key[0] = static_cast<std::uint32_t>(seed_ptr[0]);
277
- state.key[1] = static_cast<std::uint32_t>(seed_ptr[0] >> 32);
278
-
279
- state.counter[0] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1]) : 0);
280
- state.counter[1] = (n >= 2 ? static_cast<std::uint32_t>(seed_ptr[1] >> 32) : 0);
281
-
282
- state.counter[2] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2]) : 0);
283
- state.counter[3] = (n >= 3 ? static_cast<std::uint32_t>(seed_ptr[2] >> 32) : 0);
284
-
285
- state.part = 0;
286
- state.result[0] = 0;
287
- state.result[1] = 0;
288
- state.result[2] = 0;
289
- state.result[3] = 0;
290
- skip_ahead(state, n_offset, offset_ptr);
291
- }
292
-
293
- // for VecSize > 4
294
- template <std::int32_t VecSize>
295
- __attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> generate_full(
296
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
297
- const std::int32_t num_elements = VecSize;
298
- sycl::vec<std::uint32_t, VecSize> res;
299
-
300
- std::uint32_t counter[4];
301
-
302
- int i = 0;
303
- int part = (int)state.part;
304
- while (part && (i < num_elements)) {
305
- res[i++] = state.result[3 - (--part)];
306
- }
307
- if (i == num_elements) {
308
- skip_ahead(state, num_elements);
309
- return res;
310
- }
311
-
312
- counter[0] = state.counter[0];
313
- counter[1] = state.counter[1];
314
- counter[2] = state.counter[2];
315
- counter[3] = state.counter[3];
316
-
317
- std::uint32_t cntTmp[4];
318
- std::uint32_t keyTmp[2];
319
- for (; i < num_elements; i += 4) {
320
- cntTmp[0] = counter[0];
321
- cntTmp[1] = counter[1];
322
- cntTmp[2] = counter[2];
323
- cntTmp[3] = counter[3];
324
-
325
- keyTmp[0] = state.key[0];
326
- keyTmp[1] = state.key[1];
327
-
328
- round_10(cntTmp, keyTmp);
329
-
330
- if (i + 4 <= num_elements) {
331
- for (int j = 0; j < 4; j++) {
332
- res[i + j] = cntTmp[j];
333
- }
334
- add128_1(counter);
335
- }
336
- else {
337
- // here if last iteration
338
- for (int j = 0; i < num_elements; i++, j++) {
339
- res[i] = cntTmp[j];
340
- }
341
- }
342
- }
343
- skip_ahead(state, num_elements);
344
- return res;
345
- }
346
-
347
- // for VecSize <= 4
348
- template <std::int32_t VecSize>
349
- __attribute__((always_inline)) static inline sycl::vec<std::uint32_t, VecSize> generate_small(
350
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
351
- const std::int32_t num_elements = VecSize;
352
- sycl::vec<std::uint32_t, VecSize> res;
353
-
354
- std::uint32_t counter[4];
355
- std::uint32_t key[2];
356
-
357
- int i = 0;
358
- int part = (int)state.part;
359
- while (part && (i < num_elements)) {
360
- res[i++] = state.result[3 - (--part)];
361
- }
362
- if (i == num_elements) {
363
- skip_ahead(state, num_elements);
364
- return res;
365
- }
366
-
367
- counter[0] = state.counter[0];
368
- counter[1] = state.counter[1];
369
- counter[2] = state.counter[2];
370
- counter[3] = state.counter[3];
371
- key[0] = state.key[0];
372
- key[1] = state.key[1];
373
-
374
- round_10(counter, key);
375
-
376
- for (int j = 0; i < num_elements; i++, j++) {
377
- res[i] = counter[j];
378
- }
379
-
380
- skip_ahead(state, num_elements);
381
- return res;
382
- }
383
-
384
- template <int VecSize>
385
- __attribute__((always_inline)) static inline std::uint32_t generate_single(
386
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>>& state) {
387
- std::uint32_t res;
388
-
389
- std::uint32_t counter[4];
390
- std::uint32_t key[2];
391
-
392
- std::int32_t part = static_cast<std::int32_t>(state.part);
393
- if (part != 0) {
394
- res = state.result[3 - (--part)];
395
- skip_ahead(state, 1);
396
- return res;
397
- }
398
- counter[0] = state.counter[0];
399
- counter[1] = state.counter[1];
400
- counter[2] = state.counter[2];
401
- counter[3] = state.counter[3];
402
- key[0] = state.key[0];
403
- key[1] = state.key[1];
404
-
405
- round_10(counter, key);
406
-
407
- res = counter[0];
408
-
409
- skip_ahead(state, 1);
410
- return res;
411
- }
412
-
413
- } // namespace philox4x32x10_impl
414
-
415
- template <std::int32_t VecSize>
416
- class engine_base<oneapi::mkl::rng::device::philox4x32x10<VecSize>> {
417
- protected:
418
- engine_base(std::uint64_t seed, std::uint64_t offset = 0) {
419
- philox4x32x10_impl::init(this->state_, 1, &seed, offset);
420
- }
421
-
422
- engine_base(std::uint64_t n, const std::uint64_t* seed, std::uint64_t offset = 0) {
423
- philox4x32x10_impl::init(this->state_, n, seed, offset);
424
- }
425
-
426
- engine_base(std::uint64_t seed, std::uint64_t n_offset, const std::uint64_t* offset_ptr) {
427
- philox4x32x10_impl::init(this->state_, 1, &seed, n_offset, offset_ptr);
428
- }
429
-
430
- engine_base(std::uint64_t n, const std::uint64_t* seed, std::uint64_t n_offset,
431
- const std::uint64_t* offset_ptr) {
432
- philox4x32x10_impl::init(this->state_, n, seed, n_offset, offset_ptr);
433
- }
434
-
435
- template <typename RealType>
436
- __attribute__((always_inline)) inline auto generate(RealType a, RealType b) ->
437
- typename std::conditional<VecSize == 1, RealType, sycl::vec<RealType, VecSize>>::type {
438
- sycl::vec<RealType, VecSize> res;
439
- sycl::vec<std::uint32_t, VecSize> res_uint;
440
- RealType a1;
441
- RealType c1;
442
-
443
- c1 = (b - a) / (static_cast<RealType>((std::numeric_limits<std::uint32_t>::max)()) + 1);
444
- a1 = (b + a) / static_cast<RealType>(2.0);
445
-
446
- if constexpr (VecSize > 4) {
447
- res_uint = philox4x32x10_impl::generate_full(this->state_);
448
- }
449
- else {
450
- res_uint = philox4x32x10_impl::generate_small(this->state_);
451
- }
452
- for (int i = 0; i < VecSize; i++) {
453
- res[i] = static_cast<RealType>(static_cast<std::int32_t>(res_uint[i])) * c1 + a1;
454
- }
455
- return res;
456
- }
457
-
458
- __attribute__((always_inline)) inline auto generate() ->
459
- typename std::conditional<VecSize == 1, std::uint32_t,
460
- sycl::vec<std::uint32_t, VecSize>>::type {
461
- if constexpr (VecSize > 4) {
462
- return philox4x32x10_impl::generate_full(this->state_);
463
- }
464
- return philox4x32x10_impl::generate_small(this->state_);
465
- }
466
-
467
- template <typename UIntType>
468
- __attribute__((always_inline)) inline auto generate_uniform_bits() ->
469
- typename std::conditional<VecSize == 1, UIntType, sycl::vec<UIntType, VecSize>>::type {
470
- if constexpr (std::is_same<UIntType, std::uint32_t>::value) {
471
- return generate();
472
- }
473
- else {
474
- auto uni_res1 = generate();
475
- auto uni_res2 = generate();
476
-
477
- if constexpr (VecSize == 1) {
478
- return (static_cast<std::uint64_t>(uni_res2) << 32) + uni_res1;
479
- }
480
- else {
481
- sycl::vec<std::uint64_t, VecSize> vec_out;
482
-
483
- if constexpr (VecSize != 3) {
484
- for (int i = 0; i < VecSize / 2; i++) {
485
- vec_out[i] = (static_cast<std::uint64_t>(uni_res1[2 * i + 1]) << 32) +
486
- uni_res1[2 * i];
487
- vec_out[i + VecSize / 2] =
488
- (static_cast<std::uint64_t>(uni_res2[2 * i + 1]) << 32) +
489
- uni_res2[2 * i];
490
- }
491
- }
492
- else {
493
- vec_out[0] = (static_cast<std::uint64_t>(uni_res1[1]) << 32) + uni_res1[0];
494
- vec_out[1] = (static_cast<std::uint64_t>(uni_res2[0]) << 32) + uni_res1[2];
495
- vec_out[2] = (static_cast<std::uint64_t>(uni_res2[2]) << 32) + uni_res2[1];
496
- }
497
-
498
- return vec_out;
499
- }
500
- }
501
- }
502
-
503
- template <typename RealType>
504
- RealType generate_single(RealType a, RealType b) {
505
- RealType res;
506
- std::uint32_t res_uint;
507
- RealType a1;
508
- RealType c1;
509
-
510
- c1 = (b - a) / (static_cast<RealType>((std::numeric_limits<std::uint32_t>::max)()) + 1);
511
- a1 = (b + a) / static_cast<RealType>(2.0);
512
-
513
- res_uint = philox4x32x10_impl::generate_single(this->state_);
514
-
515
- res = static_cast<RealType>(static_cast<std::int32_t>(res_uint)) * c1 + a1;
516
-
517
- return res;
518
- }
519
-
520
- __attribute__((always_inline)) inline std::uint32_t generate_single() {
521
- return philox4x32x10_impl::generate_single(this->state_);
522
- }
523
-
524
- template <typename UIntType>
525
- __attribute__((always_inline)) inline auto generate_single_uniform_bits() {
526
- if constexpr (std::is_same<UIntType, std::uint32_t>::value) {
527
- return philox4x32x10_impl::generate_single(this->state_);
528
- }
529
- else {
530
- auto uni_res1 = philox4x32x10_impl::generate_single(this->state_);
531
- auto uni_res2 = philox4x32x10_impl::generate_single(this->state_);
532
-
533
- return (static_cast<std::uint64_t>(uni_res2) << 32) + uni_res1;
534
- }
535
- }
536
-
537
- void skip_ahead(std::uint64_t num_to_skip) {
538
- detail::philox4x32x10_impl::skip_ahead(this->state_, num_to_skip);
539
- }
540
-
541
- void skip_ahead(std::initializer_list<std::uint64_t> num_to_skip) {
542
- detail::philox4x32x10_impl::skip_ahead(this->state_, num_to_skip.size(),
543
- num_to_skip.begin());
544
- }
545
-
546
- engine_state<oneapi::mkl::rng::device::philox4x32x10<VecSize>> state_;
547
- };
548
-
549
- } // namespace detail
550
- } // namespace oneapi::mkl::rng::device
551
-
552
- #endif // _MKL_RNG_DEVICE_PHILOX4X32X10_IMPL_HPP_