mkl-devel-dpcpp 2024.2.1__py2.py3-none-win_amd64.whl → 2025.0.0__py2.py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mkl-devel-dpcpp might be problematic. Click here for more details.

Files changed (97)
  1. mkl_devel_dpcpp-2024.2.1.data/data/Library/include/oneapi/mkl/dfti.hpp → mkl_devel_dpcpp-2025.0.0.data/data/Library/include/oneapi/mkl/dft.hpp +82 -36
  2. mkl_devel_dpcpp-2025.0.0.data/data/Library/include/oneapi/mkl/dfti.hpp +22 -0
  3. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/lapack/lapack.hpp +90 -90
  4. mkl_devel_dpcpp-2025.0.0.data/data/Library/include/oneapi/mkl/rng/device/detail/beta_impl.hpp +464 -0
  5. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/distribution_base.hpp +8 -0
  6. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp +7 -0
  7. mkl_devel_dpcpp-2025.0.0.data/data/Library/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp +285 -0
  8. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp +4 -12
  9. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp +8 -20
  10. mkl_devel_dpcpp-2025.0.0.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp +289 -0
  11. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp +42 -0
  12. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/distributions.hpp +172 -15
  13. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/engines.hpp +1 -7
  14. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/types.hpp +12 -0
  15. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/distributions.hpp +11 -80
  16. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/spblas/sparse_auxiliary.hpp +43 -0
  17. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/spblas/sparse_operations.hpp +69 -6
  18. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/spblas/sparse_structures.hpp +33 -34
  19. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl.hpp +1 -1
  20. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/lib/mkl_sycl.lib +0 -0
  21. mkl_devel_dpcpp-2025.0.0.data/data/Library/lib/mkl_sycl_blas_dll.lib +0 -0
  22. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/lib/mkl_sycl_data_fitting_dll.lib +0 -0
  23. mkl_devel_dpcpp-2025.0.0.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
  24. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/lib/mkl_sycl_dll.lib +0 -0
  25. mkl_devel_dpcpp-2025.0.0.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
  26. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/lib/mkl_sycl_rng_dll.lib +0 -0
  27. mkl_devel_dpcpp-2025.0.0.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
  28. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/lib/mkl_sycl_stats_dll.lib +0 -0
  29. mkl_devel_dpcpp-2025.0.0.data/data/Library/lib/mkl_sycl_vm_dll.lib +0 -0
  30. {mkl_devel_dpcpp-2024.2.1.dist-info → mkl_devel_dpcpp-2025.0.0.dist-info}/METADATA +3 -3
  31. mkl_devel_dpcpp-2025.0.0.dist-info/RECORD +90 -0
  32. mkl_devel_dpcpp-2024.2.1.data/data/Library/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp +0 -131
  33. mkl_devel_dpcpp-2024.2.1.data/data/Library/lib/mkl_sycl_blas_dll.lib +0 -0
  34. mkl_devel_dpcpp-2024.2.1.data/data/Library/lib/mkl_sycl_dft_dll.lib +0 -0
  35. mkl_devel_dpcpp-2024.2.1.data/data/Library/lib/mkl_sycl_lapack_dll.lib +0 -0
  36. mkl_devel_dpcpp-2024.2.1.data/data/Library/lib/mkl_sycl_sparse_dll.lib +0 -0
  37. mkl_devel_dpcpp-2024.2.1.data/data/Library/lib/mkl_sycl_vm_dll.lib +0 -0
  38. mkl_devel_dpcpp-2024.2.1.dist-info/RECORD +0 -87
  39. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/bfloat16.hpp +0 -0
  40. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/blas/buffer.hpp +0 -0
  41. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/blas/buffer_decls.hpp +0 -0
  42. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/blas/types.hpp +0 -0
  43. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/blas/usm.hpp +0 -0
  44. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/blas/usm_decls.hpp +0 -0
  45. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/blas.hpp +0 -0
  46. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/exceptions.hpp +0 -0
  47. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/experimental/data_fitting/interpolate.hpp +0 -0
  48. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/experimental/data_fitting/spline_and_data_params.hpp +0 -0
  49. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/experimental/data_fitting/splines.hpp +0 -0
  50. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/experimental/data_fitting.hpp +0 -0
  51. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/export.hpp +0 -0
  52. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/lapack/concepts.hpp +0 -0
  53. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/lapack/exceptions.hpp +0 -0
  54. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/lapack/scratchpad.hpp +0 -0
  55. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/lapack.hpp +0 -0
  56. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/detail/engine_base.hpp +0 -0
  57. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp +0 -0
  58. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/bits_impl.hpp +0 -0
  59. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/engine_base.hpp +0 -0
  60. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/engine_helpers_base.hpp +0 -0
  61. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp +0 -0
  62. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp +0 -0
  63. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/mcg31m1_helpers_impl.hpp +0 -0
  64. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/mcg59_helpers_impl.hpp +0 -0
  65. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_helpers_impl.hpp +0 -0
  66. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp +0 -0
  67. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp +0 -0
  68. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_helpers_impl.hpp +0 -0
  69. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp +0 -0
  70. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp +0 -0
  71. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/types.hpp +0 -0
  72. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp +0 -0
  73. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/engine_helpers.hpp +0 -0
  74. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device/functions.hpp +0 -0
  75. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/device.hpp +0 -0
  76. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/engines.hpp +0 -0
  77. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng/functions.hpp +0 -0
  78. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/rng.hpp +0 -0
  79. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/spblas.hpp +0 -0
  80. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/stats.hpp +0 -0
  81. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/types.hpp +0 -0
  82. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/buffer.hpp +0 -0
  83. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/decls.hpp +0 -0
  84. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/device/detail/decls.hpp +0 -0
  85. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/device/detail/dispatch.hpp +0 -0
  86. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/device/detail/ep.hpp +0 -0
  87. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/device/detail/ha.hpp +0 -0
  88. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/device/detail/la.hpp +0 -0
  89. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/device/detail/rts.hpp +0 -0
  90. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/device/detail/scalar.hpp +0 -0
  91. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/device/vm.hpp +0 -0
  92. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/span.hpp +0 -0
  93. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm/usm.hpp +0 -0
  94. {mkl_devel_dpcpp-2024.2.1.data → mkl_devel_dpcpp-2025.0.0.data}/data/Library/include/oneapi/mkl/vm.hpp +0 -0
  95. {mkl_devel_dpcpp-2024.2.1.dist-info → mkl_devel_dpcpp-2025.0.0.dist-info}/LICENSE.txt +0 -0
  96. {mkl_devel_dpcpp-2024.2.1.dist-info → mkl_devel_dpcpp-2025.0.0.dist-info}/WHEEL +0 -0
  97. {mkl_devel_dpcpp-2024.2.1.dist-info → mkl_devel_dpcpp-2025.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,285 @@
1
+ /*******************************************************************************
2
+ * Copyright 2024 Intel Corporation
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing,
11
+ * software distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions
14
+ * and limitations under the License.
15
+ *
16
+ *
17
+ * SPDX-License-Identifier: Apache-2.0
18
+ *******************************************************************************/
19
+
20
+ #ifndef _MKL_RNG_DEVICE_GAMMA_IMPL_HPP_
21
+ #define _MKL_RNG_DEVICE_GAMMA_IMPL_HPP_
22
+
23
+ #include "vm_wrappers.hpp"
24
+
25
+ namespace oneapi::mkl::rng::device::detail {
26
+
27
// Sampling strategy used by the gamma distribution; chosen from the shape
// parameter alpha in distribution_base<gamma>::set_algorithm().
enum class gamma_algorithm {
    Exponential = 0, // selected when alpha == 1
    Vaduva = 1, // selected when 0.6 < alpha < 1
    EPD_Transform = 2, // selected when alpha <= 0.6
    Marsaglia = 3 // selected when alpha > 1
};
28
+
29
// Constant 1/3, spelled as a hexadecimal floating-point literal for the
// requested precision: the double literal is returned when DataType is
// double, the float literal for every other DataType.
template <typename DataType>
inline DataType gamma_c1() {
    constexpr bool wants_double = std::is_same_v<DataType, double>;
    if constexpr (!wants_double) {
        return 0x1.555556p-2f; // 1/3 as float
    }
    else {
        return 0x1.5555555555555p-2; // 1/3 as double
    }
}
37
+
38
// Constant 0.0331, spelled as a hexadecimal floating-point literal for the
// requested precision: the double literal is returned when DataType is
// double, the float literal for every other DataType.
template <typename DataType>
inline DataType gamma_c2() {
    constexpr bool wants_double = std::is_same_v<DataType, double>;
    if constexpr (!wants_double) {
        return 0x1.0f27bcp-5f; // 0.0331 as float
    }
    else {
        return 0x1.0f27bb2fec56dp-5; // 0.0331 as double
    }
}
46
+
47
// Constant 0.6, spelled as a hexadecimal floating-point literal for the
// requested precision: the double literal is returned when DataType is
// double, the float literal for every other DataType.
template <typename DataType>
inline DataType gamma_c06() {
    constexpr bool wants_double = std::is_same_v<DataType, double>;
    if constexpr (!wants_double) {
        return 0x1.333334p-1f; // 0.6 as float
    }
    else {
        return 0x1.3333333333333p-1; // 0.6 as double
    }
}
55
+
56
+ template <typename RealType, typename Method>
57
+ class distribution_base<oneapi::mkl::rng::device::gamma<RealType, Method>> {
58
+ public:
59
+ struct param_type {
60
+ param_type(RealType alpha, RealType a, RealType beta) : alpha_(alpha), a_(a), beta_(beta) {}
61
+ RealType alpha_;
62
+ RealType a_;
63
+ RealType beta_;
64
+ };
65
+
66
+ distribution_base(RealType alpha, RealType a, RealType beta)
67
+ : alpha_(alpha),
68
+ a_(a),
69
+ beta_(beta),
70
+ count_(0) {
71
+ set_algorithm();
72
+ #ifndef __SYCL_DEVICE_ONLY__
73
+ if (alpha <= RealType(0.0)) {
74
+ throw oneapi::mkl::invalid_argument("rng", "gamma", "alpha <= 0");
75
+ }
76
+ else if (beta <= RealType(0.0)) {
77
+ throw oneapi::mkl::invalid_argument("rng", "gamma", "beta <= 0");
78
+ }
79
+ #endif
80
+ }
81
+
82
+ RealType alpha() const {
83
+ return alpha_;
84
+ }
85
+
86
+ RealType a() const {
87
+ return a_;
88
+ }
89
+
90
+ RealType beta() const {
91
+ return beta_;
92
+ }
93
+
94
+ std::size_t count_rejected_numbers() const {
95
+ return count_;
96
+ }
97
+
98
+ param_type param() const {
99
+ return param_type(alpha_, a_, beta_);
100
+ }
101
+
102
+ void param(const param_type& pt) {
103
+ #ifndef __SYCL_DEVICE_ONLY__
104
+ if (pt.alpha_ <= RealType(0.0)) {
105
+ throw oneapi::mkl::invalid_argument("rng", "gamma", "alpha <= 0");
106
+ }
107
+ else if (pt.beta_ <= RealType(0.0)) {
108
+ throw oneapi::mkl::invalid_argument("rng", "gamma", "beta <= 0");
109
+ }
110
+ #endif
111
+ alpha_ = pt.alpha_;
112
+ a_ = pt.a_;
113
+ beta_ = pt.beta_;
114
+ set_algorithm();
115
+ }
116
+
117
+ protected:
118
+ void set_algorithm() {
119
+ if (alpha_ <= RealType(1.0)) {
120
+ if (alpha_ == RealType(1.0)) {
121
+ algorithm_ = gamma_algorithm::Exponential;
122
+ }
123
+ else if (alpha_ > gamma_c06<RealType>()) {
124
+ algorithm_ = gamma_algorithm::Vaduva;
125
+ }
126
+ else {
127
+ algorithm_ = gamma_algorithm::EPD_Transform;
128
+ }
129
+ }
130
+ else {
131
+ algorithm_ = gamma_algorithm::Marsaglia;
132
+ }
133
+ }
134
+
135
+ template <typename T, int vecSize>
136
+ inline std::pair<T, T> gauss_BM2_for_Marsaglia(const sycl::vec<T, vecSize>& vec) {
137
+ T tmp, sin, cos, gauss_1, gauss_2;
138
+ tmp = ln_wrapper(vec[0]);
139
+ tmp = sqrt_wrapper(T(-2.0) * tmp);
140
+ sin = sincospi_wrapper(T(2) * vec[2], cos);
141
+ gauss_1 = (tmp * sin);
142
+ gauss_2 = (tmp * cos);
143
+ return { gauss_1, gauss_2 };
144
+ }
145
+
146
+ template <std::int32_t n, typename T, typename EngineType>
147
+ T acc_rej_kernel(T& z, EngineType& engine) {
148
+ RealType flC, flD;
149
+ if (algorithm_ == gamma_algorithm::Vaduva) {
150
+ flC = RealType(1.0) / alpha_;
151
+ flD = (RealType(1.0) - alpha_) *
152
+ exp_wrapper(ln_wrapper(alpha_) * alpha_ / (RealType(1.0) - alpha_));
153
+ }
154
+ else if (algorithm_ == gamma_algorithm::EPD_Transform) {
155
+ flC = RealType(1.0) / alpha_;
156
+ flD = (RealType(1.0) - alpha_);
157
+ }
158
+ else if (algorithm_ == gamma_algorithm::Marsaglia) {
159
+ flD = alpha_ - gamma_c1<RealType>();
160
+ flC = sqrt_wrapper(RealType(1.0) / (RealType(9.0) * alpha_ - RealType(3.0)));
161
+ }
162
+
163
+ count_ = 0;
164
+ RealType z1, z2, z3, z4;
165
+ for (int i = 0; i < n; i++) {
166
+ while (1) { // looping until satisfied
167
+ if (!flag_) {
168
+ z1 = engine.generate_single(RealType(0), RealType(1));
169
+ z2 = engine.generate_single(RealType(0), RealType(1));
170
+ }
171
+
172
+ if (algorithm_ == gamma_algorithm::Vaduva) {
173
+ z1 = -ln_wrapper(z1);
174
+ z2 = -ln_wrapper(z2);
175
+ z[i] = powr_wrapper(z1, flC);
176
+ if (z1 + z2 >= z[i] + flD) {
177
+ break;
178
+ }
179
+ }
180
+ if (algorithm_ == gamma_algorithm::EPD_Transform) {
181
+ z2 = -ln_wrapper(z2);
182
+ if (z1 <= flD) {
183
+ z[i] = powr_wrapper(z1, flC);
184
+ if (z[i] <= z2) {
185
+ break;
186
+ }
187
+ }
188
+ else {
189
+ z1 = -ln_wrapper((RealType(1.0) - z1) * flC);
190
+ z[i] = powr_wrapper(flD + alpha_ * z1, flC);
191
+ if (z[i] <= z2 + z1) {
192
+ break;
193
+ }
194
+ }
195
+ }
196
+ if (algorithm_ == gamma_algorithm::Marsaglia) {
197
+ RealType local_uniform_2, local_gauss;
198
+ if (!flag_) {
199
+ z3 = engine.generate_single(RealType(0), RealType(1));
200
+ z4 = engine.generate_single(RealType(0), RealType(1));
201
+ auto gauss =
202
+ gauss_BM2_for_Marsaglia(sycl::vec<RealType, 4>{ z1, z2, z3, z4 });
203
+ local_uniform_2 = z2;
204
+ local_gauss = gauss.first;
205
+
206
+ saved_uniform_2_ = z4;
207
+ saved_gauss_ = gauss.second;
208
+ }
209
+ else {
210
+ local_uniform_2 = saved_uniform_2_;
211
+ local_gauss = saved_gauss_;
212
+ }
213
+ flag_ = !flag_;
214
+ z[i] = RealType(1.0) + flC * local_gauss;
215
+ if (z[i] > RealType(0.0)) {
216
+ z[i] = z[i] * z[i] * z[i];
217
+ local_gauss = local_gauss * local_gauss;
218
+ if (local_uniform_2 <
219
+ RealType(1.0) - gamma_c2<RealType>() * local_gauss * local_gauss) {
220
+ z[i] = flD * z[i];
221
+ break;
222
+ }
223
+ else {
224
+ RealType local_uniform_1 = ln_wrapper(z[i]);
225
+ local_uniform_2 = ln_wrapper(local_uniform_2);
226
+ if (local_uniform_2 <
227
+ RealType(0.5) * local_gauss +
228
+ flD * (RealType(1.0) - z[i] + local_uniform_1)) {
229
+ z[i] = flD * z[i];
230
+ break;
231
+ }
232
+ }
233
+ }
234
+ }
235
+ ++count_;
236
+ }
237
+ }
238
+ auto res = a_ + beta_ * z;
239
+ if constexpr (std::is_same_v<Method, gamma_method::marsaglia_accurate>) {
240
+ if (res < a_)
241
+ res = a_;
242
+ }
243
+ return res;
244
+ }
245
+
246
+ template <typename EngineType>
247
+ auto generate(EngineType& engine) ->
248
+ typename std::conditional<EngineType::vec_size == 1, RealType,
249
+ sycl::vec<RealType, EngineType::vec_size>>::type {
250
+ if (algorithm_ == gamma_algorithm::Exponential) {
251
+ distribution_base<oneapi::mkl::rng::device::exponential<RealType>> distr_exp(a_, beta_);
252
+ return distr_exp.generate(engine);
253
+ }
254
+ sycl::vec<RealType, EngineType::vec_size> res{};
255
+ res = acc_rej_kernel<EngineType::vec_size>(res, engine);
256
+
257
+ return res;
258
+ }
259
+
260
+ template <typename EngineType>
261
+ RealType generate_single(EngineType& engine) {
262
+ if (algorithm_ == gamma_algorithm::Exponential) {
263
+ distribution_base<oneapi::mkl::rng::device::exponential<RealType>> distr_exp(a_, beta_);
264
+ RealType z = distr_exp.generate_single(engine);
265
+ return z;
266
+ }
267
+ sycl::vec<RealType, 1> res{};
268
+ res = acc_rej_kernel<1>(res, engine);
269
+
270
+ return res[0];
271
+ }
272
+
273
+ RealType alpha_;
274
+ RealType a_;
275
+ RealType beta_;
276
+ RealType saved_gauss_;
277
+ RealType saved_uniform_2_;
278
+ bool flag_ = false;
279
+ std::size_t count_;
280
+ gamma_algorithm algorithm_;
281
+ };
282
+
283
+ } // namespace oneapi::mkl::rng::device::detail
284
+
285
+ #endif // _MKL_RNG_DEVICE_GAMMA_IMPL_HPP_
@@ -138,14 +138,10 @@ static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::mcg31m1<Vec
138
138
 
139
139
  template <std::int32_t VecSize>
140
140
  static inline void init(engine_state<oneapi::mkl::rng::device::mcg31m1<VecSize>>& state,
141
- std::uint64_t n, const std::uint32_t* seed_ptr, std::uint64_t offset) {
142
- if (n == 0)
141
+ std::uint32_t seed, std::uint64_t offset) {
142
+ state.s = custom_mod<std::uint32_t>(seed);
143
+ if (state.s == 0)
143
144
  state.s = 1;
144
- else {
145
- state.s = custom_mod<std::uint32_t>(seed_ptr[0]);
146
- if (state.s == 0)
147
- state.s = 1;
148
- }
149
145
  skip_ahead(state, offset);
150
146
  }
151
147
 
@@ -173,11 +169,7 @@ template <std::int32_t VecSize>
173
169
  class engine_base<oneapi::mkl::rng::device::mcg31m1<VecSize>> {
174
170
  protected:
175
171
  engine_base(std::uint32_t seed, std::uint64_t offset = 0) {
176
- mcg31m1_impl::init(this->state_, 1, &seed, offset);
177
- }
178
-
179
- engine_base(std::uint64_t n, const std::uint32_t* seed, std::uint64_t offset = 0) {
180
- mcg31m1_impl::init(this->state_, n, seed, offset);
172
+ mcg31m1_impl::init(this->state_, seed, offset);
181
173
  }
182
174
 
183
175
  template <typename RealType>
@@ -107,16 +107,8 @@ static inline void skip_ahead(engine_state<oneapi::mkl::rng::device::mcg59<VecSi
107
107
 
108
108
  template <std::int32_t VecSize>
109
109
  static inline void init(engine_state<oneapi::mkl::rng::device::mcg59<VecSize>>& state,
110
- std::uint64_t n, std::uint32_t* seed_ptr, std::uint64_t offset) {
111
- if (n < 1) {
112
- state.s = 1;
113
- }
114
- else if (n == 1) {
115
- state.s = static_cast<uint64_t>(seed_ptr[0]) & mcg59_param::m_64;
116
- }
117
- else {
118
- state.s = *(reinterpret_cast<std::uint64_t*>(&seed_ptr[0])) & mcg59_param::m_64;
119
- }
110
+ std::uint64_t seed, std::uint64_t offset) {
111
+ state.s = seed & mcg59_param::m_64;
120
112
  if (state.s == 0)
121
113
  state.s = 1;
122
114
 
@@ -145,12 +137,8 @@ static inline std::uint64_t generate_single(
145
137
  template <std::int32_t VecSize>
146
138
  class engine_base<oneapi::mkl::rng::device::mcg59<VecSize>> {
147
139
  protected:
148
- engine_base(std::uint32_t seed, std::uint64_t offset = 0) {
149
- mcg59_impl::init(this->state_, 1, &seed, offset);
150
- }
151
-
152
- engine_base(std::uint64_t n, const std::uint32_t* seed, std::uint64_t offset = 0) {
153
- mcg59_impl::init(this->state_, n, seed, offset);
140
+ engine_base(std::uint64_t seed, std::uint64_t offset = 0) {
141
+ mcg59_impl::init(this->state_, seed, offset);
154
142
  }
155
143
 
156
144
  template <typename RealType>
@@ -168,7 +156,7 @@ protected:
168
156
 
169
157
  auto generate() -> typename std::conditional<VecSize == 1, std::uint32_t,
170
158
  sycl::vec<std::uint32_t, VecSize>>::type {
171
- return mcg59_impl::generate(this->state_);
159
+ return mcg59_impl::generate(this->state_).template convert<std::uint32_t>();
172
160
  }
173
161
 
174
162
  auto generate_bits() -> typename std::conditional<VecSize == 1, std::uint64_t,
@@ -200,10 +188,10 @@ protected:
200
188
  auto uni_res2 = mcg59_impl::generate(this->state_);
201
189
 
202
190
  if constexpr (VecSize == 1) {
203
- uni_res1 >>= 27;
204
- uni_res2 >>= 27;
191
+ uni_res1 >>= UIntType(27);
192
+ uni_res2 >>= UIntType(27);
205
193
 
206
- return (uni_res2 << 32) + uni_res1;
194
+ return (uni_res2 << UIntType(32)) + uni_res1;
207
195
  }
208
196
  else {
209
197
  sycl::vec<std::uint64_t, VecSize> vec_out;
@@ -0,0 +1,289 @@
1
+ /*******************************************************************************
2
+ * Copyright 2020 Intel Corporation
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing,
11
+ * software distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions
14
+ * and limitations under the License.
15
+ *
16
+ *
17
+ * SPDX-License-Identifier: Apache-2.0
18
+ *******************************************************************************/
19
+
20
+ #ifndef _MKL_RNG_DEVICE_UNIFORM_IMPL_HPP_
21
+ #define _MKL_RNG_DEVICE_UNIFORM_IMPL_HPP_
22
+
23
+ #include <limits>
24
+
25
+ #include "engine_base.hpp"
26
+
27
+ namespace oneapi::mkl::rng::device::detail {
28
+
29
// Returns the upper 64 bits of the full 128-bit product a * b, computed from
// four 32x32 -> 64 bit partial products so no 128-bit type is required.
static inline std::uint64_t umul_hi_64(const std::uint64_t a, const std::uint64_t b) {
    constexpr std::uint64_t low_mask = 0xFFFFFFFFULL;

    const std::uint64_t a_high = a >> 32;
    const std::uint64_t a_low = a & low_mask;
    const std::uint64_t b_high = b >> 32;
    const std::uint64_t b_low = b & low_mask;

    const std::uint64_t low_low = a_low * b_low;
    const std::uint64_t high_low = a_high * b_low;
    const std::uint64_t low_high = b_high * a_low;
    const std::uint64_t high_high = a_high * b_high;

    // Carry out of bits 32..63 of the product: add the low halves of the two
    // cross terms to the high half of the low product, keep what overflows.
    std::uint64_t carry = high_low & low_mask;
    carry += low_high & low_mask;
    carry += low_low >> 32;
    carry >>= 32;

    return high_high + (high_low >> 32) + (low_high >> 32) + carry;
}
44
+
45
+ template <typename EngineType, typename Generator>
46
+ static inline void generate_leftover(std::uint64_t range, Generator generate,
47
+ std::uint64_t& res_64, std::uint64_t& leftover) {
48
+ if constexpr (std::is_same_v<EngineType, mcg31m1<EngineType::vec_size>>) {
49
+ std::uint32_t res_1 = generate();
50
+ std::uint32_t res_2 = generate();
51
+ std::uint32_t res_3 = generate();
52
+ res_64 = (static_cast<std::uint64_t>(res_3) << 62) +
53
+ (static_cast<std::uint64_t>(res_2) << 31) + res_1;
54
+ }
55
+ else {
56
+ std::uint32_t res_1 = generate();
57
+ std::uint32_t res_2 = generate();
58
+ res_64 = (static_cast<std::uint64_t>(res_2) << 32) + res_1;
59
+ }
60
+
61
+ leftover = res_64 * range;
62
+ }
63
+
64
+ template <typename Type, typename Method>
65
+ class distribution_base<oneapi::mkl::rng::device::uniform<Type, Method>> {
66
+ public:
67
+ struct param_type {
68
+ param_type(Type a, Type b) : a_(a), b_(b) {}
69
+ Type a_;
70
+ Type b_;
71
+ };
72
+
73
+ distribution_base(Type a, Type b) : a_(a), b_(b) {
74
+ #ifndef __SYCL_DEVICE_ONLY__
75
+ if (a >= b) {
76
+ throw oneapi::mkl::invalid_argument("rng", "uniform", "a >= b");
77
+ }
78
+ #endif
79
+ }
80
+
81
+ Type a() const {
82
+ return a_;
83
+ }
84
+
85
+ Type b() const {
86
+ return b_;
87
+ }
88
+
89
+ param_type param() const {
90
+ return param_type(a_, b_);
91
+ }
92
+
93
+ void param(const param_type& pt) {
94
+ #ifndef __SYCL_DEVICE_ONLY__
95
+ if (pt.a_ >= pt.b_) {
96
+ throw oneapi::mkl::invalid_argument("rng", "uniform", "a >= b");
97
+ }
98
+ #endif
99
+ a_ = pt.a_;
100
+ b_ = pt.b_;
101
+ }
102
+
103
+ protected:
104
+ template <typename FpType, typename OutType, typename EngineType>
105
+ OutType generate_single_int(EngineType& engine) {
106
+ sycl::vec<FpType, EngineType::vec_size> res_fp;
107
+ res_fp = engine.generate(static_cast<FpType>(a_), static_cast<FpType>(b_));
108
+ res_fp = sycl::floor(res_fp);
109
+ OutType res = res_fp.template convert<Type>();
110
+ return res;
111
+ }
112
+
113
+ template <typename EngineType>
114
+ auto generate(EngineType& engine) ->
115
+ typename std::conditional<EngineType::vec_size == 1, Type,
116
+ sycl::vec<Type, EngineType::vec_size>>::type {
117
+ using OutType = typename std::conditional<EngineType::vec_size == 1, Type,
118
+ sycl::vec<Type, EngineType::vec_size>>::type;
119
+ using FpType =
120
+ typename std::conditional<std::is_same<Method, uniform_method::accurate>::value, double,
121
+ float>::type;
122
+ OutType res;
123
+ if constexpr (std::is_integral<Type>::value) {
124
+ if constexpr (std::is_same_v<Type, std::int32_t> || std::is_same_v<Type, std::uint32_t>) {
125
+ return generate_single_int<FpType, OutType>(engine);
126
+ }
127
+ else {
128
+ // Lemire's sample rejection method to exclude bias for uniform numbers
129
+ // https://arxiv.org/abs/1805.10941
130
+
131
+ constexpr std::uint64_t uint_max64 = std::numeric_limits<std::uint64_t>::max();
132
+ constexpr std::uint64_t uint_max32 = std::numeric_limits<std::uint32_t>::max();
133
+
134
+ std::uint64_t range = b_ - a_;
135
+ std::uint64_t threshold = (uint_max64 - range) % range;
136
+
137
+ if (range <= uint_max32)
138
+ return generate_single_int<FpType, OutType>(engine);
139
+
140
+ if constexpr (EngineType::vec_size == 1) {
141
+ std::uint32_t res_1, res_2;
142
+ std::uint64_t res_64, leftover;
143
+
144
+ generate_leftover<EngineType>(range, [&engine](){return engine.generate();},
145
+ res_64, leftover);
146
+
147
+ if (range == uint_max64)
148
+ return res_64;
149
+
150
+ while (leftover < threshold) {
151
+ generate_leftover<EngineType>(range, [&engine](){return engine.generate();},
152
+ res_64, leftover);
153
+ }
154
+
155
+ res = a_ + umul_hi_64(res_64, range);
156
+
157
+ return res;
158
+ }
159
+ else {
160
+ std::uint64_t leftover;
161
+
162
+ sycl::vec<std::uint32_t, EngineType::vec_size> res_1 = engine.generate();
163
+ sycl::vec<std::uint32_t, EngineType::vec_size> res_2 = engine.generate();
164
+ sycl::vec<std::uint64_t, EngineType::vec_size> res_64;
165
+
166
+ if constexpr (std::is_same_v<EngineType, mcg31m1<EngineType::vec_size>>) {
167
+ sycl::vec<std::uint32_t, EngineType::vec_size> res_3 = engine.generate();
168
+
169
+ for (int i = 0; i < EngineType::vec_size; i++) {
170
+ res_64[i] = (static_cast<std::uint64_t>(res_3[i]) << 62) +
171
+ (static_cast<std::uint64_t>(res_2[i]) << 31) + res_1[i];
172
+ }
173
+ }
174
+ else {
175
+ if constexpr (EngineType::vec_size == 3) {
176
+ res_64[0] = (static_cast<std::uint64_t>(res_1[1]) << 32) +
177
+ static_cast<std::uint64_t>(res_1[0]);
178
+ res_64[1] = (static_cast<std::uint64_t>(res_2[0]) << 32) +
179
+ static_cast<std::uint64_t>(res_1[2]);
180
+ res_64[2] = (static_cast<std::uint64_t>(res_2[2]) << 32) +
181
+ static_cast<std::uint64_t>(res_2[1]);
182
+ } else {
183
+ for (int i = 0; i < EngineType::vec_size / 2; i++) {
184
+ res_64[i] = (static_cast<std::uint64_t>(res_1[2 * i + 1]) << 32) +
185
+ static_cast<std::uint64_t>(res_1[2 * i]);
186
+ res_64[i + EngineType::vec_size / 2] = (static_cast<std::uint64_t>(res_2[2 * i + 1]) << 32) +
187
+ static_cast<std::uint64_t>(res_2[2 * i]);
188
+ }
189
+ }
190
+ }
191
+
192
+ if (range == uint_max64)
193
+ return res_64.template convert<Type>();
194
+
195
+ for (int i = 0; i < EngineType::vec_size; i++) {
196
+ leftover = res_64[i] * range;
197
+
198
+ while (leftover < threshold) {
199
+ generate_leftover<EngineType>(range, [&engine](){return engine.generate_single();},
200
+ res_64[i], leftover);
201
+ }
202
+
203
+ res[i] = a_ + umul_hi_64(res_64[i], range);
204
+ }
205
+
206
+ return res;
207
+ }
208
+ }
209
+ }
210
+ else {
211
+ res = engine.generate(a_, b_);
212
+ if constexpr (std::is_same<Method, uniform_method::accurate>::value) {
213
+ res = sycl::fmax(res, a_);
214
+ res = sycl::fmin(res, b_);
215
+ }
216
+ }
217
+
218
+ return res;
219
+ }
220
+
221
+ template <typename EngineType>
222
+ Type generate_single(EngineType& engine) {
223
+ using FpType =
224
+ typename std::conditional<std::is_same<Method, uniform_method::accurate>::value, double,
225
+ float>::type;
226
+ Type res;
227
+ if constexpr (std::is_integral<Type>::value) {
228
+ if constexpr (std::is_same_v<Type, std::int32_t> || std::is_same_v<Type, std::uint32_t>) {
229
+ FpType res_fp =
230
+ engine.generate_single(static_cast<FpType>(a_), static_cast<FpType>(b_));
231
+ res_fp = sycl::floor(res_fp);
232
+ res = static_cast<Type>(res_fp);
233
+ return res;
234
+ }
235
+ else {
236
+ // Lemire's sample rejection method to exclude bias for uniform numbers
237
+ // https://arxiv.org/abs/1805.10941
238
+
239
+ constexpr std::uint64_t uint_max64 = std::numeric_limits<std::uint64_t>::max();
240
+ constexpr std::uint64_t uint_max32 = std::numeric_limits<std::uint32_t>::max();
241
+
242
+ std::uint64_t range = b_ - a_;
243
+ std::uint64_t threshold = (uint_max64 - range) % range;
244
+
245
+ if (range <= uint_max32) {
246
+ FpType res_fp =
247
+ engine.generate_single(static_cast<FpType>(a_), static_cast<FpType>(b_));
248
+ res_fp = sycl::floor(res_fp);
249
+ res = static_cast<Type>(res_fp);
250
+ return res;
251
+ }
252
+
253
+ std::uint32_t res_1, res_2;
254
+ std::uint64_t res_64, leftover;
255
+
256
+ generate_leftover<EngineType>(range, [&engine](){return engine.generate_single();},
257
+ res_64, leftover);
258
+
259
+ if (range == uint_max64)
260
+ return res_64;
261
+
262
+ while (leftover < threshold) {
263
+ generate_leftover<EngineType>(range, [&engine](){return engine.generate_single();},
264
+ res_64, leftover);
265
+ }
266
+
267
+ res = a_ + umul_hi_64(res_64, range);
268
+
269
+ return res;
270
+ }
271
+ }
272
+ else {
273
+ res = engine.generate_single(a_, b_);
274
+ if constexpr (std::is_same<Method, uniform_method::accurate>::value) {
275
+ res = sycl::fmax(res, a_);
276
+ res = sycl::fmin(res, b_);
277
+ }
278
+ }
279
+
280
+ return res;
281
+ }
282
+
283
+ Type a_;
284
+ Type b_;
285
+ };
286
+
287
+ } // namespace oneapi::mkl::rng::device::detail
288
+
289
+ #endif // _MKL_RNG_DEVICE_UNIFORM_IMPL_HPP_