mkl-devel-dpcpp 2025.0.0__py2.py3-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkl-devel-dpcpp might be problematic. Click here for more details.
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/bfloat16.hpp +26 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/blas/buffer.hpp +42 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/blas/buffer_decls.hpp +880 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/blas/types.hpp +60 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/blas/usm.hpp +42 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/blas/usm_decls.hpp +1240 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/blas.hpp +33 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/dft.hpp +253 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/dfti.hpp +22 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/exceptions.hpp +110 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/experimental/data_fitting/interpolate.hpp +67 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/experimental/data_fitting/spline_and_data_params.hpp +68 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/experimental/data_fitting/splines.hpp +177 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/experimental/data_fitting.hpp +22 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/export.hpp +25 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/lapack/concepts.hpp +55 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/lapack/exceptions.hpp +75 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/lapack/lapack.hpp +1095 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/lapack/scratchpad.hpp +106 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/lapack.hpp +23 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/detail/engine_base.hpp +48 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/bernoulli_impl.hpp +89 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/beta_impl.hpp +464 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/bits_impl.hpp +71 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/distribution_base.hpp +81 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/engine_base.hpp +43 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/engine_helpers_base.hpp +54 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/exponential_impl.hpp +116 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/gamma_impl.hpp +285 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/gaussian_impl.hpp +270 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/lognormal_impl.hpp +105 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/mcg31m1_helpers_impl.hpp +117 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/mcg31m1_impl.hpp +223 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/mcg59_helpers_impl.hpp +118 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/mcg59_impl.hpp +266 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/mrg32k3a_helpers_impl.hpp +125 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/mrg32k3a_impl.hpp +385 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/mrg32k3a_skip_ahead_matrix.hpp +3668 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/philox4x32x10_helpers_impl.hpp +141 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/philox4x32x10_impl.hpp +552 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/poisson_impl.hpp +355 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/types.hpp +58 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/uniform_bits_impl.hpp +51 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/uniform_impl.hpp +289 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/detail/vm_wrappers.hpp +183 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/distributions.hpp +637 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/engine_helpers.hpp +116 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/engines.hpp +187 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/functions.hpp +59 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device/types.hpp +74 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/device.hpp +29 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/distributions.hpp +1913 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/engines.hpp +788 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng/functions.hpp +163 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/rng.hpp +22 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/spblas/sparse_auxiliary.hpp +111 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/spblas/sparse_operations.hpp +446 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/spblas/sparse_structures.hpp +193 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/spblas.hpp +32 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/stats.hpp +356 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/types.hpp +321 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/buffer.hpp +3529 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/decls.hpp +280 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/device/detail/decls.hpp +81 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/device/detail/dispatch.hpp +1059 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/device/detail/ep.hpp +861 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/device/detail/ha.hpp +860 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/device/detail/la.hpp +860 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/device/detail/rts.hpp +4608 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/device/detail/scalar.hpp +8963 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/device/vm.hpp +460 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/span.hpp +3813 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm/usm.hpp +3581 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl/vm.hpp +30 -0
- mkl_devel_dpcpp-2025.0.0.data/data/include/oneapi/mkl.hpp +34 -0
- mkl_devel_dpcpp-2025.0.0.data/data/lib/libmkl_sycl.a +0 -0
- mkl_devel_dpcpp-2025.0.0.data/data/lib/libmkl_sycl.so +1 -0
- mkl_devel_dpcpp-2025.0.0.dist-info/LICENSE.txt +25 -0
- mkl_devel_dpcpp-2025.0.0.dist-info/METADATA +24 -0
- mkl_devel_dpcpp-2025.0.0.dist-info/RECORD +82 -0
- mkl_devel_dpcpp-2025.0.0.dist-info/WHEEL +6 -0
- mkl_devel_dpcpp-2025.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
/*******************************************************************************
|
|
2
|
+
* Copyright 2021 Intel Corporation
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing,
|
|
11
|
+
* software distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions
|
|
14
|
+
* and limitations under the License.
|
|
15
|
+
*
|
|
16
|
+
*
|
|
17
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
18
|
+
*******************************************************************************/
|
|
19
|
+
|
|
20
|
+
#ifndef _MKL_RNG_DEVICE_POISSON_IMPL_HPP_
|
|
21
|
+
#define _MKL_RNG_DEVICE_POISSON_IMPL_HPP_
|
|
22
|
+
|
|
23
|
+
#include <limits>
|
|
24
|
+
|
|
25
|
+
namespace oneapi::mkl::rng::device::detail {
|
|
26
|
+
|
|
27
|
+
// The Poisson distribution implementation selects one of three methods, depending on the lambda parameter:
|
|
28
|
+
// - table-lookup method [1] for small lambdas (lambda < 60)
|
|
29
|
+
// - Devroye's method [2] for medium lambdas (60 <= lambda < 1000)
|
|
30
|
+
// - Gaussian approximation [1] for huge lambdas (lambda >= 1000)
|
|
31
|
+
//
|
|
32
|
+
// References:
|
|
33
|
+
// [1] Michael B. Giles. Algorithm 955: approximation of the inverse Poisson cumulative
|
|
34
|
+
// distribution function
|
|
35
|
+
// [2] Devroye, L. Non-Uniform Random Variates Generation. Springer-Verlag,
|
|
36
|
+
// New York, 1986, Ch. X, Sects. 3.3 & 3.4 + Errata
|
|
37
|
+
|
|
38
|
+
// Lambda threshold: at or above this value the Gaussian approximation is used.
#define RNG_POISSON_LAMBDA_HUGE_BOUND 1000.0
|
|
39
|
+
// Lambda threshold: below this value the table-lookup method is used.
#define RNG_POISSON_LAMBDA_LOW_BOUND 60.0
|
|
40
|
+
// Number of cumulative probabilities precomputed for the table-lookup method.
#define RNG_POISSON_N_PRECOMPUTED_CDF 32
|
|
41
|
+
|
|
42
|
+
struct poisson_parameters {
|
|
43
|
+
void set_lambda(double lambda) {
|
|
44
|
+
if (lambda >= RNG_POISSON_LAMBDA_HUGE_BOUND) {
|
|
45
|
+
sqrt_lambda_ = sycl::sqrt(lambda);
|
|
46
|
+
}
|
|
47
|
+
else if (lambda >= RNG_POISSON_LAMBDA_LOW_BOUND) {
|
|
48
|
+
floored_lambda_ = sycl::floor(lambda);
|
|
49
|
+
log_lambda_ = sycl::log(lambda);
|
|
50
|
+
lgamma_floored_lambda_ = sycl::lgamma(floored_lambda_ + 1.0);
|
|
51
|
+
sqrt_floored_lambda_ = sycl::sqrt(floored_lambda_);
|
|
52
|
+
dx_ = sycl::sqrt(2.0 * floored_lambda_ * sycl::log(32.0 * floored_lambda_ / pi_4_));
|
|
53
|
+
delta_ = sycl::round((sycl::max)(6.0, (sycl::min)(floored_lambda_, dx_)));
|
|
54
|
+
dpdfl_ = delta_ + 2.0 * floored_lambda_;
|
|
55
|
+
sqrt_half_dpdfl_ = sycl::sqrt(dpdfl_ / 2.0);
|
|
56
|
+
inv_dpdfl_ = 1.0 / dpdfl_;
|
|
57
|
+
c2_add_coeff_ = sycl::sqrt(pi_4_ * dpdfl_) * sycl::exp(inv_dpdfl_);
|
|
58
|
+
c_add_coeff_ =
|
|
59
|
+
2.0 * dpdfl_ * sycl::exp(-delta_ * inv_dpdfl_ * (1.0 + delta_ / 2.0)) / delta_;
|
|
60
|
+
c1_ = sqrt_floored_lambda_ * spi_2_;
|
|
61
|
+
c2_ = c2_add_coeff_ + c1_;
|
|
62
|
+
c3_ = c2_ + 1.0;
|
|
63
|
+
c4_ = c2_ + 2.0;
|
|
64
|
+
c5_ = c4_ + exp_one_by_78;
|
|
65
|
+
c_ = c5_ + c_add_coeff_;
|
|
66
|
+
}
|
|
67
|
+
else {
|
|
68
|
+
prob[0] = sycl::exp(-lambda);
|
|
69
|
+
double tmp = prob[0];
|
|
70
|
+
for (int i = 1; i < RNG_POISSON_N_PRECOMPUTED_CDF; ++i) {
|
|
71
|
+
tmp *= lambda / (double)i;
|
|
72
|
+
prob[i] = prob[i - 1] + tmp;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
poisson_parameters& operator=(const poisson_parameters& other) {
|
|
78
|
+
if (this == &other) {
|
|
79
|
+
return *this;
|
|
80
|
+
}
|
|
81
|
+
for (int i = 0; i < RNG_POISSON_N_PRECOMPUTED_CDF; i++) {
|
|
82
|
+
prob[i] = other.prob[i];
|
|
83
|
+
}
|
|
84
|
+
floored_lambda_ = other.floored_lambda_;
|
|
85
|
+
log_lambda_ = other.log_lambda_;
|
|
86
|
+
lgamma_floored_lambda_ = other.lgamma_floored_lambda_;
|
|
87
|
+
sqrt_lambda_ = other.sqrt_lambda_;
|
|
88
|
+
sqrt_floored_lambda_ = other.sqrt_floored_lambda_;
|
|
89
|
+
dx_ = other.dx_;
|
|
90
|
+
delta_ = other.delta_;
|
|
91
|
+
dpdfl_ = other.dpdfl_;
|
|
92
|
+
sqrt_half_dpdfl_ = other.sqrt_half_dpdfl_;
|
|
93
|
+
inv_dpdfl_ = other.inv_dpdfl_;
|
|
94
|
+
c2_add_coeff_ = other.c2_add_coeff_;
|
|
95
|
+
c_add_coeff_ = other.c_add_coeff_;
|
|
96
|
+
c1_ = other.c1_;
|
|
97
|
+
c2_ = other.c2_;
|
|
98
|
+
c3_ = other.c3_;
|
|
99
|
+
c4_ = other.c4_;
|
|
100
|
+
c5_ = other.c5_;
|
|
101
|
+
c_ = other.c_;
|
|
102
|
+
return *this;
|
|
103
|
+
}
|
|
104
|
+
double prob[RNG_POISSON_N_PRECOMPUTED_CDF];
|
|
105
|
+
double floored_lambda_ = 0.0;
|
|
106
|
+
double log_lambda_ = 0.0;
|
|
107
|
+
double lgamma_floored_lambda_ = 0.0;
|
|
108
|
+
double sqrt_lambda_ = 0.0;
|
|
109
|
+
double sqrt_floored_lambda_ = 0.0;
|
|
110
|
+
double dx_ = 0.0;
|
|
111
|
+
double delta_ = 0.0;
|
|
112
|
+
double dpdfl_ = 0.0;
|
|
113
|
+
double sqrt_half_dpdfl_ = 0.0;
|
|
114
|
+
double inv_dpdfl_ = 0.0;
|
|
115
|
+
double c2_add_coeff_ = 0.0;
|
|
116
|
+
double c_add_coeff_ = 0.0;
|
|
117
|
+
double c1_ = 0.0;
|
|
118
|
+
double c2_ = 0.0;
|
|
119
|
+
double c3_ = 0.0;
|
|
120
|
+
double c4_ = 0.0;
|
|
121
|
+
double c5_ = 0.0;
|
|
122
|
+
double c_ = 0.0;
|
|
123
|
+
const double exp_one_by_78 = 1.0129030479320018583185514777512983L;
|
|
124
|
+
const double pi_4_ = 0.7853981633974483096156608458198757L;
|
|
125
|
+
const double spi_2_ = 1.2533141373155002512078826424055226L;
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
template <typename IntType>
|
|
129
|
+
class distribution_base<oneapi::mkl::rng::device::poisson<IntType, poisson_method::devroye>> {
|
|
130
|
+
public:
|
|
131
|
+
struct param_type {
|
|
132
|
+
param_type(double lambda) : lambda_(lambda) {}
|
|
133
|
+
double lambda_;
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
distribution_base(double lambda) : lambda_(lambda) {
|
|
137
|
+
#ifndef __SYCL_DEVICE_ONLY__
|
|
138
|
+
if (lambda_ <= 0.0) {
|
|
139
|
+
throw oneapi::mkl::invalid_argument("rng", "poisson", "lambda <= 0");
|
|
140
|
+
}
|
|
141
|
+
#endif
|
|
142
|
+
params_.set_lambda(lambda_);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
double lambda() const {
|
|
146
|
+
return lambda_;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
param_type param() const {
|
|
150
|
+
return param_type(lambda_);
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
void param(const param_type& pt) {
|
|
154
|
+
#ifndef __SYCL_DEVICE_ONLY__
|
|
155
|
+
if (pt.lambda_ <= 0.0) {
|
|
156
|
+
throw oneapi::mkl::invalid_argument("rng", "poisson", "lambda <= 0");
|
|
157
|
+
}
|
|
158
|
+
#endif
|
|
159
|
+
lambda_ = pt.lambda_;
|
|
160
|
+
params_.set_lambda(lambda_);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
protected:
|
|
164
|
+
IntType get_one_num_small_lambdas(double uniform_var) {
|
|
165
|
+
IntType res = 0;
|
|
166
|
+
if (uniform_var < params_.prob[0]) {
|
|
167
|
+
return res;
|
|
168
|
+
}
|
|
169
|
+
else {
|
|
170
|
+
for (res = 1; res < RNG_POISSON_N_PRECOMPUTED_CDF; ++res) {
|
|
171
|
+
if (uniform_var < params_.prob[res]) {
|
|
172
|
+
return res;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
// in case uniform_var is still bigger than CDF[31] compute additional CDF coefficients
|
|
176
|
+
double prob_less_than_k = params_.prob[--res];
|
|
177
|
+
double prob_that_k = prob_less_than_k - params_.prob[res - 1];
|
|
178
|
+
do {
|
|
179
|
+
prob_that_k *= lambda_ / (double)(res++ + 1);
|
|
180
|
+
prob_less_than_k += prob_that_k;
|
|
181
|
+
} while (uniform_var >= prob_less_than_k);
|
|
182
|
+
|
|
183
|
+
return res;
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
template <typename EngineType>
|
|
187
|
+
IntType get_one_num_med_lambdas(EngineType& engine) {
|
|
188
|
+
const double rounding_coeff = (1.0 - std::numeric_limits<double>::epsilon()) / 2.0;
|
|
189
|
+
const double max_inttype_val = (std::numeric_limits<IntType>::max)() + rounding_coeff;
|
|
190
|
+
double res_;
|
|
191
|
+
bool rejection_flag = true;
|
|
192
|
+
do {
|
|
193
|
+
const double uniform_var = params_.c_ * engine.generate_single(0.0, 1.0);
|
|
194
|
+
const double exponential_var = exponential_.generate_single(engine);
|
|
195
|
+
double w = 0.0;
|
|
196
|
+
if (uniform_var <= params_.c1_) {
|
|
197
|
+
const double gaussian_var = gaussian_.generate_single(engine);
|
|
198
|
+
const double y = -sycl::fabs(gaussian_var) * params_.sqrt_floored_lambda_ - 1.0;
|
|
199
|
+
res_ = sycl::floor(y);
|
|
200
|
+
w = -gaussian_var * gaussian_var / 2.0;
|
|
201
|
+
if (res_ < -params_.floored_lambda_)
|
|
202
|
+
continue;
|
|
203
|
+
}
|
|
204
|
+
else if (uniform_var <= params_.c2_) {
|
|
205
|
+
const double gaussian_var = gaussian_.generate_single(engine);
|
|
206
|
+
const double y = 1.0 + sycl::fabs(gaussian_var) * params_.sqrt_half_dpdfl_;
|
|
207
|
+
res_ = sycl::ceil(y);
|
|
208
|
+
w = y * (2.0 - y) * params_.inv_dpdfl_;
|
|
209
|
+
if (res_ > params_.delta_)
|
|
210
|
+
continue;
|
|
211
|
+
}
|
|
212
|
+
else if (uniform_var <= params_.c3_)
|
|
213
|
+
res_ = -1.0;
|
|
214
|
+
else if (uniform_var <= params_.c4_)
|
|
215
|
+
res_ = 0.0;
|
|
216
|
+
else if (uniform_var <= params_.c5_)
|
|
217
|
+
res_ = 1.0;
|
|
218
|
+
else {
|
|
219
|
+
const double exponential_var_1 = exponential_.generate_single(engine);
|
|
220
|
+
const double y =
|
|
221
|
+
params_.delta_ + exponential_var_1 * 2.0 * params_.dpdfl_ / params_.delta_;
|
|
222
|
+
res_ = sycl::ceil(y);
|
|
223
|
+
w = -params_.delta_ * params_.inv_dpdfl_ * (1.0 + y / 2.0);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
rejection_flag = ((w - exponential_var - res_ * params_.log_lambda_) >
|
|
227
|
+
(params_.lgamma_floored_lambda_ -
|
|
228
|
+
sycl::lgamma(res_ + params_.floored_lambda_ + 1.0)));
|
|
229
|
+
|
|
230
|
+
rejection_flag |= (res_ + params_.floored_lambda_) >= max_inttype_val;
|
|
231
|
+
|
|
232
|
+
} while (rejection_flag);
|
|
233
|
+
|
|
234
|
+
return ((IntType)(res_ + params_.floored_lambda_ + rounding_coeff));
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
template <typename EngineType>
|
|
238
|
+
auto generate(EngineType& engine) ->
|
|
239
|
+
typename std::conditional<EngineType::vec_size == 1, IntType,
|
|
240
|
+
sycl::vec<IntType, EngineType::vec_size>>::type {
|
|
241
|
+
using OutType = typename std::conditional<EngineType::vec_size == 1, IntType,
|
|
242
|
+
sycl::vec<IntType, EngineType::vec_size>>::type;
|
|
243
|
+
OutType res;
|
|
244
|
+
if constexpr (EngineType::vec_size == 1) {
|
|
245
|
+
res = 0;
|
|
246
|
+
if (lambda_ < RNG_POISSON_LAMBDA_LOW_BOUND) {
|
|
247
|
+
double uniform_var = engine.generate(0.0, 1.0);
|
|
248
|
+
return get_one_num_small_lambdas(uniform_var);
|
|
249
|
+
}
|
|
250
|
+
else if (lambda_ < RNG_POISSON_LAMBDA_HUGE_BOUND) {
|
|
251
|
+
const double rounding_coeff = (1.0 - std::numeric_limits<double>::epsilon()) / 2.0;
|
|
252
|
+
const double max_inttype_val =
|
|
253
|
+
(std::numeric_limits<IntType>::max)() + rounding_coeff;
|
|
254
|
+
double res_;
|
|
255
|
+
bool rejection_flag = true;
|
|
256
|
+
do {
|
|
257
|
+
const double uniform_var = params_.c_ * engine.generate(0.0, 1.0);
|
|
258
|
+
const double exponential_var = exponential_.generate(engine);
|
|
259
|
+
double w = 0.0;
|
|
260
|
+
if (uniform_var <= params_.c1_) {
|
|
261
|
+
const double gaussian_var = gaussian_.generate(engine);
|
|
262
|
+
const double y =
|
|
263
|
+
-sycl::fabs(gaussian_var) * params_.sqrt_floored_lambda_ - 1.0;
|
|
264
|
+
res_ = sycl::floor(y);
|
|
265
|
+
w = -gaussian_var * gaussian_var / 2.0;
|
|
266
|
+
if (res_ < -params_.floored_lambda_)
|
|
267
|
+
continue;
|
|
268
|
+
}
|
|
269
|
+
else if (uniform_var <= params_.c2_) {
|
|
270
|
+
const double gaussian_var = gaussian_.generate(engine);
|
|
271
|
+
const double y = 1.0 + sycl::fabs(gaussian_var) * params_.sqrt_half_dpdfl_;
|
|
272
|
+
res_ = sycl::ceil(y);
|
|
273
|
+
w = y * (2.0 - y) * params_.inv_dpdfl_;
|
|
274
|
+
if (res_ > params_.delta_)
|
|
275
|
+
continue;
|
|
276
|
+
}
|
|
277
|
+
else if (uniform_var <= params_.c3_)
|
|
278
|
+
res_ = -1.0;
|
|
279
|
+
else if (uniform_var <= params_.c4_)
|
|
280
|
+
res_ = 0.0;
|
|
281
|
+
else if (uniform_var <= params_.c5_)
|
|
282
|
+
res_ = 1.0;
|
|
283
|
+
else {
|
|
284
|
+
const double exponential_var_1 = exponential_.generate(engine);
|
|
285
|
+
const double y = params_.delta_ +
|
|
286
|
+
exponential_var_1 * 2.0 * params_.dpdfl_ / params_.delta_;
|
|
287
|
+
res_ = sycl::ceil(y);
|
|
288
|
+
w = -params_.delta_ * params_.inv_dpdfl_ * (1.0 + y / 2.0);
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
rejection_flag = ((w - exponential_var - res_ * params_.log_lambda_) >
|
|
292
|
+
(params_.lgamma_floored_lambda_ -
|
|
293
|
+
sycl::lgamma(res_ + params_.floored_lambda_ + 1.0)));
|
|
294
|
+
|
|
295
|
+
rejection_flag |= (res_ + params_.floored_lambda_) >= max_inttype_val;
|
|
296
|
+
|
|
297
|
+
} while (rejection_flag);
|
|
298
|
+
|
|
299
|
+
return ((IntType)(res_ + params_.floored_lambda_ + rounding_coeff));
|
|
300
|
+
}
|
|
301
|
+
else {
|
|
302
|
+
res = static_cast<IntType>(lambda_ +
|
|
303
|
+
params_.sqrt_lambda_ * gaussian_.generate(engine));
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
else {
|
|
307
|
+
if (lambda_ < RNG_POISSON_LAMBDA_LOW_BOUND) {
|
|
308
|
+
auto uniform_var = engine.generate(0.0, 1.0);
|
|
309
|
+
for (int i = 0; i < EngineType::vec_size; ++i) {
|
|
310
|
+
res[i] = get_one_num_small_lambdas(uniform_var[i]);
|
|
311
|
+
}
|
|
312
|
+
return res;
|
|
313
|
+
}
|
|
314
|
+
else if (lambda_ < RNG_POISSON_LAMBDA_HUGE_BOUND) {
|
|
315
|
+
for (int i = 0; i < EngineType::vec_size; ++i) {
|
|
316
|
+
res[i] = get_one_num_med_lambdas(engine);
|
|
317
|
+
}
|
|
318
|
+
return res;
|
|
319
|
+
}
|
|
320
|
+
else {
|
|
321
|
+
sycl::vec<double, EngineType::vec_size> res_fp =
|
|
322
|
+
lambda_ + params_.sqrt_lambda_ * gaussian_.generate(engine);
|
|
323
|
+
res_fp = sycl::floor(res_fp);
|
|
324
|
+
res = res_fp.template convert<IntType>();
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
return res;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
template <typename EngineType>
|
|
331
|
+
IntType generate_single(EngineType& engine) {
|
|
332
|
+
IntType res = 0;
|
|
333
|
+
if (lambda_ < RNG_POISSON_LAMBDA_LOW_BOUND) {
|
|
334
|
+
double uniform_var = engine.generate_single(0.0, 1.0);
|
|
335
|
+
return get_one_num_small_lambdas(uniform_var);
|
|
336
|
+
}
|
|
337
|
+
else if (lambda_ < RNG_POISSON_LAMBDA_HUGE_BOUND) {
|
|
338
|
+
return get_one_num_med_lambdas(engine);
|
|
339
|
+
}
|
|
340
|
+
else {
|
|
341
|
+
res = static_cast<IntType>(lambda_ +
|
|
342
|
+
params_.sqrt_lambda_ * gaussian_.generate_single(engine));
|
|
343
|
+
}
|
|
344
|
+
return res;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
distribution_base<oneapi::mkl::rng::device::gaussian<double>> gaussian_ = { 0.0, 1.0 };
|
|
348
|
+
distribution_base<oneapi::mkl::rng::device::exponential<double>> exponential_ = { 0.0, 1.0 };
|
|
349
|
+
poisson_parameters params_;
|
|
350
|
+
double lambda_;
|
|
351
|
+
};
|
|
352
|
+
|
|
353
|
+
} // namespace oneapi::mkl::rng::device::detail
|
|
354
|
+
|
|
355
|
+
#endif // _MKL_RNG_DEVICE_POISSON_IMPL_HPP_
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/*******************************************************************************
|
|
2
|
+
* Copyright 2020 Intel Corporation
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing,
|
|
11
|
+
* software distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions
|
|
14
|
+
* and limitations under the License.
|
|
15
|
+
*
|
|
16
|
+
*
|
|
17
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
18
|
+
*******************************************************************************/
|
|
19
|
+
|
|
20
|
+
#ifndef _MKL_RNG_DETAIL_DETAIL_TYPES_HPP_
|
|
21
|
+
#define _MKL_RNG_DETAIL_DETAIL_TYPES_HPP_
|
|
22
|
+
|
|
23
|
+
#include <sycl/sycl.hpp>
|
|
24
|
+
|
|
25
|
+
namespace oneapi {
|
|
26
|
+
namespace mkl {
|
|
27
|
+
namespace rng {
|
|
28
|
+
namespace device {
|
|
29
|
+
|
|
30
|
+
namespace detail {
|
|
31
|
+
|
|
32
|
+
// Tag types naming the kind of device an engine state is specialized for.
namespace device_type {
// Generic DPC++ device; currently only generic DPC++ version is supported.
struct generic {};
} // namespace device_type
|
|
36
|
+
|
|
37
|
+
// Internal structure describing the state of an engine for a given device.
// The primary template declared here is empty.
// NOTE(review): per-engine contents are presumably provided by specializations
// defined elsewhere -- confirm.
template <class EngineType, class DeviceType>
struct engine_state_device {};
|
|
40
|
+
|
|
41
|
+
// Engine state keyed by engine type. The primary template is an empty union.
// NOTE(review): specializations are presumably defined elsewhere -- confirm.
template <class EngineType>
union engine_state {};
|
|
43
|
+
|
|
44
|
+
// Two 32-bit words.
// NOTE(review): by its name this looks like the raw bit pattern of a
// double-precision value -- confirm against its users.
struct dp_struct_t {
    std::uint32_t hex[2];
};
|
|
47
|
+
|
|
48
|
+
// One 32-bit word.
// NOTE(review): by its name this looks like the raw bit pattern of a
// single-precision value -- confirm against its users.
struct sp_struct_t {
    std::uint32_t hex[1];
};
|
|
51
|
+
|
|
52
|
+
} // namespace detail
|
|
53
|
+
} // namespace device
|
|
54
|
+
} // namespace rng
|
|
55
|
+
} // namespace mkl
|
|
56
|
+
} // namespace oneapi
|
|
57
|
+
|
|
58
|
+
#endif // _MKL_RNG_DETAIL_DETAIL_TYPES_HPP_
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/*******************************************************************************
|
|
2
|
+
* Copyright 2023 Intel Corporation
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing,
|
|
11
|
+
* software distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions
|
|
14
|
+
* and limitations under the License.
|
|
15
|
+
*
|
|
16
|
+
*
|
|
17
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
18
|
+
*******************************************************************************/
|
|
19
|
+
|
|
20
|
+
#ifndef _MKL_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
|
|
21
|
+
#define _MKL_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
|
|
22
|
+
|
|
23
|
+
#include "engine_base.hpp"
|
|
24
|
+
|
|
25
|
+
namespace oneapi::mkl::rng::device::detail {
|
|
26
|
+
|
|
27
|
+
template <typename UIntType>
|
|
28
|
+
class distribution_base<oneapi::mkl::rng::device::uniform_bits<UIntType>> {
|
|
29
|
+
protected:
|
|
30
|
+
template <typename EngineType>
|
|
31
|
+
auto generate(EngineType& engine) ->
|
|
32
|
+
typename std::conditional<EngineType::vec_size == 1, UIntType,
|
|
33
|
+
sycl::vec<UIntType, EngineType::vec_size>>::type {
|
|
34
|
+
static_assert(std::is_same<EngineType, philox4x32x10<EngineType::vec_size>>::value ||
|
|
35
|
+
std::is_same<EngineType, mcg59<EngineType::vec_size>>::value,
|
|
36
|
+
"oneMKL: uniform_bits works only with philox4x32x10/mcg59 engines");
|
|
37
|
+
return engine.template generate_uniform_bits<UIntType>();
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
template <typename EngineType>
|
|
41
|
+
UIntType generate_single(EngineType& engine) {
|
|
42
|
+
static_assert(std::is_same<EngineType, philox4x32x10<EngineType::vec_size>>::value ||
|
|
43
|
+
std::is_same<EngineType, mcg59<EngineType::vec_size>>::value,
|
|
44
|
+
"oneMKL: uniform_bits works only with philox4x32x10/mcg59 engines");
|
|
45
|
+
return engine.template generate_single_uniform_bits<UIntType>();
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
} // namespace oneapi::mkl::rng::device::detail
|
|
50
|
+
|
|
51
|
+
#endif // _MKL_RNG_DEVICE_UNIFORM_BITS_IMPL_HPP_
|