pyopencl 2024.3__cp312-cp312-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (43) hide show
  1. pyopencl/.libs/libOpenCL-1ef0e16e.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +2410 -0
  3. pyopencl/_cl.cpython-312-x86_64-linux-musl.so +0 -0
  4. pyopencl/_cluda.py +54 -0
  5. pyopencl/_mymako.py +14 -0
  6. pyopencl/algorithm.py +1449 -0
  7. pyopencl/array.py +3437 -0
  8. pyopencl/bitonic_sort.py +242 -0
  9. pyopencl/bitonic_sort_templates.py +594 -0
  10. pyopencl/cache.py +535 -0
  11. pyopencl/capture_call.py +177 -0
  12. pyopencl/characterize/__init__.py +456 -0
  13. pyopencl/characterize/performance.py +237 -0
  14. pyopencl/cl/pyopencl-airy.cl +324 -0
  15. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  16. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  17. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  18. pyopencl/cl/pyopencl-complex.h +303 -0
  19. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  20. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  21. pyopencl/cl/pyopencl-random123/array.h +325 -0
  22. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  23. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  24. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  25. pyopencl/clmath.py +280 -0
  26. pyopencl/clrandom.py +409 -0
  27. pyopencl/cltypes.py +137 -0
  28. pyopencl/compyte/.gitignore +21 -0
  29. pyopencl/compyte/__init__.py +0 -0
  30. pyopencl/compyte/array.py +214 -0
  31. pyopencl/compyte/dtypes.py +290 -0
  32. pyopencl/compyte/pyproject.toml +54 -0
  33. pyopencl/elementwise.py +1171 -0
  34. pyopencl/invoker.py +421 -0
  35. pyopencl/ipython_ext.py +68 -0
  36. pyopencl/reduction.py +786 -0
  37. pyopencl/scan.py +1915 -0
  38. pyopencl/tools.py +1527 -0
  39. pyopencl/version.py +9 -0
  40. pyopencl-2024.3.dist-info/METADATA +108 -0
  41. pyopencl-2024.3.dist-info/RECORD +43 -0
  42. pyopencl-2024.3.dist-info/WHEEL +5 -0
  43. pyopencl-2024.3.dist-info/licenses/LICENSE +104 -0
pyopencl/clmath.py ADDED
@@ -0,0 +1,280 @@
1
+ # pylint:disable=unexpected-keyword-arg # for @elwise_kernel_runner
2
+
3
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
4
+
5
+ __license__ = """
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import numpy as np
26
+
27
+ import pyopencl.array as cl_array
28
+ import pyopencl.elementwise as elementwise
29
+ from pyopencl.array import _get_common_dtype
30
+
31
+
32
def _make_unary_array_func(name):
    """Build the array-level wrapper for the unary OpenCL function *name*.

    The returned callable has the signature ``f(array, queue=None)``. It
    allocates a result via ``array._new_like_me``, runs the element-wise
    kernel on it, attaches the resulting event to the result and returns it.

    For complex input dtypes the kernel function name is prefixed with the
    name obtained from ``complex_dtype_to_name``.
    """

    @cl_array.elwise_kernel_runner
    def knl_runner(result, arg):
        # Real dtypes call the function by its plain name.
        if arg.dtype.kind != "c":
            return elementwise.get_unary_func_kernel(
                result.context, name, arg.dtype)

        from pyopencl.elementwise import complex_dtype_to_name
        prefixed_name = "{}_{}".format(complex_dtype_to_name(arg.dtype), name)
        return elementwise.get_unary_func_kernel(
            result.context, prefixed_name, arg.dtype)

    def f(array, queue=None):
        out = array._new_like_me(queue=queue)
        out.add_event(knl_runner(out, array, queue=queue))
        return out

    return f
51
+
52
+
53
# See table 6.8 in the CL 1.1 spec

# Each name below is the wrapper returned by _make_unary_array_func for the
# OpenCL function of the same name; call as ``acos(array, queue=None)``.
acos = _make_unary_array_func("acos")
acosh = _make_unary_array_func("acosh")
acospi = _make_unary_array_func("acospi")

asin = _make_unary_array_func("asin")
asinh = _make_unary_array_func("asinh")
asinpi = _make_unary_array_func("asinpi")
61
+
62
+
63
@cl_array.elwise_kernel_runner
def _atan2(result, arg1, arg2):
    """Fetch the binary ``atan2`` kernel for the dtypes of the operands
    and of *result*."""
    operand_dtypes = (arg1.dtype, arg2.dtype)
    return elementwise.get_float_binary_func_kernel(
        result.context, "atan2", *operand_dtypes, result.dtype)
67
+
68
+
69
@cl_array.elwise_kernel_runner
def _atan2pi(result, arg1, arg2):
    """Fetch the binary ``atan2pi`` kernel for the dtypes of the operands
    and of *result*."""
    operand_dtypes = (arg1.dtype, arg2.dtype)
    return elementwise.get_float_binary_func_kernel(
        result.context, "atan2pi", *operand_dtypes, result.dtype)
73
+
74
+
75
# Unary wrapper for the OpenCL ``atan`` function: ``atan(array, queue=None)``.
atan = _make_unary_array_func("atan")
76
+
77
+
78
def atan2(y, x, queue=None):
    """Apply the OpenCL ``atan2`` function element-wise to *y* and *x*.

    The result is allocated like *y*, with the dtype chosen by
    ``_get_common_dtype(y, x, queue)``. If *queue* is not given,
    ``y.queue`` is used.

    .. versionadded:: 2013.1
    """
    if not queue:
        queue = y.queue

    out_dtype = _get_common_dtype(y, x, queue)
    result = y._new_like_me(out_dtype)

    evt = _atan2(result, y, x, queue=queue)
    result.add_event(evt)
    return result
86
+
87
+
88
# Unary wrappers for the OpenCL functions of the same names.
atanh = _make_unary_array_func("atanh")
atanpi = _make_unary_array_func("atanpi")
90
+
91
+
92
def atan2pi(y, x, queue=None):
    """Apply the OpenCL ``atan2pi`` function element-wise to *y* and *x*.

    The result is allocated like *y*, with the dtype chosen by
    ``_get_common_dtype(y, x, queue)``. If *queue* is not given,
    ``y.queue`` is used.

    .. versionadded:: 2013.1
    """
    if not queue:
        queue = y.queue

    out_dtype = _get_common_dtype(y, x, queue)
    result = y._new_like_me(out_dtype)

    evt = _atan2pi(result, y, x, queue=queue)
    result.add_event(evt)
    return result
100
+
101
+
102
# Unary wrappers, one per OpenCL function name; the TODO markers record
# functions that have no wrapper in this module yet.
cbrt = _make_unary_array_func("cbrt")
ceil = _make_unary_array_func("ceil")
# TODO: copysign

cos = _make_unary_array_func("cos")
cosh = _make_unary_array_func("cosh")
cospi = _make_unary_array_func("cospi")

erfc = _make_unary_array_func("erfc")
erf = _make_unary_array_func("erf")
exp = _make_unary_array_func("exp")
exp2 = _make_unary_array_func("exp2")
exp10 = _make_unary_array_func("exp10")
expm1 = _make_unary_array_func("expm1")

fabs = _make_unary_array_func("fabs")
# TODO: fdim
floor = _make_unary_array_func("floor")
# TODO: fma
# TODO: fmax
# TODO: fmin
123
+
124
+
125
@cl_array.elwise_kernel_runner
def _fmod(result, arg, mod):
    """Fetch the ``fmod`` kernel for the result, argument and modulus
    dtypes."""
    dtypes = (result.dtype, arg.dtype, mod.dtype)
    return elementwise.get_fmod_kernel(result.context, *dtypes)
129
+
130
+
131
def fmod(arg, mod, queue=None):
    """Return the floating point remainder of the division ``arg / mod``,
    for each element in ``arg`` and ``mod``.

    The queue is taken from *queue*, then ``arg.queue``, then ``mod.queue``,
    whichever is truthy first. The result is allocated like *arg* with the
    dtype chosen by ``_get_common_dtype(arg, mod, queue)``.
    """
    queue = queue or arg.queue or mod.queue

    out_dtype = _get_common_dtype(arg, mod, queue)
    result = arg._new_like_me(out_dtype)

    evt = _fmod(result, arg, mod, queue=queue)
    result.add_event(evt)
    return result
138
+
139
+ # TODO: fract
140
+
141
+
142
@cl_array.elwise_kernel_runner
def _frexp(sig, expt, arg):
    """Fetch the ``frexp`` kernel for the significand, exponent and argument
    dtypes."""
    dtypes = (sig.dtype, expt.dtype, arg.dtype)
    return elementwise.get_frexp_kernel(sig.context, *dtypes)
146
+
147
+
148
def frexp(arg, queue=None):
    """Return a tuple ``(significands, exponents)`` such that
    ``arg == significand * 2**exponent``.

    The significands are allocated like *arg*; the exponents are allocated
    like *arg* but with dtype ``numpy.int32``. Both arrays carry the event of
    the single kernel launch that fills them.
    """
    significands = arg._new_like_me(queue=queue)
    exponents = arg._new_like_me(queue=queue, dtype=np.int32)

    evt = _frexp(significands, exponents, arg, queue=queue)
    for out in (significands, exponents):
        out.add_event(evt)

    return significands, exponents
158
+
159
+ # TODO: hypot
160
+
161
+
162
# Unary wrapper for the OpenCL ``ilogb`` function: ``ilogb(array, queue=None)``.
ilogb = _make_unary_array_func("ilogb")
163
+
164
+
165
@cl_array.elwise_kernel_runner
def _ldexp(result, sig, exp):
    """Fetch the ``ldexp`` kernel for the result, significand and exponent
    dtypes."""
    dtypes = (result.dtype, sig.dtype, exp.dtype)
    return elementwise.get_ldexp_kernel(result.context, *dtypes)
169
+
170
+
171
def ldexp(significand, exponent, queue=None):
    """Return a new array of floating point values composed from the
    entries of ``significand`` and ``exponent``, paired together as
    ``result = significand * 2**exponent``.

    :arg significand: array supplying the significands; the result is
        allocated like this array.
    :arg exponent: array supplying the exponents.
    :arg queue: command queue used both for allocating the result and for
        launching the kernel.
    :returns: the newly allocated result array, with the kernel's event
        attached.
    """
    result = significand._new_like_me(queue=queue)
    # Forward the queue explicitly, as every other wrapper in this module
    # does, so the kernel launch does not depend on the runner's fallback.
    result.add_event(_ldexp(result, significand, exponent, queue=queue))
    return result
179
+
180
+
181
# Unary wrappers, one per OpenCL function name; the TODO markers record
# functions that have no wrapper in this module yet.
lgamma = _make_unary_array_func("lgamma")
# TODO: lgamma_r

log = _make_unary_array_func("log")
log2 = _make_unary_array_func("log2")
log10 = _make_unary_array_func("log10")
log1p = _make_unary_array_func("log1p")
logb = _make_unary_array_func("logb")

# TODO: mad
# TODO: maxmag
# TODO: minmag
193
+
194
+
195
@cl_array.elwise_kernel_runner
def _modf(intpart, fracpart, arg):
    """Fetch the ``modf`` kernel for the integer-part, fractional-part and
    argument dtypes."""
    dtypes = (intpart.dtype, fracpart.dtype, arg.dtype)
    return elementwise.get_modf_kernel(intpart.context, *dtypes)
199
+
200
+
201
def modf(arg, queue=None):
    """Return a tuple ``(fracpart, intpart)`` of arrays containing the
    fractional and integer parts of ``arg``, in that order.

    Both arrays are allocated like *arg* and carry the event of the single
    kernel launch that fills them.
    """
    # Allocation order (integer part first) is kept as-is.
    int_out = arg._new_like_me(queue=queue)
    frac_out = arg._new_like_me(queue=queue)

    evt = _modf(int_out, frac_out, arg, queue=queue)
    for out in (frac_out, int_out):
        out.add_event(evt)

    return frac_out, int_out
211
+
212
+
213
# Unary wrappers, one per OpenCL function name; the TODO markers record
# functions that have no wrapper in this module yet.
nan = _make_unary_array_func("nan")

# TODO: nextafter
# TODO: remainder
# TODO: remquo

rint = _make_unary_array_func("rint")
# TODO: rootn
# NOTE: within this module, this name shadows the builtin ``round``.
round = _make_unary_array_func("round")

sin = _make_unary_array_func("sin")
# TODO: sincos
sinh = _make_unary_array_func("sinh")
sinpi = _make_unary_array_func("sinpi")

sqrt = _make_unary_array_func("sqrt")

tan = _make_unary_array_func("tan")
tanh = _make_unary_array_func("tanh")
tanpi = _make_unary_array_func("tanpi")
tgamma = _make_unary_array_func("tgamma")
trunc = _make_unary_array_func("trunc")


# no point wrapping half_ or native_

# TODO: table 6.10, integer functions
# TODO: table 6.12, clamp et al
241
+
242
@cl_array.elwise_kernel_runner
def _bessel_jn(result, n, x):
    """Fetch the Bessel kernel of kind ``"j"``; the dtype for *n* is derived
    from its Python type."""
    order_dtype = np.dtype(type(n))
    return elementwise.get_bessel_kernel(
        result.context, "j", result.dtype, order_dtype, x.dtype)
246
+
247
+
248
@cl_array.elwise_kernel_runner
def _bessel_yn(result, n, x):
    """Fetch the Bessel kernel of kind ``"y"``; the dtype for *n* is derived
    from its Python type."""
    order_dtype = np.dtype(type(n))
    return elementwise.get_bessel_kernel(
        result.context, "y", result.dtype, order_dtype, x.dtype)
252
+
253
+
254
@cl_array.elwise_kernel_runner
def _hankel_01(h0, h1, x):
    """Fetch the ``hankel_01`` kernel for the dtype shared by *h0* and *h1*
    and the dtype of *x*.

    :raises TypeError: if *h0* and *h1* have different dtypes.
    """
    out_dtype = h0.dtype
    if out_dtype != h1.dtype:
        raise TypeError("types of h0 and h1 must match")

    return elementwise.get_hankel_01_kernel(h0.context, out_dtype, x.dtype)
260
+
261
+
262
def bessel_jn(n, x, queue=None):
    """Run the ``"j"`` Bessel kernel for order *n* on the array *x*.

    Returns a new array allocated like *x*, with the kernel's event attached.
    """
    out = x._new_like_me(queue=queue)
    evt = _bessel_jn(out, n, x, queue=queue)
    out.add_event(evt)
    return out
266
+
267
+
268
def bessel_yn(n, x, queue=None):
    """Run the ``"y"`` Bessel kernel for order *n* on the array *x*.

    Returns a new array allocated like *x*, with the kernel's event attached.
    """
    out = x._new_like_me(queue=queue)
    evt = _bessel_yn(out, n, x, queue=queue)
    out.add_event(evt)
    return out
272
+
273
+
274
def hankel_01(x, queue=None):
    """Run the ``hankel_01`` kernel on the array *x*.

    Returns a tuple ``(h0, h1)`` of new arrays, each allocated like *x* and
    each carrying the event of the single kernel launch that fills them.
    """
    outputs = (x._new_like_me(queue=queue), x._new_like_me(queue=queue))

    evt = _hankel_01(outputs[0], outputs[1], x, queue=queue)
    for out in outputs:
        out.add_event(evt)

    return outputs
pyopencl/clrandom.py ADDED
@@ -0,0 +1,409 @@
1
+ __copyright__ = "Copyright (C) 2009-16 Andreas Kloeckner"
2
+
3
+ __license__ = """
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in
12
+ all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ THE SOFTWARE.
21
+ """
22
+
23
+
24
+ # {{{ documentation
25
+
26
+ __doc__ = """
27
+ PyOpenCL includes and uses some of the `Random123 random number generators
28
+ <https://www.deshawresearch.com/resources.html>`__ by D.E. Shaw
29
+ Research. In addition to being usable through the convenience functions above,
30
+ they are available in any piece of code compiled through PyOpenCL by::
31
+
32
+ #include <pyopencl-random123/philox.cl>
33
+ #include <pyopencl-random123/threefry.cl>
34
+
35
+ See the `Philox source
36
+ <https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/philox.cl>`__
37
+ and the `Threefry source
38
+ <https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/threefry.cl>`__
39
+ for some documentation if you're planning on using Random123 directly.
40
+
41
+ .. autoclass:: PhiloxGenerator
42
+
43
+ .. autoclass:: ThreefryGenerator
44
+
45
+ .. autofunction:: rand
46
+ .. autofunction:: fill_rand
47
+
48
+ """
49
+
50
+ # }}}
51
+
52
+ import numpy as np
53
+
54
+ from pytools import memoize_method
55
+
56
+ import pyopencl as cl
57
+ import pyopencl.array as cl_array
58
+ import pyopencl.cltypes as cltypes
59
+ from pyopencl.tools import first_arg_dependent_memoize
60
+
61
+
62
+ # {{{ Random123 generators
63
+
64
class Random123GeneratorBase:
    """
    .. versionadded:: 2016.2

    .. automethod:: fill_uniform
    .. automethod:: uniform
    .. automethod:: fill_normal
    .. automethod:: normal
    """

    # Subclasses override the three properties below with plain class
    # attributes; the base versions only signal that they are abstract.

    @property
    def header_name(self):
        # File name placed in the ``#include <...>`` of the kernel source.
        raise NotImplementedError

    @property
    def generator_name(self):
        # Name of the generator function; also the prefix of its
        # ``_ctr_t`` and ``_key_t`` types in the kernel source.
        raise NotImplementedError

    @property
    def key_length(self):
        # Number of key words. The first one is the work-item id, the
        # remaining ``key_length - 1`` come from ``self.key``.
        raise NotImplementedError

    def __init__(self, context, key=None, counter=None, seed=None):
        """
        :arg context: the context in which kernels are built.
        :arg key: a sequence of ``key_length - 1`` integers passed to the
            kernel as 32-bit integers, or *None* to draw them from a
            :class:`random.Random` seeded with *seed*.
        :arg counter: a sequence of 4 integers passed to the kernel as
            32-bit integers, or *None* to draw them the same way.
        :arg seed: seed for the :class:`random.Random` used to draw missing
            *key* / *counter* values.
        :raises TypeError: if *key*, *counter* and *seed* are all given,
            since the seed would then be unused.
        """
        int32_info = np.iinfo(np.int32)
        from random import Random

        rng = Random(seed)

        if key is not None and counter is not None and seed is not None:
            raise TypeError("seed is unused and may not be specified "
                    "if both counter and key are given")

        if key is None:
            key = [
                    rng.randrange(
                        int(int32_info.min), int(int32_info.max)+1)
                    for _ in range(self.key_length-1)]
        if counter is None:
            counter = [
                    rng.randrange(
                        int(int32_info.min), int(int32_info.max)+1)
                    for _ in range(4)]

        self.context = context
        # Keep private list copies: the counter is advanced in place by
        # _fill (which must not modify the caller's sequence), and _fill
        # concatenates key and counter with a list, which requires lists.
        self.key = list(key)
        self.counter = list(counter)

        self.counter_max = int32_info.max

    @memoize_method
    def get_gen_kernel(self, dtype, distribution):
        """Build the generator kernel for *dtype* and *distribution*.

        :arg dtype: dtype of the array to be filled.
        :arg distribution: ``"uniform"`` or ``"normal"``.
        :returns: a tuple ``(knl, counter_multiplier, size_multiplier)``:
            the kernel, the factor by which :meth:`_fill` advances the
            counter per array entry, and the number of scalars per array
            entry (greater than 1 only for vector dtypes found in
            ``cltypes.vec_type_to_scalar_and_count``).
        :raises TypeError: if the combination of *distribution* and *dtype*
            is not supported.
        """
        size_multiplier = 1
        arg_dtype = dtype

        rng_key = (distribution, dtype)

        if rng_key in [("uniform", np.float64), ("normal", np.float64)]:
            c_type = "double"
            scale1_const = "((double) %r)" % (1/2**32)
            scale2_const = "((double) %r)" % (1/2**64)
            if distribution == "normal":
                transform = "box_muller"
            else:
                transform = ""

            # GET_RANDOM_NUM's argument appears twice in this expression,
            # so the generator is invoked twice per output vector; hence
            # the counter multiplier of 2.
            rng_expr = (
                    "shift + scale * "
                    "%s( %s * convert_double4(gen)"
                    "+ %s * convert_double4(gen))"
                    % (transform, scale1_const, scale2_const))

            counter_multiplier = 2

        elif rng_key in [(dist, cmp_dtype)
                for dist in ["normal", "uniform"]
                for cmp_dtype in [
                    np.float32,
                    cltypes.float2,
                    cltypes.float3,
                    cltypes.float4,
                    ]]:
            c_type = "float"
            scale_const = "((float) %r)" % (1/2**32)

            if distribution == "normal":
                transform = "box_muller"
            else:
                transform = ""

            rng_expr = (
                    "shift + scale * %s(%s * convert_float4(gen))"
                    % (transform, scale_const))
            counter_multiplier = 1
            # scale/shift are passed as scalar floats even when the array
            # has a float vector dtype.
            arg_dtype = np.float32
            try:
                _, size_multiplier = cltypes.vec_type_to_scalar_and_count[dtype]
            except KeyError:
                # Not a known vector type: one scalar per entry.
                pass

        elif rng_key == ("uniform", np.int32):
            c_type = "int"
            rng_expr = (
                    "shift + convert_int4((convert_long4(gen) * scale) / %s)"
                    % (str(2**32)+"l")
                    )
            counter_multiplier = 1

        elif rng_key == ("uniform", np.int64):
            c_type = "long"
            # As in the double case, ``gen`` appears twice in the expression.
            rng_expr = (
                    "shift"
                    "+ convert_long4(gen) * (scale/two32) "
                    "+ ((convert_long4(gen) * scale) / two32)"
                    .replace("two32", (str(2**32)+"l")))
            counter_multiplier = 2

        else:
            raise TypeError(
                    "unsupported RNG distribution/data type combination '%s/%s'"
                    % rng_key)

        kernel_name = f"rng_gen_{self.generator_name}_{distribution}"
        # Doubled braces are literal braces for str.format; the quadrupled
        # ones produce the double-brace initializers of key_t/ctr_t.
        src = """//CL//
            #include <{header_name}>

            #ifndef M_PI
            #ifdef M_PI_F
            #define M_PI M_PI_F
            #else
            #define M_PI 3.14159265359f
            #endif
            #endif

            typedef {output_t} output_t;
            typedef {output_t}4 output_vec_t;
            typedef {gen_name}_ctr_t ctr_t;
            typedef {gen_name}_key_t key_t;

            uint4 gen_bits(key_t *key, ctr_t *ctr)
            {{
                union {{
                    ctr_t ctr_el;
                    uint4 vec_el;
                }} u;

                u.ctr_el = {gen_name}(*ctr, *key);
                if (++ctr->v[0] == 0)
                    if (++ctr->v[1] == 0)
                        ++ctr->v[2];

                return u.vec_el;
            }}

            #if {include_box_muller}
            output_vec_t box_muller(output_vec_t x)
            {{
                #define BOX_MULLER(I, COMPA, COMPB) \
                    output_t r##I = sqrt(-2*log(x.COMPA)); \
                    output_t c##I; \
                    output_t s##I = sincos((output_t) (2*M_PI) * x.COMPB, &c##I);

                BOX_MULLER(0, x, y);
                BOX_MULLER(1, z, w);
                return (output_vec_t) (r0*c0, r0*s0, r1*c1, r1*s1);
            }}
            #endif

            #define GET_RANDOM_NUM(gen) {rng_expr}

            kernel void {kernel_name}(
                int k1,
                #if {key_length} > 2
                int k2, int k3,
                #endif
                int c0, int c1, int c2, int c3,
                global output_t *output,
                long out_size,
                output_t scale,
                output_t shift)
            {{
                #if {key_length} == 2
                key_t k = {{{{get_global_id(0), k1}}}};
                #else
                key_t k = {{{{get_global_id(0), k1, k2, k3}}}};
                #endif

                ctr_t c = {{{{c0, c1, c2, c3}}}};

                // output bulk
                unsigned long idx = get_global_id(0)*4;
                while (idx + 4 < out_size)
                {{
                    output_vec_t ran = GET_RANDOM_NUM(gen_bits(&k, &c));
                    vstore4(ran, 0, &output[idx]);
                    idx += 4*get_global_size(0);
                }}

                // output tail
                output_vec_t tail_ran = GET_RANDOM_NUM(gen_bits(&k, &c));
                if (idx < out_size)
                  output[idx] = tail_ran.x;
                if (idx+1 < out_size)
                  output[idx+1] = tail_ran.y;
                if (idx+2 < out_size)
                  output[idx+2] = tail_ran.z;
                if (idx+3 < out_size)
                  output[idx+3] = tail_ran.w;
            }}
            """.format(
                kernel_name=kernel_name,
                gen_name=self.generator_name,
                header_name=self.header_name,
                output_t=c_type,
                key_length=self.key_length,
                include_box_muller=int(distribution == "normal"),
                rng_expr=rng_expr
                )

        prg = cl.Program(self.context, src).build()
        knl = getattr(prg, kernel_name)
        # Scalar layout: the key words and 4 counter words as int32, the
        # output buffer (None), the output size as int64, then scale and
        # shift.
        knl.set_scalar_arg_dtypes(
                [np.int32] * (self.key_length - 1 + 4)
                + [None, np.int64, arg_dtype, arg_dtype])

        return knl, counter_multiplier, size_multiplier

    def _fill(self, distribution, ary, scale, shift, queue=None):
        """Fill *ary* with random numbers drawn from *distribution*
        (``"uniform"`` or ``"normal"``), transformed by *scale* and *shift*
        as defined by the kernel from :meth:`get_gen_kernel`, and advance
        the generator's counter.

        :arg queue: the queue on which to launch the kernel; defaults to
            ``ary.queue``.
        :return: a :class:`pyopencl.Event`
        """

        if queue is None:
            queue = ary.queue

        knl, counter_multiplier, size_multiplier = \
                self.get_gen_kernel(ary.dtype, distribution)

        args = self.key + self.counter + [
                ary.data, ary.size*size_multiplier,
                scale, shift]

        n = ary.size
        from pyopencl.array import _splay
        gsize, lsize = _splay(queue.device, ary.size)

        evt = knl(queue, gsize, lsize, *args)
        ary.add_event(evt)

        # Advance the counter, carrying overflow beyond counter_max from
        # word 0 into word 1 and from word 1 into word 2.
        self.counter[0] += n * counter_multiplier
        c1_incr, self.counter[0] = divmod(self.counter[0], self.counter_max)
        if c1_incr:
            self.counter[1] += c1_incr
            c2_incr, self.counter[1] = divmod(self.counter[1], self.counter_max)
            self.counter[2] += c2_incr

        return evt

    def fill_uniform(self, ary, a=0, b=1, queue=None):
        """Fill *ary* with uniformly distributed random numbers spanning
        the interval from *a* to *b* (passed to the kernel as a shift of *a*
        and a scale of ``b - a``).

        :return: a :class:`pyopencl.Event`
        """
        return self._fill("uniform", ary,
                scale=(b-a), shift=a, queue=queue)

    def uniform(self, *args, **kwargs):
        """Make a new empty array, apply :meth:`fill_uniform` to it.

        The keyword arguments *a* and *b* are forwarded to
        :meth:`fill_uniform`; all other arguments are passed to
        :func:`pyopencl.array.empty`.
        """
        a = kwargs.pop("a", 0)
        b = kwargs.pop("b", 1)

        result = cl_array.empty(*args, **kwargs)
        self.fill_uniform(result, queue=result.queue, a=a, b=b)
        return result

    def fill_normal(self, ary, mu=0, sigma=1, queue=None):
        """Fill *ary* with normally distributed numbers with mean *mu* and
        standard deviation *sigma*.

        :return: a :class:`pyopencl.Event`
        """

        return self._fill("normal", ary, scale=sigma, shift=mu, queue=queue)

    def normal(self, *args, **kwargs):
        """Make a new empty array, apply :meth:`fill_normal` to it.

        The keyword arguments *mu* and *sigma* are forwarded to
        :meth:`fill_normal`; all other arguments are passed to
        :func:`pyopencl.array.empty`.
        """
        mu = kwargs.pop("mu", 0)
        sigma = kwargs.pop("sigma", 1)

        result = cl_array.empty(*args, **kwargs)
        self.fill_normal(result, queue=result.queue, mu=mu, sigma=sigma)
        return result
353
+
354
+
355
class PhiloxGenerator(Random123GeneratorBase):
    __doc__ = Random123GeneratorBase.__doc__

    # File name placed in the ``#include <...>`` of the kernel source.
    header_name = "pyopencl-random123/philox.cl"
    # Generator function name; also the prefix of its ctr/key type names.
    generator_name = "philox4x32"
    # Key words per work item: the work-item id plus one user-supplied word.
    key_length = 2
361
+
362
+
363
class ThreefryGenerator(Random123GeneratorBase):
    __doc__ = Random123GeneratorBase.__doc__

    # File name placed in the ``#include <...>`` of the kernel source.
    header_name = "pyopencl-random123/threefry.cl"
    # Generator function name; also the prefix of its ctr/key type names.
    generator_name = "threefry4x32"
    # Key words per work item: the work-item id plus three user-supplied words.
    key_length = 4
369
+
370
+ # }}}
371
+
372
+
373
@first_arg_dependent_memoize
def _get_generator(context):
    """Return the default generator for *context*: a
    :class:`PhiloxGenerator` if the context's first device has the CPU bit
    set in its device type, a :class:`ThreefryGenerator` otherwise.
    """
    first_device = context.devices[0]
    if first_device.type & cl.device_type.CPU:
        return PhiloxGenerator(context)

    return ThreefryGenerator(context)
381
+
382
+
383
def fill_rand(result, queue=None, a=0, b=1):
    """Fill *result* with random values in the range :math:`[a, b)`.

    :arg result: the array to fill.
    :arg queue: the queue on which to run the generator kernel; defaults to
        ``result.queue``. Its context selects the generator.
    :arg a: lower end of the range.
    :arg b: upper end of the range.
    """
    if queue is None:
        queue = result.queue
    gen = _get_generator(queue.context)
    # Hand the resolved queue on: without it an explicitly supplied queue
    # would be ignored and the fill would fail for arrays whose own queue
    # is None.
    gen.fill_uniform(result, a=a, b=b, queue=queue)
390
+
391
+
392
def rand(queue, shape, dtype, luxury=None, a=0, b=1):
    """Return an array of *shape* filled with random values of *dtype*
    in the range :math:`[a, b)`.

    The array is created on *queue*, and the generator is chosen based on
    the queue's context. *luxury* is deprecated; passing anything other than
    *None* only triggers a warning.
    """

    if luxury is not None:
        import warnings
        warnings.warn(
                "Specifying the 'luxury' argument is deprecated and will stop being "
                "supported in PyOpenCL 2018.x", stacklevel=2)

    generator = _get_generator(queue.context)

    from pyopencl.array import Array
    filled = Array(queue, shape, dtype)
    generator.fill_uniform(filled, a=a, b=b)
    return filled
407
+
408
+
409
+ # vim: filetype=pyopencl:foldmethod=marker