pyopencl 2025.2.7__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (47) hide show
  1. pyopencl/.libs/libOpenCL-83a5a7fd.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +1995 -0
  3. pyopencl/_cl.cpython-314t-x86_64-linux-gnu.so +0 -0
  4. pyopencl/_cl.pyi +2009 -0
  5. pyopencl/_cluda.py +57 -0
  6. pyopencl/_monkeypatch.py +1104 -0
  7. pyopencl/_mymako.py +17 -0
  8. pyopencl/algorithm.py +1454 -0
  9. pyopencl/array.py +3530 -0
  10. pyopencl/bitonic_sort.py +245 -0
  11. pyopencl/bitonic_sort_templates.py +597 -0
  12. pyopencl/cache.py +535 -0
  13. pyopencl/capture_call.py +200 -0
  14. pyopencl/characterize/__init__.py +461 -0
  15. pyopencl/characterize/performance.py +240 -0
  16. pyopencl/cl/pyopencl-airy.cl +324 -0
  17. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  18. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  19. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  20. pyopencl/cl/pyopencl-complex.h +303 -0
  21. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  22. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  23. pyopencl/cl/pyopencl-random123/array.h +325 -0
  24. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  25. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  26. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  27. pyopencl/clmath.py +281 -0
  28. pyopencl/clrandom.py +412 -0
  29. pyopencl/cltypes.py +217 -0
  30. pyopencl/compyte/.gitignore +21 -0
  31. pyopencl/compyte/__init__.py +0 -0
  32. pyopencl/compyte/array.py +211 -0
  33. pyopencl/compyte/dtypes.py +314 -0
  34. pyopencl/compyte/pyproject.toml +49 -0
  35. pyopencl/elementwise.py +1288 -0
  36. pyopencl/invoker.py +417 -0
  37. pyopencl/ipython_ext.py +70 -0
  38. pyopencl/py.typed +0 -0
  39. pyopencl/reduction.py +815 -0
  40. pyopencl/scan.py +1921 -0
  41. pyopencl/tools.py +1680 -0
  42. pyopencl/typing.py +61 -0
  43. pyopencl/version.py +11 -0
  44. pyopencl-2025.2.7.dist-info/METADATA +108 -0
  45. pyopencl-2025.2.7.dist-info/RECORD +47 -0
  46. pyopencl-2025.2.7.dist-info/WHEEL +6 -0
  47. pyopencl-2025.2.7.dist-info/licenses/LICENSE +104 -0
pyopencl/clmath.py ADDED
@@ -0,0 +1,281 @@
1
+ from __future__ import annotations
2
+
3
+
4
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
5
+
6
+ __license__ = """
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in
15
+ all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
+ THE SOFTWARE.
24
+ """
25
+
26
+ import numpy as np
27
+
28
+ import pyopencl.array as cl_array
29
+ import pyopencl.elementwise as elementwise
30
+ from pyopencl.array import _get_common_dtype
31
+
32
+
33
def _make_unary_array_func(name):
    """Build an array-level wrapper around the unary OpenCL function *name*.

    The returned callable has the signature ``f(array, queue=None)``. It
    allocates an output with ``array._new_like_me(queue=queue)``, runs the
    kernel obtained from ``elementwise.get_unary_func_kernel`` and returns
    the output with the kernel's event attached.
    """

    @cl_array.elwise_kernel_runner
    def knl_runner(result, arg):
        # Real inputs call the function under its plain name; complex inputs
        # use a name prefixed with the identifier of the complex dtype.
        if arg.dtype.kind != "c":
            fname = name
        else:
            from pyopencl.elementwise import complex_dtype_to_name
            fname = f"{complex_dtype_to_name(arg.dtype)}_{name}"

        return elementwise.get_unary_func_kernel(
                result.context, fname, arg.dtype)

    def f(array, queue=None):
        out = array._new_like_me(queue=queue)
        # Record the kernel event on the output array.
        out.add_event(knl_runner(out, array, queue=queue))
        return out

    return f
52
+
53
+
54
+ # See table 6.8 in the CL 1.1 spec
55
+ acos = _make_unary_array_func("acos")
56
+ acosh = _make_unary_array_func("acosh")
57
+ acospi = _make_unary_array_func("acospi")
58
+
59
+ asin = _make_unary_array_func("asin")
60
+ asinh = _make_unary_array_func("asinh")
61
+ asinpi = _make_unary_array_func("asinpi")
62
+
63
+
64
@cl_array.elwise_kernel_runner
def _atan2(result, arg1, arg2):
    """Look up the float binary-function kernel for ``atan2``.

    The kernel is requested for the context of *result* and the dtypes of
    *arg1*, *arg2* and *result*.
    """
    ctx = result.context
    return elementwise.get_float_binary_func_kernel(
            ctx, "atan2", arg1.dtype, arg2.dtype, result.dtype)
68
+
69
+
70
@cl_array.elwise_kernel_runner
def _atan2pi(result, arg1, arg2):
    """Look up the float binary-function kernel for ``atan2pi``.

    The kernel is requested for the context of *result* and the dtypes of
    *arg1*, *arg2* and *result*.
    """
    ctx = result.context
    return elementwise.get_float_binary_func_kernel(
            ctx, "atan2pi", arg1.dtype, arg2.dtype, result.dtype)
74
+
75
+
76
+ atan = _make_unary_array_func("atan")
77
+
78
+
79
def atan2(y, x, queue=None):
    """Apply the ``atan2`` kernel elementwise to *y* and *x*.

    The result is allocated from *y* using the value returned by
    ``_get_common_dtype(y, x, queue)``. If *queue* is not given,
    ``y.queue`` is used.

    :returns: the newly allocated result array, with the kernel event
        attached.

    .. versionadded:: 2013.1
    """
    if not queue:
        queue = y.queue

    out_dtype = _get_common_dtype(y, x, queue)
    result = y._new_like_me(out_dtype)
    evt = _atan2(result, y, x, queue=queue)
    result.add_event(evt)
    return result
87
+
88
+
89
+ atanh = _make_unary_array_func("atanh")
90
+ atanpi = _make_unary_array_func("atanpi")
91
+
92
+
93
def atan2pi(y, x, queue=None):
    """Apply the ``atan2pi`` kernel elementwise to *y* and *x*.

    The result is allocated from *y* using the value returned by
    ``_get_common_dtype(y, x, queue)``. If *queue* is not given,
    ``y.queue`` is used.

    :returns: the newly allocated result array, with the kernel event
        attached.

    .. versionadded:: 2013.1
    """
    if not queue:
        queue = y.queue

    out_dtype = _get_common_dtype(y, x, queue)
    result = y._new_like_me(out_dtype)
    evt = _atan2pi(result, y, x, queue=queue)
    result.add_event(evt)
    return result
101
+
102
+
103
+ cbrt = _make_unary_array_func("cbrt")
104
+ ceil = _make_unary_array_func("ceil")
105
+ # TODO: copysign
106
+
107
+ cos = _make_unary_array_func("cos")
108
+ cosh = _make_unary_array_func("cosh")
109
+ cospi = _make_unary_array_func("cospi")
110
+
111
+ erfc = _make_unary_array_func("erfc")
112
+ erf = _make_unary_array_func("erf")
113
+ exp = _make_unary_array_func("exp")
114
+ exp2 = _make_unary_array_func("exp2")
115
+ exp10 = _make_unary_array_func("exp10")
116
+ expm1 = _make_unary_array_func("expm1")
117
+
118
+ fabs = _make_unary_array_func("fabs")
119
+ # TODO: fdim
120
+ floor = _make_unary_array_func("floor")
121
+ # TODO: fma
122
+ # TODO: fmax
123
+ # TODO: fmin
124
+
125
+
126
@cl_array.elwise_kernel_runner
def _fmod(result, arg, mod):
    """Look up the ``fmod`` kernel for the dtypes of *result*, *arg* and
    *mod* in the context of *result*.
    """
    ctx = result.context
    return elementwise.get_fmod_kernel(
            ctx, result.dtype, arg.dtype, mod.dtype)
130
+
131
+
132
def fmod(arg, mod, queue=None):
    """Return the floating point remainder of the division ``arg / mod``,
    for each element in ``arg`` and ``mod``.

    If *queue* is not given, ``arg.queue`` is tried first and then
    ``mod.queue``.
    """
    if not queue:
        queue = arg.queue or mod.queue

    out_dtype = _get_common_dtype(arg, mod, queue)
    result = arg._new_like_me(out_dtype)
    evt = _fmod(result, arg, mod, queue=queue)
    result.add_event(evt)
    return result
139
+
140
+ # TODO: fract
141
+
142
+
143
@cl_array.elwise_kernel_runner
def _frexp(sig, expt, arg):
    """Look up the ``frexp`` kernel for the dtypes of *sig*, *expt* and
    *arg* in the context of *sig*.
    """
    ctx = sig.context
    return elementwise.get_frexp_kernel(
            ctx, sig.dtype, expt.dtype, arg.dtype)
147
+
148
+
149
def frexp(arg, queue=None):
    """Return a tuple ``(significands, exponents)`` such that
    ``arg == significand * 2**exponent``.

    The significand array is allocated like *arg*; the exponent array is
    allocated like *arg* but with dtype ``int32``.
    """
    significands = arg._new_like_me(queue=queue)
    exponents = arg._new_like_me(queue=queue, dtype=np.int32)

    # One kernel launch fills both outputs, so both record the same event.
    evt = _frexp(significands, exponents, arg, queue=queue)
    for out in (significands, exponents):
        out.add_event(evt)

    return significands, exponents
159
+
160
+ # TODO: hypot
161
+
162
+
163
+ ilogb = _make_unary_array_func("ilogb")
164
+
165
+
166
@cl_array.elwise_kernel_runner
def _ldexp(result, sig, exp):
    """Look up the ``ldexp`` kernel for the dtypes of *result*, *sig* and
    *exp* in the context of *result*.
    """
    ctx = result.context
    return elementwise.get_ldexp_kernel(
            ctx, result.dtype, sig.dtype, exp.dtype)
170
+
171
+
172
def ldexp(significand, exponent, queue=None):
    """Return a new array of floating point values composed from the
    entries of ``significand`` and ``exponent``, paired together as
    ``result = significand * 2**exponent``.

    :arg queue: passed both to the allocation of the result and to the
        kernel launch.
    :returns: the newly allocated result array, with the kernel event
        attached.
    """
    result = significand._new_like_me(queue=queue)
    # Forward the queue explicitly, as the other wrappers in this module do,
    # so the launch does not depend on the runner's fallback choice.
    result.add_event(_ldexp(result, significand, exponent, queue=queue))
    return result
180
+
181
+
182
+ lgamma = _make_unary_array_func("lgamma")
183
+ # TODO: lgamma_r
184
+
185
+ log = _make_unary_array_func("log")
186
+ log2 = _make_unary_array_func("log2")
187
+ log10 = _make_unary_array_func("log10")
188
+ log1p = _make_unary_array_func("log1p")
189
+ logb = _make_unary_array_func("logb")
190
+
191
+ # TODO: mad
192
+ # TODO: maxmag
193
+ # TODO: minmag
194
+
195
+
196
@cl_array.elwise_kernel_runner
def _modf(intpart, fracpart, arg):
    """Look up the ``modf`` kernel for the dtypes of *intpart*, *fracpart*
    and *arg* in the context of *intpart*.
    """
    ctx = intpart.context
    return elementwise.get_modf_kernel(
            ctx, intpart.dtype, fracpart.dtype, arg.dtype)
200
+
201
+
202
def modf(arg, queue=None):
    """Return a tuple ``(fracpart, intpart)`` of arrays containing the
    fractional and integer parts of ``arg``, in that order.
    """
    intpart = arg._new_like_me(queue=queue)
    fracpart = arg._new_like_me(queue=queue)

    # One kernel launch fills both outputs, so both record the same event.
    evt = _modf(intpart, fracpart, arg, queue=queue)
    for out in (fracpart, intpart):
        out.add_event(evt)

    return fracpart, intpart
212
+
213
+
214
+ nan = _make_unary_array_func("nan")
215
+
216
+ # TODO: nextafter
217
+ # TODO: remainder
218
+ # TODO: remquo
219
+
220
+ rint = _make_unary_array_func("rint")
221
+ # TODO: rootn
222
+ round = _make_unary_array_func("round")
223
+
224
+ sin = _make_unary_array_func("sin")
225
+ # TODO: sincos
226
+ sinh = _make_unary_array_func("sinh")
227
+ sinpi = _make_unary_array_func("sinpi")
228
+
229
+ sqrt = _make_unary_array_func("sqrt")
230
+
231
+ tan = _make_unary_array_func("tan")
232
+ tanh = _make_unary_array_func("tanh")
233
+ tanpi = _make_unary_array_func("tanpi")
234
+ tgamma = _make_unary_array_func("tgamma")
235
+ trunc = _make_unary_array_func("trunc")
236
+
237
+
238
+ # no point wrapping half_ or native_
239
+
240
+ # TODO: table 6.10, integer functions
241
+ # TODO: table 6.12, clamp et al
242
+
243
@cl_array.elwise_kernel_runner
def _bessel_jn(result, n, x):
    """Look up the ``"j"`` Bessel kernel in the context of *result*.

    The dtype passed for *n* is derived from its Python/NumPy type via
    ``np.dtype(type(n))``.
    """
    order_dtype = np.dtype(type(n))
    return elementwise.get_bessel_kernel(
            result.context, "j", result.dtype, order_dtype, x.dtype)
247
+
248
+
249
@cl_array.elwise_kernel_runner
def _bessel_yn(result, n, x):
    """Look up the ``"y"`` Bessel kernel in the context of *result*.

    The dtype passed for *n* is derived from its Python/NumPy type via
    ``np.dtype(type(n))``.
    """
    order_dtype = np.dtype(type(n))
    return elementwise.get_bessel_kernel(
            result.context, "y", result.dtype, order_dtype, x.dtype)
253
+
254
+
255
@cl_array.elwise_kernel_runner
def _hankel_01(h0, h1, x):
    """Look up the ``hankel_01`` kernel for the shared dtype of *h0*/*h1*
    and the dtype of *x*.

    :raises TypeError: if *h0* and *h1* have different dtypes.
    """
    if h0.dtype == h1.dtype:
        return elementwise.get_hankel_01_kernel(
                h0.context, h0.dtype, x.dtype)

    raise TypeError("types of h0 and h1 must match")
261
+
262
+
263
def bessel_jn(n, x, queue=None):
    """Run the ``"j"`` Bessel kernel for order *n* on the array *x*.

    Presumably this evaluates the Bessel function of the first kind
    :math:`J_n(x)` elementwise -- confirm against the kernel source.

    :returns: a new array allocated like *x*, with the kernel event
        attached.
    """
    out = x._new_like_me(queue=queue)
    evt = _bessel_jn(out, n, x, queue=queue)
    out.add_event(evt)
    return out
267
+
268
+
269
def bessel_yn(n, x, queue=None):
    """Run the ``"y"`` Bessel kernel for order *n* on the array *x*.

    Presumably this evaluates the Bessel function of the second kind
    :math:`Y_n(x)` elementwise -- confirm against the kernel source.

    :returns: a new array allocated like *x*, with the kernel event
        attached.
    """
    out = x._new_like_me(queue=queue)
    evt = _bessel_yn(out, n, x, queue=queue)
    out.add_event(evt)
    return out
273
+
274
+
275
def hankel_01(x, queue=None):
    """Run the ``hankel_01`` kernel on *x* and return ``(h0, h1)``.

    Both outputs are allocated like *x*. Presumably they hold the Hankel
    functions of order 0 and 1 -- confirm against the kernel source.
    """
    h0 = x._new_like_me(queue=queue)
    h1 = x._new_like_me(queue=queue)

    # One kernel launch fills both outputs, so both record the same event.
    evt = _hankel_01(h0, h1, x, queue=queue)
    for out in (h0, h1):
        out.add_event(evt)

    return h0, h1
pyopencl/clrandom.py ADDED
@@ -0,0 +1,412 @@
1
+ from __future__ import annotations
2
+
3
+
4
+ __copyright__ = "Copyright (C) 2009-16 Andreas Kloeckner"
5
+
6
+ __license__ = """
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in
15
+ all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
+ THE SOFTWARE.
24
+ """
25
+
26
+
27
+ # {{{ documentation
28
+
29
+ __doc__ = """
30
+ PyOpenCL includes and uses some of the `Random123 random number generators
31
+ <https://www.deshawresearch.com/resources.html>`__ by D.E. Shaw
32
+ Research. In addition to being usable through the convenience functions above,
33
+ they are available in any piece of code compiled through PyOpenCL by::
34
+
35
+ #include <pyopencl-random123/philox.cl>
36
+ #include <pyopencl-random123/threefry.cl>
37
+
38
+ See the `Philox source
39
+ <https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/philox.cl>`__
40
+ and the `Threefry source
41
+ <https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/threefry.cl>`__
42
+ for some documentation if you're planning on using Random123 directly.
43
+
44
+ .. autoclass:: PhiloxGenerator
45
+
46
+ .. autoclass:: ThreefryGenerator
47
+
48
+ .. autofunction:: rand
49
+ .. autofunction:: fill_rand
50
+
51
+ """
52
+
53
+ # }}}
54
+
55
+ import numpy as np
56
+
57
+ from pytools import memoize_method
58
+
59
+ import pyopencl as cl
60
+ import pyopencl.array as cl_array
61
+ import pyopencl.cltypes as cltypes
62
+ from pyopencl.tools import first_arg_dependent_memoize
63
+
64
+
65
+ # {{{ Random123 generators
66
+
67
class Random123GeneratorBase:
    """
    .. versionadded:: 2016.2

    .. automethod:: fill_uniform
    .. automethod:: uniform
    .. automethod:: fill_normal
    .. automethod:: normal
    """

    # The three properties below are placeholders: concrete generators
    # override them with plain class attributes.

    @property
    def header_name(self):
        # Name of the OpenCL header that is ``#include``-d by the kernel.
        raise NotImplementedError

    @property
    def generator_name(self):
        # Identifier used for the generator function and its
        # ``<name>_ctr_t`` / ``<name>_key_t`` types in the kernel source.
        raise NotImplementedError

    @property
    def key_length(self):
        # Number of key words; the first one is filled with the work-item
        # id inside the kernel, so ``key_length - 1`` come from the host.
        raise NotImplementedError

    def __init__(self, context, key=None, counter=None, seed=None):
        """
        :arg context: the context in which generator kernels are built.
        :arg key: optional sequence of ``key_length - 1`` integers.
        :arg counter: optional sequence of 4 integers.
        :arg seed: seed for the host-side RNG that supplies whichever of
            *key* and *counter* is not given.
        :raises TypeError: if *key*, *counter* and *seed* are all given,
            since the seed would then be unused.
        """
        # Validate before doing any work.
        if key is not None and counter is not None and seed is not None:
            raise TypeError("seed is unused and may not be specified "
                    "if both counter and key are given")

        int32_info = np.iinfo(np.int32)
        lo = int(int32_info.min)
        hi = int(int32_info.max) + 1

        from random import Random
        rng = Random(seed)

        # Draw order (key first, then counter) is kept so that a given seed
        # keeps producing the same key/counter values.
        if key is None:
            key = [rng.randrange(lo, hi) for _ in range(self.key_length-1)]
        if counter is None:
            counter = [rng.randrange(lo, hi) for _ in range(4)]

        self.context = context
        # Store private list copies: _fill() concatenates key and counter
        # as lists and advances the counter in place, which must neither
        # fail for tuples nor modify the caller's sequence.
        self.key = list(key)
        self.counter = list(counter)

        self.counter_max = int32_info.max

    @memoize_method
    def get_gen_kernel(self, dtype, distribution):
        """Build the generator kernel for *dtype* and *distribution*.

        :returns: a tuple ``(knl, counter_multiplier, size_multiplier)``.
            *counter_multiplier* is the factor by which :meth:`_fill`
            scales its counter advance; *size_multiplier* is the number of
            scalars per array entry (larger than 1 only for vector types
            found in ``cltypes.vec_type_to_scalar_and_count``).
        :raises TypeError: for an unsupported distribution/dtype pair.
        """
        size_multiplier = 1
        arg_dtype = dtype

        rng_key = (distribution, dtype)

        if rng_key in [("uniform", np.float64), ("normal", np.float64)]:
            c_type = "double"
            scale1_const = "((double) %r)" % (1/2**32)
            scale2_const = "((double) %r)" % (1/2**64)
            if distribution == "normal":
                transform = "box_muller"
            else:
                transform = ""

            # ``gen`` appears twice in this expression, so each double
            # result consumes two generator invocations.
            rng_expr = (
                    "shift + scale * "
                    "%s( %s * convert_double4(gen)"
                    "+ %s * convert_double4(gen))"
                    % (transform, scale1_const, scale2_const))

            counter_multiplier = 2

        elif rng_key in [(dist, cmp_dtype)
                for dist in ["normal", "uniform"]
                for cmp_dtype in [
                    np.float32,
                    cltypes.float2,
                    cltypes.float3,
                    cltypes.float4,
                    ]]:
            c_type = "float"
            scale_const = "((float) %r)" % (1/2**32)

            if distribution == "normal":
                transform = "box_muller"
            else:
                transform = ""

            rng_expr = (
                    "shift + scale * %s(%s * convert_float4(gen))"
                    % (transform, scale_const))
            counter_multiplier = 1
            # Scale and shift are passed as scalar floats even when the
            # array holds float vectors.
            arg_dtype = np.float32
            try:
                _, size_multiplier = cltypes.vec_type_to_scalar_and_count[dtype]
            except KeyError:
                # Plain float32: one scalar per entry.
                pass

        elif rng_key == ("uniform", np.int32):
            c_type = "int"
            rng_expr = (
                    "shift + convert_int4((convert_long4(gen) * scale) / %s)"
                    % (str(2**32)+"l")
                    )
            counter_multiplier = 1

        elif rng_key == ("uniform", np.int64):
            c_type = "long"
            rng_expr = (
                    "shift"
                    "+ convert_long4(gen) * (scale/two32) "
                    "+ ((convert_long4(gen) * scale) / two32)"
                    .replace("two32", (str(2**32)+"l")))
            counter_multiplier = 2

        else:
            raise TypeError(
                    "unsupported RNG distribution/data type combination '%s/%s'"
                    % rng_key)

        kernel_name = f"rng_gen_{self.generator_name}_{distribution}"
        src = """//CL//
            #include <{header_name}>

            #ifndef M_PI
            #ifdef M_PI_F
            #define M_PI M_PI_F
            #else
            #define M_PI 3.14159265359f
            #endif
            #endif

            typedef {output_t} output_t;
            typedef {output_t}4 output_vec_t;
            typedef {gen_name}_ctr_t ctr_t;
            typedef {gen_name}_key_t key_t;

            uint4 gen_bits(key_t *key, ctr_t *ctr)
            {{
                union {{
                    ctr_t ctr_el;
                    uint4 vec_el;
                }} u;

                u.ctr_el = {gen_name}(*ctr, *key);
                if (++ctr->v[0] == 0)
                    if (++ctr->v[1] == 0)
                        ++ctr->v[2];

                return u.vec_el;
            }}

            #if {include_box_muller}
            output_vec_t box_muller(output_vec_t x)
            {{
                #define BOX_MULLER(I, COMPA, COMPB) \
                    output_t r##I = sqrt(-2*log(x.COMPA)); \
                    output_t c##I; \
                    output_t s##I = sincos((output_t) (2*M_PI) * x.COMPB, &c##I);

                BOX_MULLER(0, x, y);
                BOX_MULLER(1, z, w);
                return (output_vec_t) (r0*c0, r0*s0, r1*c1, r1*s1);
            }}
            #endif

            #define GET_RANDOM_NUM(gen) {rng_expr}

            kernel void {kernel_name}(
                int k1,
                #if {key_length} > 2
                int k2, int k3,
                #endif
                int c0, int c1, int c2, int c3,
                global output_t *output,
                long out_size,
                output_t scale,
                output_t shift)
            {{
                #if {key_length} == 2
                key_t k = {{{{get_global_id(0), k1}}}};
                #else
                key_t k = {{{{get_global_id(0), k1, k2, k3}}}};
                #endif

                ctr_t c = {{{{c0, c1, c2, c3}}}};

                // output bulk
                unsigned long idx = get_global_id(0)*4;
                while (idx + 4 < out_size)
                {{
                    output_vec_t ran = GET_RANDOM_NUM(gen_bits(&k, &c));
                    vstore4(ran, 0, &output[idx]);
                    idx += 4*get_global_size(0);
                }}

                // output tail
                output_vec_t tail_ran = GET_RANDOM_NUM(gen_bits(&k, &c));
                if (idx < out_size)
                  output[idx] = tail_ran.x;
                if (idx+1 < out_size)
                  output[idx+1] = tail_ran.y;
                if (idx+2 < out_size)
                  output[idx+2] = tail_ran.z;
                if (idx+3 < out_size)
                  output[idx+3] = tail_ran.w;
            }}
            """.format(
                kernel_name=kernel_name,
                gen_name=self.generator_name,
                header_name=self.header_name,
                output_t=c_type,
                key_length=self.key_length,
                include_box_muller=int(distribution == "normal"),
                rng_expr=rng_expr
                )

        prg = cl.Program(self.context, src).build()
        knl = getattr(prg, kernel_name)
        # Key words and counter words are int32; then come the output
        # buffer (None), the int64 size, and scale/shift in arg_dtype.
        knl.set_scalar_arg_dtypes(
                [np.int32] * (self.key_length - 1 + 4)
                + [None, np.int64, arg_dtype, arg_dtype])

        return knl, counter_multiplier, size_multiplier

    def _fill(self, distribution, ary, scale, shift, queue=None):
        """Fill *ary* with random numbers drawn from *distribution*.

        *scale* and *shift* are handed to the generator kernel, which
        applies them as ``shift + scale * <raw value>`` (see the
        expressions assembled in :meth:`get_gen_kernel`). Afterwards the
        host-side counter is advanced.

        :arg queue: defaults to ``ary.queue``.
        :return: a :class:`pyopencl.Event`
        """

        if queue is None:
            queue = ary.queue

        knl, counter_multiplier, size_multiplier = \
                self.get_gen_kernel(ary.dtype, distribution)

        args = self.key + self.counter + [
                ary.data, ary.size*size_multiplier,
                scale, shift]

        n = ary.size
        from pyopencl.array import _splay
        gsize, lsize = _splay(queue.device, ary.size)

        evt = knl(queue, gsize, lsize, *args)
        ary.add_event(evt)

        # Advance the counter, carrying overflow from word 0 into word 1
        # and from word 1 into word 2.
        self.counter[0] += n * counter_multiplier
        c1_incr, self.counter[0] = divmod(self.counter[0], self.counter_max)
        if c1_incr:
            self.counter[1] += c1_incr
            c2_incr, self.counter[1] = divmod(self.counter[1], self.counter_max)
            self.counter[2] += c2_incr

        return evt

    def fill_uniform(self, ary, a=0, b=1, queue=None):
        """Fill *ary* with uniformly distributed random numbers, using
        ``b - a`` as the scale and *a* as the shift.

        :return: a :class:`pyopencl.Event`
        """
        return self._fill("uniform", ary,
                scale=(b-a), shift=a, queue=queue)

    def uniform(self, *args, **kwargs):
        """Make a new empty array, apply :meth:`fill_uniform` to it.

        The keyword arguments *a* and *b* are forwarded to
        :meth:`fill_uniform`; everything else goes to
        ``pyopencl.array.empty``.
        """
        a = kwargs.pop("a", 0)
        b = kwargs.pop("b", 1)

        result = cl_array.empty(*args, **kwargs)
        self.fill_uniform(result, queue=result.queue, a=a, b=b)
        return result

    def fill_normal(self, ary, mu=0, sigma=1, queue=None):
        """Fill *ary* with normally distributed numbers with mean *mu* and
        standard deviation *sigma*.

        :return: a :class:`pyopencl.Event`
        """

        return self._fill("normal", ary, scale=sigma, shift=mu, queue=queue)

    def normal(self, *args, **kwargs):
        """Make a new empty array, apply :meth:`fill_normal` to it.

        The keyword arguments *mu* and *sigma* are forwarded to
        :meth:`fill_normal`; everything else goes to
        ``pyopencl.array.empty``.
        """
        mu = kwargs.pop("mu", 0)
        sigma = kwargs.pop("sigma", 1)

        result = cl_array.empty(*args, **kwargs)
        self.fill_normal(result, queue=result.queue, mu=mu, sigma=sigma)
        return result
356
+
357
+
358
class PhiloxGenerator(Random123GeneratorBase):
    # Reuse the base class documentation verbatim.
    __doc__ = Random123GeneratorBase.__doc__

    # Concrete values replacing the placeholder properties of the base.
    generator_name = "philox4x32"
    header_name = "pyopencl-random123/philox.cl"
    key_length = 2
364
+
365
+
366
class ThreefryGenerator(Random123GeneratorBase):
    # Reuse the base class documentation verbatim.
    __doc__ = Random123GeneratorBase.__doc__

    # Concrete values replacing the placeholder properties of the base.
    generator_name = "threefry4x32"
    header_name = "pyopencl-random123/threefry.cl"
    key_length = 4
372
+
373
+ # }}}
374
+
375
+
376
@first_arg_dependent_memoize
def _get_generator(context):
    """Return the default generator for *context*.

    A :class:`PhiloxGenerator` is used when the first device of the context
    has the CPU bit set in its type, a :class:`ThreefryGenerator` otherwise.
    """
    on_cpu = context.devices[0].type & cl.device_type.CPU
    generator_cls = PhiloxGenerator if on_cpu else ThreefryGenerator
    return generator_cls(context)
384
+
385
+
386
def fill_rand(result, queue=None, a=0, b=1):
    """Fill *result* with random values in the range :math:`[a, b)`
    (:math:`[0, 1)` by default).

    :arg queue: the queue on which the fill is enqueued and whose context
        selects the generator; defaults to ``result.queue``.
    """
    if queue is None:
        queue = result.queue
    gen = _get_generator(queue.context)
    # Forward the queue: without it, an explicitly passed queue would only
    # pick the context and the fill would fall back to ``result.queue``.
    gen.fill_uniform(result, a=a, b=b, queue=queue)
393
+
394
+
395
def rand(queue, shape, dtype, luxury=None, a=0, b=1):
    """Return an array of *shape* filled with random values of *dtype*
    in the range :math:`[a, b)`.

    *luxury* is accepted only for backward compatibility: passing anything
    other than *None* emits a warning, and the value is otherwise ignored.
    """

    if luxury is not None:
        import warnings
        warnings.warn(
            "Specifying the 'luxury' argument is deprecated and will stop being "
            "supported in PyOpenCL 2018.x", stacklevel=2)

    from pyopencl.array import Array

    generator = _get_generator(queue.context)
    out = Array(queue, shape, dtype)
    generator.fill_uniform(out, a=a, b=b)
    return out
410
+
411
+
412
+ # vim: filetype=pyopencl:foldmethod=marker