pyopencl 2024.2__cp311-cp311-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (122) hide show
  1. pyopencl/__init__.py +2393 -0
  2. pyopencl/_cl.cpython-311-darwin.so +0 -0
  3. pyopencl/_cluda.py +54 -0
  4. pyopencl/_mymako.py +14 -0
  5. pyopencl/algorithm.py +1444 -0
  6. pyopencl/array.py +3427 -0
  7. pyopencl/bitonic_sort.py +238 -0
  8. pyopencl/bitonic_sort_templates.py +594 -0
  9. pyopencl/cache.py +534 -0
  10. pyopencl/capture_call.py +176 -0
  11. pyopencl/characterize/__init__.py +433 -0
  12. pyopencl/characterize/performance.py +237 -0
  13. pyopencl/cl/pyopencl-airy.cl +324 -0
  14. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  15. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  16. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  17. pyopencl/cl/pyopencl-complex.h +303 -0
  18. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  19. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  20. pyopencl/cl/pyopencl-random123/array.h +325 -0
  21. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  22. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  23. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  24. pyopencl/clmath.py +280 -0
  25. pyopencl/clrandom.py +408 -0
  26. pyopencl/cltypes.py +137 -0
  27. pyopencl/compyte/__init__.py +0 -0
  28. pyopencl/compyte/array.py +214 -0
  29. pyopencl/compyte/dtypes.py +290 -0
  30. pyopencl/compyte/ndarray/__init__.py +0 -0
  31. pyopencl/compyte/ndarray/gen_elemwise.py +1907 -0
  32. pyopencl/compyte/ndarray/gen_reduction.py +1511 -0
  33. pyopencl/compyte/ndarray/setup_opencl.py +101 -0
  34. pyopencl/compyte/ndarray/test_gpu_elemwise.py +411 -0
  35. pyopencl/compyte/ndarray/test_gpu_ndarray.py +487 -0
  36. pyopencl/elementwise.py +1164 -0
  37. pyopencl/invoker.py +418 -0
  38. pyopencl/ipython_ext.py +68 -0
  39. pyopencl/reduction.py +780 -0
  40. pyopencl/scan.py +1898 -0
  41. pyopencl/tools.py +1513 -0
  42. pyopencl/version.py +3 -0
  43. pyopencl-2024.2.data/data/CITATION.cff +74 -0
  44. pyopencl-2024.2.data/data/LICENSE +282 -0
  45. pyopencl-2024.2.data/data/Makefile.in +21 -0
  46. pyopencl-2024.2.data/data/README.rst +70 -0
  47. pyopencl-2024.2.data/data/README_SETUP.txt +34 -0
  48. pyopencl-2024.2.data/data/aksetup_helper.py +1013 -0
  49. pyopencl-2024.2.data/data/configure.py +6 -0
  50. pyopencl-2024.2.data/data/contrib/cldis.py +91 -0
  51. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/README +29 -0
  52. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/translate.py +1441 -0
  53. pyopencl-2024.2.data/data/contrib/pyopencl.vim +84 -0
  54. pyopencl-2024.2.data/data/doc/Makefile +23 -0
  55. pyopencl-2024.2.data/data/doc/algorithm.rst +214 -0
  56. pyopencl-2024.2.data/data/doc/array.rst +305 -0
  57. pyopencl-2024.2.data/data/doc/conf.py +26 -0
  58. pyopencl-2024.2.data/data/doc/howto.rst +105 -0
  59. pyopencl-2024.2.data/data/doc/index.rst +137 -0
  60. pyopencl-2024.2.data/data/doc/make_constants.py +561 -0
  61. pyopencl-2024.2.data/data/doc/misc.rst +885 -0
  62. pyopencl-2024.2.data/data/doc/runtime.rst +51 -0
  63. pyopencl-2024.2.data/data/doc/runtime_const.rst +30 -0
  64. pyopencl-2024.2.data/data/doc/runtime_gl.rst +78 -0
  65. pyopencl-2024.2.data/data/doc/runtime_memory.rst +527 -0
  66. pyopencl-2024.2.data/data/doc/runtime_platform.rst +184 -0
  67. pyopencl-2024.2.data/data/doc/runtime_program.rst +364 -0
  68. pyopencl-2024.2.data/data/doc/runtime_queue.rst +182 -0
  69. pyopencl-2024.2.data/data/doc/subst.rst +36 -0
  70. pyopencl-2024.2.data/data/doc/tools.rst +4 -0
  71. pyopencl-2024.2.data/data/doc/types.rst +42 -0
  72. pyopencl-2024.2.data/data/examples/black-hole-accretion.py +2227 -0
  73. pyopencl-2024.2.data/data/examples/demo-struct-reduce.py +75 -0
  74. pyopencl-2024.2.data/data/examples/demo.py +39 -0
  75. pyopencl-2024.2.data/data/examples/demo_array.py +32 -0
  76. pyopencl-2024.2.data/data/examples/demo_array_svm.py +37 -0
  77. pyopencl-2024.2.data/data/examples/demo_elementwise.py +34 -0
  78. pyopencl-2024.2.data/data/examples/demo_elementwise_complex.py +53 -0
  79. pyopencl-2024.2.data/data/examples/demo_mandelbrot.py +183 -0
  80. pyopencl-2024.2.data/data/examples/demo_meta_codepy.py +56 -0
  81. pyopencl-2024.2.data/data/examples/demo_meta_template.py +55 -0
  82. pyopencl-2024.2.data/data/examples/dump-performance.py +38 -0
  83. pyopencl-2024.2.data/data/examples/dump-properties.py +86 -0
  84. pyopencl-2024.2.data/data/examples/gl_interop_demo.py +84 -0
  85. pyopencl-2024.2.data/data/examples/gl_particle_animation.py +218 -0
  86. pyopencl-2024.2.data/data/examples/ipython-demo.ipynb +203 -0
  87. pyopencl-2024.2.data/data/examples/median-filter.py +99 -0
  88. pyopencl-2024.2.data/data/examples/n-body.py +1070 -0
  89. pyopencl-2024.2.data/data/examples/narray.py +37 -0
  90. pyopencl-2024.2.data/data/examples/noisyImage.jpg +0 -0
  91. pyopencl-2024.2.data/data/examples/pi-monte-carlo.py +1166 -0
  92. pyopencl-2024.2.data/data/examples/svm.py +82 -0
  93. pyopencl-2024.2.data/data/examples/transpose.py +229 -0
  94. pyopencl-2024.2.data/data/pytest.ini +3 -0
  95. pyopencl-2024.2.data/data/src/bitlog.cpp +51 -0
  96. pyopencl-2024.2.data/data/src/bitlog.hpp +83 -0
  97. pyopencl-2024.2.data/data/src/clinfo_ext.h +134 -0
  98. pyopencl-2024.2.data/data/src/mempool.hpp +444 -0
  99. pyopencl-2024.2.data/data/src/pyopencl_ext.h +77 -0
  100. pyopencl-2024.2.data/data/src/tools.hpp +90 -0
  101. pyopencl-2024.2.data/data/src/wrap_cl.cpp +61 -0
  102. pyopencl-2024.2.data/data/src/wrap_cl.hpp +5853 -0
  103. pyopencl-2024.2.data/data/src/wrap_cl_part_1.cpp +369 -0
  104. pyopencl-2024.2.data/data/src/wrap_cl_part_2.cpp +702 -0
  105. pyopencl-2024.2.data/data/src/wrap_constants.cpp +1274 -0
  106. pyopencl-2024.2.data/data/src/wrap_helpers.hpp +213 -0
  107. pyopencl-2024.2.data/data/src/wrap_mempool.cpp +731 -0
  108. pyopencl-2024.2.data/data/test/add-vectors-32.spv +0 -0
  109. pyopencl-2024.2.data/data/test/add-vectors-64.spv +0 -0
  110. pyopencl-2024.2.data/data/test/empty-header.h +1 -0
  111. pyopencl-2024.2.data/data/test/test_algorithm.py +1180 -0
  112. pyopencl-2024.2.data/data/test/test_array.py +2392 -0
  113. pyopencl-2024.2.data/data/test/test_arrays_in_structs.py +100 -0
  114. pyopencl-2024.2.data/data/test/test_clmath.py +529 -0
  115. pyopencl-2024.2.data/data/test/test_clrandom.py +75 -0
  116. pyopencl-2024.2.data/data/test/test_enqueue_copy.py +271 -0
  117. pyopencl-2024.2.data/data/test/test_wrapper.py +1554 -0
  118. pyopencl-2024.2.dist-info/LICENSE +282 -0
  119. pyopencl-2024.2.dist-info/METADATA +105 -0
  120. pyopencl-2024.2.dist-info/RECORD +122 -0
  121. pyopencl-2024.2.dist-info/WHEEL +5 -0
  122. pyopencl-2024.2.dist-info/top_level.txt +1 -0
pyopencl/clmath.py ADDED
@@ -0,0 +1,280 @@
1
+ # pylint:disable=unexpected-keyword-arg # for @elwise_kernel_runner
2
+
3
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
4
+
5
+ __license__ = """
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import numpy as np
26
+
27
+ import pyopencl.array as cl_array
28
+ import pyopencl.elementwise as elementwise
29
+ from pyopencl.array import _get_common_dtype
30
+
31
+
32
def _make_unary_array_func(name):
    """Build an element-wise wrapper around the unary OpenCL function *name*.

    The returned callable has the signature ``f(array, queue=None)``. It
    allocates an output via ``array._new_like_me``, runs the kernel on it,
    attaches the resulting event to the output and returns the output.
    """

    @cl_array.elwise_kernel_runner
    def knl_runner(result, arg):
        # Complex inputs are routed to a function whose name is prefixed
        # with the complex type's name; everything else uses *name* as is.
        if arg.dtype.kind != "c":
            kernel_func_name = name
        else:
            from pyopencl.elementwise import complex_dtype_to_name
            kernel_func_name = f"{complex_dtype_to_name(arg.dtype)}_{name}"

        return elementwise.get_unary_func_kernel(
                result.context, kernel_func_name, arg.dtype)

    def f(array, queue=None):
        out = array._new_like_me(queue=queue)
        out.add_event(knl_runner(out, array, queue=queue))
        return out

    return f
51
+
52
+
53
# See table 6.8 in the CL 1.1 spec
# Each name below is a wrapper produced by _make_unary_array_func: it is
# called as ``f(array, queue=None)`` and returns a newly allocated array.
acos = _make_unary_array_func("acos")
acosh = _make_unary_array_func("acosh")
acospi = _make_unary_array_func("acospi")

asin = _make_unary_array_func("asin")
asinh = _make_unary_array_func("asinh")
asinpi = _make_unary_array_func("asinpi")
61
+
62
+
63
@cl_array.elwise_kernel_runner
def _atan2(result, arg1, arg2):
    """Look up the float binary-function kernel for ``atan2``.

    The kernel is selected from the context of *result* and the dtypes of
    both operands and of *result*.
    """
    operand_dtypes = (arg1.dtype, arg2.dtype)
    return elementwise.get_float_binary_func_kernel(
            result.context, "atan2", *operand_dtypes, result.dtype)
67
+
68
+
69
@cl_array.elwise_kernel_runner
def _atan2pi(result, arg1, arg2):
    """Look up the float binary-function kernel for ``atan2pi``.

    The kernel is selected from the context of *result* and the dtypes of
    both operands and of *result*.
    """
    operand_dtypes = (arg1.dtype, arg2.dtype)
    return elementwise.get_float_binary_func_kernel(
            result.context, "atan2pi", *operand_dtypes, result.dtype)
73
+
74
+
75
# Element-wise wrapper built by _make_unary_array_func; call as
# ``atan(array, queue=None)``.
atan = _make_unary_array_func("atan")
76
+
77
+
78
def atan2(y, x, queue=None):
    """Return a new array holding the element-wise ``atan2`` of *y* and *x*.

    :arg y: first operand; the result is allocated via ``y._new_like_me``.
    :arg x: second operand.
    :arg queue: command queue to use. When omitted, ``y.queue`` is used,
        and failing that ``x.queue`` (if *x* has one), matching the queue
        resolution done by :func:`fmod`.

    .. versionadded:: 2013.1
    """
    # getattr: *x* is only required to expose a dtype, not a queue.
    queue = (queue or y.queue) or getattr(x, "queue", None)
    result = y._new_like_me(_get_common_dtype(y, x, queue))
    result.add_event(_atan2(result, y, x, queue=queue))
    return result
86
+
87
+
88
# Element-wise wrappers built by _make_unary_array_func; each is called as
# ``f(array, queue=None)``.
atanh = _make_unary_array_func("atanh")
atanpi = _make_unary_array_func("atanpi")
90
+
91
+
92
def atan2pi(y, x, queue=None):
    """Return a new array holding the element-wise ``atan2pi`` of *y* and *x*.

    :arg y: first operand; the result is allocated via ``y._new_like_me``.
    :arg x: second operand.
    :arg queue: command queue to use. When omitted, ``y.queue`` is used,
        and failing that ``x.queue`` (if *x* has one), matching the queue
        resolution done by :func:`fmod`.

    .. versionadded:: 2013.1
    """
    # getattr: *x* is only required to expose a dtype, not a queue.
    queue = (queue or y.queue) or getattr(x, "queue", None)
    result = y._new_like_me(_get_common_dtype(y, x, queue))
    result.add_event(_atan2pi(result, y, x, queue=queue))
    return result
100
+
101
+
102
# Element-wise wrappers built by _make_unary_array_func; each is called as
# ``f(array, queue=None)`` and returns a newly allocated array. The TODO
# entries name functions that have no wrapper in this module yet.
cbrt = _make_unary_array_func("cbrt")
ceil = _make_unary_array_func("ceil")
# TODO: copysign

cos = _make_unary_array_func("cos")
cosh = _make_unary_array_func("cosh")
cospi = _make_unary_array_func("cospi")

erfc = _make_unary_array_func("erfc")
erf = _make_unary_array_func("erf")
exp = _make_unary_array_func("exp")
exp2 = _make_unary_array_func("exp2")
exp10 = _make_unary_array_func("exp10")
expm1 = _make_unary_array_func("expm1")

fabs = _make_unary_array_func("fabs")
# TODO: fdim
floor = _make_unary_array_func("floor")
# TODO: fma
# TODO: fmax
# TODO: fmin
123
+
124
+
125
@cl_array.elwise_kernel_runner
def _fmod(result, arg, mod):
    """Look up the ``fmod`` kernel for the dtypes of *result*, *arg*, *mod*."""
    dtypes = (result.dtype, arg.dtype, mod.dtype)
    return elementwise.get_fmod_kernel(result.context, *dtypes)
129
+
130
+
131
def fmod(arg, mod, queue=None):
    """Compute the element-wise floating point remainder of ``arg / mod``.

    The queue is taken from *queue*, then ``arg.queue``, then ``mod.queue``.
    The result is a new array allocated via ``arg._new_like_me`` with the
    common dtype of both operands.
    """
    if not queue:
        queue = arg.queue or mod.queue

    common_dtype = _get_common_dtype(arg, mod, queue)
    out = arg._new_like_me(common_dtype)
    evt = _fmod(out, arg, mod, queue=queue)
    out.add_event(evt)
    return out
138
+
139
+ # TODO: fract
140
+
141
+
142
@cl_array.elwise_kernel_runner
def _frexp(sig, expt, arg):
    """Look up the ``frexp`` kernel for the dtypes of *sig*, *expt*, *arg*."""
    dtypes = (sig.dtype, expt.dtype, arg.dtype)
    return elementwise.get_frexp_kernel(sig.context, *dtypes)
146
+
147
+
148
def frexp(arg, queue=None):
    """Split *arg* into significands and exponents.

    Returns a tuple ``(significands, exponents)`` such that
    ``arg == significand * 2**exponent``. The exponent array is allocated
    with dtype ``int32``; the significand array keeps the dtype of *arg*.
    """
    significands = arg._new_like_me(queue=queue)
    exponents = arg._new_like_me(queue=queue, dtype=np.int32)

    # One kernel launch writes both outputs, so both wait on the same event.
    evt = _frexp(significands, exponents, arg, queue=queue)
    for out in (significands, exponents):
        out.add_event(evt)

    return significands, exponents
158
+
159
+ # TODO: hypot
160
+
161
+
162
# Element-wise wrapper built by _make_unary_array_func; call as
# ``ilogb(array, queue=None)``.
ilogb = _make_unary_array_func("ilogb")
163
+
164
+
165
@cl_array.elwise_kernel_runner
def _ldexp(result, sig, exp):
    """Look up the ``ldexp`` kernel for the dtypes of *result*, *sig*, *exp*."""
    dtypes = (result.dtype, sig.dtype, exp.dtype)
    return elementwise.get_ldexp_kernel(result.context, *dtypes)
169
+
170
+
171
def ldexp(significand, exponent, queue=None):
    """Return a new array of floating point values composed from the
    entries of ``significand`` and ``exponent``, paired together as
    ``result = significand * 2**exponent``.

    :arg queue: command queue used both to allocate the result and to run
        the kernel.
    """
    result = significand._new_like_me(queue=queue)
    # Forward the queue explicitly, as every other wrapper in this module
    # does, instead of relying on the runner to recover it from *result*.
    result.add_event(_ldexp(result, significand, exponent, queue=queue))
    return result
179
+
180
+
181
# Element-wise wrappers built by _make_unary_array_func; each is called as
# ``f(array, queue=None)`` and returns a newly allocated array. The TODO
# entries name functions that have no wrapper in this module yet.
lgamma = _make_unary_array_func("lgamma")
# TODO: lgamma_r

log = _make_unary_array_func("log")
log2 = _make_unary_array_func("log2")
log10 = _make_unary_array_func("log10")
log1p = _make_unary_array_func("log1p")
logb = _make_unary_array_func("logb")

# TODO: mad
# TODO: maxmag
# TODO: minmag
193
+
194
+
195
@cl_array.elwise_kernel_runner
def _modf(intpart, fracpart, arg):
    """Look up the ``modf`` kernel for the dtypes of the two outputs and *arg*."""
    dtypes = (intpart.dtype, fracpart.dtype, arg.dtype)
    return elementwise.get_modf_kernel(intpart.context, *dtypes)
199
+
200
+
201
def modf(arg, queue=None):
    """Split *arg* into fractional and integer parts.

    Returns a tuple ``(fracpart, intpart)`` of new arrays, both allocated
    via ``arg._new_like_me``. Note the order: fractional part first.
    """
    intpart = arg._new_like_me(queue=queue)
    fracpart = arg._new_like_me(queue=queue)

    # One kernel launch writes both outputs, so both wait on the same event.
    evt = _modf(intpart, fracpart, arg, queue=queue)
    for out in (fracpart, intpart):
        out.add_event(evt)

    return fracpart, intpart
211
+
212
+
213
# Element-wise wrappers built by _make_unary_array_func; each is called as
# ``f(array, queue=None)`` and returns a newly allocated array. The TODO
# entries name functions that have no wrapper in this module yet.
nan = _make_unary_array_func("nan")

# TODO: nextafter
# TODO: remainder
# TODO: remquo

rint = _make_unary_array_func("rint")
# TODO: rootn
# NOTE: this module-level name shadows the builtin round() inside this module.
round = _make_unary_array_func("round")

sin = _make_unary_array_func("sin")
# TODO: sincos
sinh = _make_unary_array_func("sinh")
sinpi = _make_unary_array_func("sinpi")

sqrt = _make_unary_array_func("sqrt")

tan = _make_unary_array_func("tan")
tanh = _make_unary_array_func("tanh")
tanpi = _make_unary_array_func("tanpi")
tgamma = _make_unary_array_func("tgamma")
trunc = _make_unary_array_func("trunc")


# no point wrapping half_ or native_

# TODO: table 6.10, integer functions
# TODO: table 6.12, clamp et al
241
+
242
@cl_array.elwise_kernel_runner
def _bessel_jn(result, n, x):
    """Look up the Bessel ``"j"`` kernel.

    The dtype passed for the order *n* is derived from its Python type.
    """
    order_dtype = np.dtype(type(n))
    return elementwise.get_bessel_kernel(
            result.context, "j", result.dtype, order_dtype, x.dtype)
246
+
247
+
248
@cl_array.elwise_kernel_runner
def _bessel_yn(result, n, x):
    """Look up the Bessel ``"y"`` kernel.

    The dtype passed for the order *n* is derived from its Python type.
    """
    order_dtype = np.dtype(type(n))
    return elementwise.get_bessel_kernel(
            result.context, "y", result.dtype, order_dtype, x.dtype)
252
+
253
+
254
@cl_array.elwise_kernel_runner
def _hankel_01(h0, h1, x):
    """Look up the Hankel 0/1 kernel for the dtypes of *h0* and *x*.

    :raises TypeError: if the two outputs *h0* and *h1* differ in dtype.
    """
    out_dtype = h0.dtype
    if out_dtype != h1.dtype:
        raise TypeError("types of h0 and h1 must match")

    return elementwise.get_hankel_01_kernel(h0.context, out_dtype, x.dtype)
260
+
261
+
262
def bessel_jn(n, x, queue=None):
    """Run the Bessel ``"j"`` kernel for order *n* on *x*.

    Returns a new array allocated via ``x._new_like_me``.
    """
    out = x._new_like_me(queue=queue)
    evt = _bessel_jn(out, n, x, queue=queue)
    out.add_event(evt)
    return out
266
+
267
+
268
def bessel_yn(n, x, queue=None):
    """Run the Bessel ``"y"`` kernel for order *n* on *x*.

    Returns a new array allocated via ``x._new_like_me``.
    """
    out = x._new_like_me(queue=queue)
    evt = _bessel_yn(out, n, x, queue=queue)
    out.add_event(evt)
    return out
272
+
273
+
274
def hankel_01(x, queue=None):
    """Run the Hankel 0/1 kernel on *x*.

    Returns a tuple ``(h0, h1)`` of new arrays, both allocated via
    ``x._new_like_me``.
    """
    h0 = x._new_like_me(queue=queue)
    h1 = x._new_like_me(queue=queue)

    # One kernel launch writes both outputs, so both wait on the same event.
    evt = _hankel_01(h0, h1, x, queue=queue)
    for out in (h0, h1):
        out.add_event(evt)

    return h0, h1
pyopencl/clrandom.py ADDED
@@ -0,0 +1,408 @@
1
+ __copyright__ = "Copyright (C) 2009-16 Andreas Kloeckner"
2
+
3
+ __license__ = """
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in
12
+ all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ THE SOFTWARE.
21
+ """
22
+
23
+
24
+ # {{{ documentation
25
+
26
+ __doc__ = """
27
+ PyOpenCL includes and uses some of the `Random123 random number generators
28
+ <https://www.deshawresearch.com/resources.html>`__ by D.E. Shaw
29
+ Research. In addition to being usable through the convenience functions above,
30
+ they are available in any piece of code compiled through PyOpenCL by::
31
+
32
+ #include <pyopencl-random123/philox.cl>
33
+ #include <pyopencl-random123/threefry.cl>
34
+
35
+ See the `Philox source
36
+ <https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/philox.cl>`__
37
+ and the `Threefry source
38
+ <https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/threefry.cl>`__
39
+ for some documentation if you're planning on using Random123 directly.
40
+
41
+ .. autoclass:: PhiloxGenerator
42
+
43
+ .. autoclass:: ThreefryGenerator
44
+
45
+ .. autofunction:: rand
46
+ .. autofunction:: fill_rand
47
+
48
+ """
49
+
50
+ # }}}
51
+
52
+ import numpy as np
53
+ from pytools import memoize_method
54
+
55
+ import pyopencl as cl
56
+ import pyopencl.array as cl_array
57
+ import pyopencl.cltypes as cltypes
58
+ from pyopencl.tools import first_arg_dependent_memoize
59
+
60
+
61
+ # {{{ Random123 generators
62
+
63
class Random123GeneratorBase:
    """
    .. versionadded:: 2016.2

    .. automethod:: fill_uniform
    .. automethod:: uniform
    .. automethod:: fill_normal
    .. automethod:: normal
    """

    # Subclasses override the three properties below with plain class
    # attributes.

    @property
    def header_name(self):
        # Name of the header that the generated kernel source #includes.
        raise NotImplementedError

    @property
    def generator_name(self):
        # Used as the generator function name and as the prefix of the
        # ``_ctr_t``/``_key_t`` type names in the generated kernel source.
        raise NotImplementedError

    @property
    def key_length(self):
        # Number of key words in the kernel. The first one is the work
        # item's global id, so the host supplies ``key_length - 1`` of them.
        raise NotImplementedError

    def __init__(self, context, key=None, counter=None, seed=None):
        """
        :arg context: the context in which generator kernels are built.
        :arg key: a sequence of ``key_length - 1`` integers. Drawn at
            random (from *seed*) when *None*.
        :arg counter: a sequence of four integers. Drawn at random (from
            *seed*) when *None*.
        :arg seed: seed for the host-side RNG that supplies a missing
            *key* and/or *counter*.
        :raises TypeError: if *key*, *counter* and *seed* are all given,
            since *seed* would then be unused.
        """
        int32_info = np.iinfo(np.int32)
        from random import Random

        rng = Random(seed)

        if key is not None and counter is not None and seed is not None:
            raise TypeError("seed is unused and may not be specified "
                    "if both counter and key are given")

        if key is None:
            key = [
                    rng.randrange(
                        int(int32_info.min), int(int32_info.max)+1)
                    for i in range(self.key_length-1)]
        if counter is None:
            counter = [
                    rng.randrange(
                        int(int32_info.min), int(int32_info.max)+1)
                    for i in range(4)]

        self.context = context
        # Store private list copies: _fill() advances self.counter in place
        # (which must not modify the caller's sequence) and concatenates
        # key and counter with a list (which requires both to be lists).
        self.key = list(key)
        self.counter = list(counter)

        self.counter_max = int32_info.max

    @memoize_method
    def get_gen_kernel(self, dtype, distribution):
        """Build the generator kernel for *dtype* and *distribution*.

        :arg distribution: ``"uniform"`` or ``"normal"``.
        :returns: a tuple ``(knl, counter_multiplier, size_multiplier)``.
            *counter_multiplier* scales how far :meth:`_fill` advances the
            host-side counter; *size_multiplier* is the number of scalars
            per array entry (greater than 1 only for vector dtypes).
        :raises TypeError: for an unsupported distribution/dtype pair.
        """
        size_multiplier = 1
        arg_dtype = dtype

        rng_key = (distribution, dtype)

        if rng_key in [("uniform", np.float64), ("normal", np.float64)]:
            c_type = "double"
            scale1_const = "((double) %r)" % (1/2**32)
            scale2_const = "((double) %r)" % (1/2**64)
            if distribution == "normal":
                transform = "box_muller"
            else:
                transform = ""

            # GET_RANDOM_NUM(gen) expands ``gen`` twice here, so each
            # double4 is assembled from two generator invocations.
            rng_expr = (
                    "shift + scale * "
                    "%s( %s * convert_double4(gen)"
                    "+ %s * convert_double4(gen))"
                    % (transform, scale1_const, scale2_const))

            counter_multiplier = 2

        elif rng_key in [(dist, cmp_dtype)
                for dist in ["normal", "uniform"]
                for cmp_dtype in [
                    np.float32,
                    cltypes.float2,
                    cltypes.float3,
                    cltypes.float4,
                    ]]:
            c_type = "float"
            scale_const = "((float) %r)" % (1/2**32)

            if distribution == "normal":
                transform = "box_muller"
            else:
                transform = ""

            rng_expr = (
                    "shift + scale * %s(%s * convert_float4(gen))"
                    % (transform, scale_const))
            counter_multiplier = 1
            # scale/shift are passed as scalar floats even for vector dtypes.
            arg_dtype = np.float32
            try:
                _, size_multiplier = cltypes.vec_type_to_scalar_and_count[dtype]
            except KeyError:
                # Not a vector type: keep size_multiplier == 1.
                pass

        elif rng_key == ("uniform", np.int32):
            c_type = "int"
            rng_expr = (
                    "shift + convert_int4((convert_long4(gen) * scale) / %s)"
                    % (str(2**32)+"l")
                    )
            counter_multiplier = 1

        elif rng_key == ("uniform", np.int64):
            c_type = "long"
            rng_expr = (
                    "shift"
                    "+ convert_long4(gen) * (scale/two32) "
                    "+ ((convert_long4(gen) * scale) / two32)"
                    .replace("two32", (str(2**32)+"l")))
            counter_multiplier = 2

        else:
            raise TypeError(
                    "unsupported RNG distribution/data type combination '%s/%s'"
                    % rng_key)

        kernel_name = f"rng_gen_{self.generator_name}_{distribution}"
        src = """//CL//
            #include <{header_name}>

            #ifndef M_PI
            #ifdef M_PI_F
            #define M_PI M_PI_F
            #else
            #define M_PI 3.14159265359f
            #endif
            #endif

            typedef {output_t} output_t;
            typedef {output_t}4 output_vec_t;
            typedef {gen_name}_ctr_t ctr_t;
            typedef {gen_name}_key_t key_t;

            uint4 gen_bits(key_t *key, ctr_t *ctr)
            {{
                union {{
                    ctr_t ctr_el;
                    uint4 vec_el;
                }} u;

                u.ctr_el = {gen_name}(*ctr, *key);
                if (++ctr->v[0] == 0)
                    if (++ctr->v[1] == 0)
                        ++ctr->v[2];

                return u.vec_el;
            }}

            #if {include_box_muller}
            output_vec_t box_muller(output_vec_t x)
            {{
                #define BOX_MULLER(I, COMPA, COMPB) \
                    output_t r##I = sqrt(-2*log(x.COMPA)); \
                    output_t c##I; \
                    output_t s##I = sincos((output_t) (2*M_PI) * x.COMPB, &c##I);

                BOX_MULLER(0, x, y);
                BOX_MULLER(1, z, w);
                return (output_vec_t) (r0*c0, r0*s0, r1*c1, r1*s1);
            }}
            #endif

            #define GET_RANDOM_NUM(gen) {rng_expr}

            kernel void {kernel_name}(
                int k1,
                #if {key_length} > 2
                int k2, int k3,
                #endif
                int c0, int c1, int c2, int c3,
                global output_t *output,
                long out_size,
                output_t scale,
                output_t shift)
            {{
                #if {key_length} == 2
                key_t k = {{{{get_global_id(0), k1}}}};
                #else
                key_t k = {{{{get_global_id(0), k1, k2, k3}}}};
                #endif

                ctr_t c = {{{{c0, c1, c2, c3}}}};

                // output bulk
                unsigned long idx = get_global_id(0)*4;
                while (idx + 4 < out_size)
                {{
                    output_vec_t ran = GET_RANDOM_NUM(gen_bits(&k, &c));
                    vstore4(ran, 0, &output[idx]);
                    idx += 4*get_global_size(0);
                }}

                // output tail
                output_vec_t tail_ran = GET_RANDOM_NUM(gen_bits(&k, &c));
                if (idx < out_size)
                    output[idx] = tail_ran.x;
                if (idx+1 < out_size)
                    output[idx+1] = tail_ran.y;
                if (idx+2 < out_size)
                    output[idx+2] = tail_ran.z;
                if (idx+3 < out_size)
                    output[idx+3] = tail_ran.w;
            }}
            """.format(
                kernel_name=kernel_name,
                gen_name=self.generator_name,
                header_name=self.header_name,
                output_t=c_type,
                key_length=self.key_length,
                include_box_muller=int(distribution == "normal"),
                rng_expr=rng_expr
                )

        prg = cl.Program(self.context, src).build()
        knl = getattr(prg, kernel_name)
        # Argument layout: key words, four counter words, the output buffer
        # (None), the output size, then scale and shift.
        knl.set_scalar_arg_dtypes(
                [np.int32] * (self.key_length - 1 + 4)
                + [None, np.int64, arg_dtype, arg_dtype])

        return knl, counter_multiplier, size_multiplier

    def _fill(self, distribution, ary, scale, shift, queue=None):
        """Fill *ary* with random numbers drawn from *distribution*,
        multiplied by *scale* and offset by *shift*, then advance the
        host-side counter.

        :arg distribution: ``"uniform"`` or ``"normal"``.
        :arg queue: command queue to use; defaults to ``ary.queue``.
        :return: a :class:`pyopencl.Event`
        """

        if queue is None:
            queue = ary.queue

        knl, counter_multiplier, size_multiplier = \
                self.get_gen_kernel(ary.dtype, distribution)

        args = self.key + self.counter + [
                ary.data, ary.size*size_multiplier,
                scale, shift]

        n = ary.size
        from pyopencl.array import _splay
        gsize, lsize = _splay(queue.device, ary.size)

        evt = knl(queue, gsize, lsize, *args)
        ary.add_event(evt)

        # Advance the counter so that the next fill does not start from the
        # same counter value; overflow of word 0 carries into word 1, and
        # overflow of word 1 into word 2.
        self.counter[0] += n * counter_multiplier
        c1_incr, self.counter[0] = divmod(self.counter[0], self.counter_max)
        if c1_incr:
            self.counter[1] += c1_incr
            c2_incr, self.counter[1] = divmod(self.counter[1], self.counter_max)
            self.counter[2] += c2_incr

        return evt

    def fill_uniform(self, ary, a=0, b=1, queue=None):
        """Fill *ary* with uniformly distributed random numbers in the interval
        *(a, b)*, endpoints excluded.

        :return: a :class:`pyopencl.Event`
        """
        return self._fill("uniform", ary,
                scale=(b-a), shift=a, queue=queue)

    def uniform(self, *args, **kwargs):
        """Make a new empty array, apply :meth:`fill_uniform` to it.

        The keyword arguments *a* and *b* are forwarded to
        :meth:`fill_uniform`; everything else is passed to
        :func:`pyopencl.array.empty`.
        """
        a = kwargs.pop("a", 0)
        b = kwargs.pop("b", 1)

        result = cl_array.empty(*args, **kwargs)
        self.fill_uniform(result, queue=result.queue, a=a, b=b)
        return result

    def fill_normal(self, ary, mu=0, sigma=1, queue=None):
        """Fill *ary* with normally distributed numbers with mean *mu* and
        standard deviation *sigma*.

        :return: a :class:`pyopencl.Event`
        """

        return self._fill("normal", ary, scale=sigma, shift=mu, queue=queue)

    def normal(self, *args, **kwargs):
        """Make a new empty array, apply :meth:`fill_normal` to it.

        The keyword arguments *mu* and *sigma* are forwarded to
        :meth:`fill_normal`; everything else is passed to
        :func:`pyopencl.array.empty`.
        """
        mu = kwargs.pop("mu", 0)
        sigma = kwargs.pop("sigma", 1)

        result = cl_array.empty(*args, **kwargs)
        self.fill_normal(result, queue=result.queue, mu=mu, sigma=sigma)
        return result
352
+
353
+
354
class PhiloxGenerator(Random123GeneratorBase):
    # Reuse the base class docstring (including its automethod directives).
    __doc__ = Random123GeneratorBase.__doc__

    # Header #included by the kernel source built in get_gen_kernel().
    header_name = "pyopencl-random123/philox.cl"
    # Generator function name / type-name prefix in the kernel source.
    generator_name = "philox4x32"
    # Key words per work item: the global id plus one host-supplied word.
    key_length = 2
360
+
361
+
362
class ThreefryGenerator(Random123GeneratorBase):
    # Reuse the base class docstring (including its automethod directives).
    __doc__ = Random123GeneratorBase.__doc__

    # Header #included by the kernel source built in get_gen_kernel().
    header_name = "pyopencl-random123/threefry.cl"
    # Generator function name / type-name prefix in the kernel source.
    generator_name = "threefry4x32"
    # Key words per work item: the global id plus three host-supplied words.
    key_length = 4
368
+
369
+ # }}}
370
+
371
+
372
@first_arg_dependent_memoize
def _get_generator(context):
    """Return the default generator for *context*.

    A :class:`PhiloxGenerator` is used when the context's first device is a
    CPU device, a :class:`ThreefryGenerator` otherwise.
    """
    first_device = context.devices[0]
    on_cpu = first_device.type & cl.device_type.CPU
    generator_cls = PhiloxGenerator if on_cpu else ThreefryGenerator
    return generator_cls(context)
380
+
381
+
382
def fill_rand(result, queue=None, a=0, b=1):
    """Fill *result* with random values in the range :math:`[a, b)`.

    :arg queue: command queue to use; defaults to ``result.queue``. It
        selects the generator (via its context) and runs the fill.
    """
    if queue is None:
        queue = result.queue
    gen = _get_generator(queue.context)
    # Pass the resolved queue on: otherwise an explicitly given *queue*
    # would be ignored and the fill would fail when ``result.queue`` is None.
    gen.fill_uniform(result, a=a, b=b, queue=queue)
389
+
390
+
391
def rand(queue, shape, dtype, luxury=None, a=0, b=1):
    """Create an array of *shape* and *dtype* on *queue* and fill it with
    random values in the range :math:`[a, b)`.

    *luxury* is deprecated; passing anything other than *None* only
    triggers a warning.
    """
    if luxury is not None:
        import warnings
        warnings.warn(
                "Specifying the 'luxury' argument is deprecated and will stop being "
                "supported in PyOpenCL 2018.x", stacklevel=2)

    generator = _get_generator(queue.context)
    out = cl_array.Array(queue, shape, dtype)
    generator.fill_uniform(out, a=a, b=b)
    return out
406
+
407
+
408
+ # vim: filetype=pyopencl:foldmethod=marker