pyopencl 2024.1__cp39-cp39-macosx_11_0_arm64.whl → 2024.2.1__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +82 -80
- pyopencl/_cl.cpython-39-darwin.so +0 -0
- pyopencl/algorithm.py +8 -10
- pyopencl/array.py +16 -12
- pyopencl/bitonic_sort.py +5 -4
- pyopencl/cache.py +22 -22
- pyopencl/capture_call.py +4 -3
- pyopencl/characterize/__init__.py +4 -2
- pyopencl/characterize/performance.py +2 -1
- pyopencl/clmath.py +2 -1
- pyopencl/clrandom.py +5 -369
- pyopencl/cltypes.py +4 -1
- pyopencl/compyte/dtypes.py +1 -1
- pyopencl/compyte/ndarray/gen_elemwise.py +6 -5
- pyopencl/compyte/ndarray/gen_reduction.py +6 -6
- pyopencl/compyte/ndarray/setup_opencl.py +3 -2
- pyopencl/compyte/ndarray/test_gpu_elemwise.py +5 -4
- pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -1
- pyopencl/elementwise.py +4 -6
- pyopencl/invoker.py +15 -9
- pyopencl/ipython_ext.py +1 -1
- pyopencl/reduction.py +5 -5
- pyopencl/scan.py +17 -21
- pyopencl/tools.py +13 -16
- pyopencl/version.py +1 -1
- pyopencl-2024.2.1.data/data/CITATION.cff +74 -0
- pyopencl-2024.2.1.data/data/CMakeLists.txt +83 -0
- {pyopencl-2024.1.dist-info → pyopencl-2024.2.1.data/data}/LICENSE +0 -23
- pyopencl-2024.2.1.data/data/Makefile.in +21 -0
- pyopencl-2024.2.1.data/data/README.rst +70 -0
- pyopencl-2024.2.1.data/data/README_SETUP.txt +34 -0
- pyopencl-2024.2.1.data/data/aksetup_helper.py +1013 -0
- pyopencl-2024.2.1.data/data/configure.py +6 -0
- pyopencl-2024.2.1.data/data/contrib/cldis.py +91 -0
- pyopencl-2024.2.1.data/data/contrib/fortran-to-opencl/README +29 -0
- pyopencl-2024.2.1.data/data/contrib/fortran-to-opencl/translate.py +1441 -0
- pyopencl-2024.2.1.data/data/contrib/pyopencl.vim +84 -0
- pyopencl-2024.2.1.data/data/doc/Makefile +23 -0
- pyopencl-2024.2.1.data/data/doc/algorithm.rst +214 -0
- pyopencl-2024.2.1.data/data/doc/array.rst +305 -0
- pyopencl-2024.2.1.data/data/doc/conf.py +26 -0
- pyopencl-2024.2.1.data/data/doc/howto.rst +105 -0
- pyopencl-2024.2.1.data/data/doc/index.rst +137 -0
- pyopencl-2024.2.1.data/data/doc/make_constants.py +561 -0
- pyopencl-2024.2.1.data/data/doc/misc.rst +885 -0
- pyopencl-2024.2.1.data/data/doc/runtime.rst +51 -0
- pyopencl-2024.2.1.data/data/doc/runtime_const.rst +30 -0
- pyopencl-2024.2.1.data/data/doc/runtime_gl.rst +78 -0
- pyopencl-2024.2.1.data/data/doc/runtime_memory.rst +527 -0
- pyopencl-2024.2.1.data/data/doc/runtime_platform.rst +184 -0
- pyopencl-2024.2.1.data/data/doc/runtime_program.rst +364 -0
- pyopencl-2024.2.1.data/data/doc/runtime_queue.rst +182 -0
- pyopencl-2024.2.1.data/data/doc/subst.rst +36 -0
- pyopencl-2024.2.1.data/data/doc/tools.rst +4 -0
- pyopencl-2024.2.1.data/data/doc/types.rst +42 -0
- pyopencl-2024.2.1.data/data/examples/black-hole-accretion.py +2227 -0
- pyopencl-2024.2.1.data/data/examples/demo-struct-reduce.py +75 -0
- pyopencl-2024.2.1.data/data/examples/demo.py +39 -0
- pyopencl-2024.2.1.data/data/examples/demo_array.py +32 -0
- pyopencl-2024.2.1.data/data/examples/demo_array_svm.py +37 -0
- pyopencl-2024.2.1.data/data/examples/demo_elementwise.py +34 -0
- pyopencl-2024.2.1.data/data/examples/demo_elementwise_complex.py +53 -0
- pyopencl-2024.2.1.data/data/examples/demo_mandelbrot.py +183 -0
- pyopencl-2024.2.1.data/data/examples/demo_meta_codepy.py +56 -0
- pyopencl-2024.2.1.data/data/examples/demo_meta_template.py +55 -0
- pyopencl-2024.2.1.data/data/examples/dump-performance.py +38 -0
- pyopencl-2024.2.1.data/data/examples/dump-properties.py +86 -0
- pyopencl-2024.2.1.data/data/examples/gl_interop_demo.py +84 -0
- pyopencl-2024.2.1.data/data/examples/gl_particle_animation.py +218 -0
- pyopencl-2024.2.1.data/data/examples/ipython-demo.ipynb +203 -0
- pyopencl-2024.2.1.data/data/examples/median-filter.py +99 -0
- pyopencl-2024.2.1.data/data/examples/n-body.py +1070 -0
- pyopencl-2024.2.1.data/data/examples/narray.py +37 -0
- pyopencl-2024.2.1.data/data/examples/noisyImage.jpg +0 -0
- pyopencl-2024.2.1.data/data/examples/pi-monte-carlo.py +1166 -0
- pyopencl-2024.2.1.data/data/examples/svm.py +82 -0
- pyopencl-2024.2.1.data/data/examples/transpose.py +229 -0
- pyopencl-2024.2.1.data/data/pytest.ini +3 -0
- pyopencl-2024.2.1.data/data/src/bitlog.cpp +51 -0
- pyopencl-2024.2.1.data/data/src/bitlog.hpp +83 -0
- pyopencl-2024.2.1.data/data/src/clinfo_ext.h +134 -0
- pyopencl-2024.2.1.data/data/src/mempool.hpp +444 -0
- pyopencl-2024.2.1.data/data/src/pyopencl_ext.h +77 -0
- pyopencl-2024.2.1.data/data/src/tools.hpp +90 -0
- pyopencl-2024.2.1.data/data/src/wrap_cl.cpp +61 -0
- pyopencl-2024.2.1.data/data/src/wrap_cl.hpp +5853 -0
- pyopencl-2024.2.1.data/data/src/wrap_cl_part_1.cpp +369 -0
- pyopencl-2024.2.1.data/data/src/wrap_cl_part_2.cpp +702 -0
- pyopencl-2024.2.1.data/data/src/wrap_constants.cpp +1274 -0
- pyopencl-2024.2.1.data/data/src/wrap_helpers.hpp +213 -0
- pyopencl-2024.2.1.data/data/src/wrap_mempool.cpp +731 -0
- pyopencl-2024.2.1.data/data/test/add-vectors-32.spv +0 -0
- pyopencl-2024.2.1.data/data/test/add-vectors-64.spv +0 -0
- pyopencl-2024.2.1.data/data/test/empty-header.h +1 -0
- pyopencl-2024.2.1.data/data/test/test_algorithm.py +1180 -0
- pyopencl-2024.2.1.data/data/test/test_array.py +2392 -0
- pyopencl-2024.2.1.data/data/test/test_arrays_in_structs.py +100 -0
- pyopencl-2024.2.1.data/data/test/test_clmath.py +529 -0
- pyopencl-2024.2.1.data/data/test/test_clrandom.py +75 -0
- pyopencl-2024.2.1.data/data/test/test_enqueue_copy.py +271 -0
- pyopencl-2024.2.1.data/data/test/test_wrapper.py +1554 -0
- pyopencl-2024.2.1.dist-info/LICENSE +282 -0
- {pyopencl-2024.1.dist-info → pyopencl-2024.2.1.dist-info}/METADATA +12 -12
- pyopencl-2024.2.1.dist-info/RECORD +123 -0
- {pyopencl-2024.1.dist-info → pyopencl-2024.2.1.dist-info}/WHEEL +1 -1
- pyopencl/cl/pyopencl-ranluxcl.cl +0 -957
- pyopencl-2024.1.dist-info/RECORD +0 -48
- {pyopencl-2024.1.dist-info → pyopencl-2024.2.1.dist-info}/top_level.txt +0 -0
pyopencl/clrandom.py
CHANGED
|
@@ -24,7 +24,7 @@ THE SOFTWARE.
|
|
|
24
24
|
# {{{ documentation
|
|
25
25
|
|
|
26
26
|
__doc__ = """
|
|
27
|
-
PyOpenCL now includes and uses some of the `Random123 random number generators
|
|
27
|
+
PyOpenCL includes and uses some of the `Random123 random number generators
|
|
28
28
|
<https://www.deshawresearch.com/resources.html>`__ by D.E. Shaw
|
|
29
29
|
Research. In addition to being usable through the convenience functions above,
|
|
30
30
|
they are available in any piece of code compiled through PyOpenCL by::
|
|
@@ -38,15 +38,6 @@ and the `Threefry source
|
|
|
38
38
|
<https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/threefry.cl>`__
|
|
39
39
|
for some documentation if you're planning on using Random123 directly.
|
|
40
40
|
|
|
41
|
-
.. note::
|
|
42
|
-
|
|
43
|
-
PyOpenCL previously had documented support for the RANLUXCL random number
|
|
44
|
-
generator (``https://bitbucket.org/ivarun/ranluxcl``) by Ivar Ursin
|
|
45
|
-
Nikolaisen. This support is now deprecated because of the general slowness
|
|
46
|
-
of these generators and will be removed from PyOpenCL in the 2018.x series.
|
|
47
|
-
All users are encouraged to switch to one of the Random123 generators,
|
|
48
|
-
:class:`PhiloxGenerator` or :class:`ThreefryGenerator`.
|
|
49
|
-
|
|
50
41
|
.. autoclass:: PhiloxGenerator
|
|
51
42
|
|
|
52
43
|
.. autoclass:: ThreefryGenerator
|
|
@@ -58,363 +49,13 @@ for some documentation if you're planning on using Random123 directly.
|
|
|
58
49
|
|
|
59
50
|
# }}}
|
|
60
51
|
|
|
52
|
+
import numpy as np
|
|
53
|
+
from pytools import memoize_method
|
|
54
|
+
|
|
61
55
|
import pyopencl as cl
|
|
62
56
|
import pyopencl.array as cl_array
|
|
63
57
|
import pyopencl.cltypes as cltypes
|
|
64
58
|
from pyopencl.tools import first_arg_dependent_memoize
|
|
65
|
-
from pytools import memoize_method
|
|
66
|
-
|
|
67
|
-
import numpy as np
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
# {{{ RanluxGenerator (deprecated)
|
|
71
|
-
|
|
72
|
-
class RanluxGenerator:
|
|
73
|
-
"""
|
|
74
|
-
.. warning::
|
|
75
|
-
|
|
76
|
-
This class is deprecated, to be removed in PyOpenCL 2018.x.
|
|
77
|
-
|
|
78
|
-
.. versionadded:: 2011.2
|
|
79
|
-
|
|
80
|
-
.. attribute:: state
|
|
81
|
-
|
|
82
|
-
A :class:`pyopencl.array.Array` containing the state of the generator.
|
|
83
|
-
|
|
84
|
-
.. attribute:: nskip
|
|
85
|
-
|
|
86
|
-
nskip is an integer which can (optionally) be defined in the kernel
|
|
87
|
-
code as RANLUXCL_NSKIP. If this is done the generator will be faster
|
|
88
|
-
for luxury setting 0 and 1, or when the p-value is manually set to a
|
|
89
|
-
multiple of 24.
|
|
90
|
-
"""
|
|
91
|
-
|
|
92
|
-
def __init__(self, queue, num_work_items=None,
|
|
93
|
-
luxury=None, seed=None, no_warmup=False,
|
|
94
|
-
use_legacy_init=False, max_work_items=None):
|
|
95
|
-
"""
|
|
96
|
-
:param queue: :class:`pyopencl.CommandQueue`, only used for initialization
|
|
97
|
-
:param luxury: the "luxury value" of the generator, and should be 0-4,
|
|
98
|
-
where 0 is fastest and 4 produces the best numbers. It can also be
|
|
99
|
-
>=24, in which case it directly sets the p-value of RANLUXCL.
|
|
100
|
-
:param num_work_items: is the number of generators to initialize,
|
|
101
|
-
usually corresponding to the number of work-items in the NDRange
|
|
102
|
-
RANLUXCL will be used with. May be *None*, in which case a default
|
|
103
|
-
value is used.
|
|
104
|
-
:param max_work_items: should reflect the maximum number of work-items
|
|
105
|
-
that will be used on any parallel instance of RANLUXCL. So for
|
|
106
|
-
instance if we are launching 5120 work-items on GPU1 and 10240
|
|
107
|
-
work-items on GPU2, GPU1's RANLUXCLTab would be generated by
|
|
108
|
-
calling ranluxcl_intialization with numWorkitems = 5120 while
|
|
109
|
-
GPU2's RANLUXCLTab would use numWorkitems = 10240. However
|
|
110
|
-
maxWorkitems must be at least 10240 for both GPU1 and GPU2, and it
|
|
111
|
-
must be set to the same value for both. (may be *None*)
|
|
112
|
-
|
|
113
|
-
.. versionchanged:: 2013.1
|
|
114
|
-
|
|
115
|
-
Added default value for ``num_work_items``.
|
|
116
|
-
"""
|
|
117
|
-
|
|
118
|
-
from warnings import warn
|
|
119
|
-
warn("Ranlux random number generation is deprecated and will go away "
|
|
120
|
-
"in 2022.", DeprecationWarning, stacklevel=2)
|
|
121
|
-
|
|
122
|
-
if luxury is None:
|
|
123
|
-
luxury = 4
|
|
124
|
-
|
|
125
|
-
if num_work_items is None:
|
|
126
|
-
if queue.device.type & cl.device_type.CPU:
|
|
127
|
-
num_work_items = 8 * queue.device.max_compute_units
|
|
128
|
-
else:
|
|
129
|
-
num_work_items = 64 * queue.device.max_compute_units
|
|
130
|
-
|
|
131
|
-
if seed is None:
|
|
132
|
-
from time import time
|
|
133
|
-
seed = int(time()*1e6) % 2 << 30
|
|
134
|
-
|
|
135
|
-
self.context = queue.context
|
|
136
|
-
self.luxury = luxury
|
|
137
|
-
self.num_work_items = num_work_items
|
|
138
|
-
|
|
139
|
-
from pyopencl.characterize import has_double_support
|
|
140
|
-
self.support_double = has_double_support(queue.device)
|
|
141
|
-
|
|
142
|
-
self.no_warmup = no_warmup
|
|
143
|
-
self.use_legacy_init = use_legacy_init
|
|
144
|
-
self.max_work_items = max_work_items
|
|
145
|
-
|
|
146
|
-
src = """
|
|
147
|
-
%(defines)s
|
|
148
|
-
|
|
149
|
-
#include <pyopencl-ranluxcl.cl>
|
|
150
|
-
|
|
151
|
-
kernel void init_ranlux(unsigned seeds,
|
|
152
|
-
global ranluxcl_state_t *ranluxcltab)
|
|
153
|
-
{
|
|
154
|
-
if (get_global_id(0) < %(num_work_items)d)
|
|
155
|
-
ranluxcl_initialization(seeds, ranluxcltab);
|
|
156
|
-
}
|
|
157
|
-
""" % {
|
|
158
|
-
"defines": self.generate_settings_defines(),
|
|
159
|
-
"num_work_items": num_work_items
|
|
160
|
-
}
|
|
161
|
-
prg = cl.Program(queue.context, src).build()
|
|
162
|
-
|
|
163
|
-
# {{{ compute work group size
|
|
164
|
-
|
|
165
|
-
wg_size = None
|
|
166
|
-
|
|
167
|
-
import sys
|
|
168
|
-
import platform
|
|
169
|
-
if ("darwin" in sys.platform
|
|
170
|
-
and "Apple" in queue.device.platform.vendor
|
|
171
|
-
and platform.mac_ver()[0].startswith("10.7")
|
|
172
|
-
and queue.device.type & cl.device_type.CPU):
|
|
173
|
-
wg_size = (1,)
|
|
174
|
-
|
|
175
|
-
self.wg_size = wg_size
|
|
176
|
-
|
|
177
|
-
# }}}
|
|
178
|
-
|
|
179
|
-
self.state = cl_array.empty(queue, (num_work_items, 112), dtype=np.uint8)
|
|
180
|
-
self.state.fill(17)
|
|
181
|
-
|
|
182
|
-
prg.init_ranlux(queue, (num_work_items,), self.wg_size, np.uint32(seed),
|
|
183
|
-
self.state.data)
|
|
184
|
-
|
|
185
|
-
def generate_settings_defines(self, include_double_pragma=True):
|
|
186
|
-
lines = []
|
|
187
|
-
if include_double_pragma and self.support_double:
|
|
188
|
-
lines.append("""
|
|
189
|
-
#if __OPENCL_C_VERSION__ < 120
|
|
190
|
-
#pragma OPENCL EXTENSION cl_khr_fp64: enable
|
|
191
|
-
#endif
|
|
192
|
-
""")
|
|
193
|
-
|
|
194
|
-
lines.append("#define RANLUXCL_LUX %d" % self.luxury)
|
|
195
|
-
|
|
196
|
-
if self.no_warmup:
|
|
197
|
-
lines.append("#define RANLUXCL_NO_WARMUP")
|
|
198
|
-
|
|
199
|
-
if self.support_double:
|
|
200
|
-
lines.append("#define RANLUXCL_SUPPORT_DOUBLE")
|
|
201
|
-
|
|
202
|
-
if self.use_legacy_init:
|
|
203
|
-
lines.append("#define RANLUXCL_USE_LEGACY_INITIALIZATION")
|
|
204
|
-
|
|
205
|
-
if self.max_work_items:
|
|
206
|
-
lines.append(
|
|
207
|
-
"#define RANLUXCL_MAXWORKITEMS %d" % self.max_work_items)
|
|
208
|
-
|
|
209
|
-
return "\n".join(lines)
|
|
210
|
-
|
|
211
|
-
@memoize_method
|
|
212
|
-
def get_gen_kernel(self, dtype, distribution="uniform"):
|
|
213
|
-
size_multiplier = 1
|
|
214
|
-
arg_dtype = dtype
|
|
215
|
-
|
|
216
|
-
if dtype == np.float64:
|
|
217
|
-
bits = 64
|
|
218
|
-
c_type = "double"
|
|
219
|
-
rng_expr = "(shift + scale * gen)"
|
|
220
|
-
elif dtype == np.float32:
|
|
221
|
-
bits = 32
|
|
222
|
-
c_type = "float"
|
|
223
|
-
rng_expr = "(shift + scale * gen)"
|
|
224
|
-
elif dtype == cltypes.float2:
|
|
225
|
-
bits = 32
|
|
226
|
-
c_type = "float"
|
|
227
|
-
rng_expr = "(shift + scale * gen)"
|
|
228
|
-
size_multiplier = 2
|
|
229
|
-
arg_dtype = np.float32
|
|
230
|
-
elif dtype in [cltypes.float3, cltypes.float4]:
|
|
231
|
-
bits = 32
|
|
232
|
-
c_type = "float"
|
|
233
|
-
rng_expr = "(shift + scale * gen)"
|
|
234
|
-
size_multiplier = 4
|
|
235
|
-
arg_dtype = np.float32
|
|
236
|
-
elif dtype == np.int32:
|
|
237
|
-
assert distribution == "uniform"
|
|
238
|
-
bits = 32
|
|
239
|
-
c_type = "int"
|
|
240
|
-
rng_expr = ("(shift "
|
|
241
|
-
"+ convert_int4((float) scale * gen) "
|
|
242
|
-
"+ convert_int4(((float) scale / (1<<24)) * gen))")
|
|
243
|
-
|
|
244
|
-
elif dtype == np.int64:
|
|
245
|
-
assert distribution == "uniform"
|
|
246
|
-
if self.support_double:
|
|
247
|
-
bits = 64
|
|
248
|
-
else:
|
|
249
|
-
bits = 32
|
|
250
|
-
c_type = "long"
|
|
251
|
-
rng_expr = ("(shift "
|
|
252
|
-
"+ convert_long4((float) scale * gen) "
|
|
253
|
-
"+ convert_long4(((float) scale / (1l<<24)) * gen)"
|
|
254
|
-
"+ convert_long4(((float) scale / (1l<<48)) * gen)"
|
|
255
|
-
")")
|
|
256
|
-
|
|
257
|
-
else:
|
|
258
|
-
raise TypeError("unsupported RNG data type '%s'" % dtype)
|
|
259
|
-
|
|
260
|
-
rl_flavor = "%d%s" % (bits, {
|
|
261
|
-
"uniform": "",
|
|
262
|
-
"normal": "norm"
|
|
263
|
-
}[distribution])
|
|
264
|
-
|
|
265
|
-
src = """//CL//
|
|
266
|
-
%(defines)s
|
|
267
|
-
|
|
268
|
-
#include <pyopencl-ranluxcl.cl>
|
|
269
|
-
|
|
270
|
-
typedef %(output_t)s output_t;
|
|
271
|
-
typedef %(output_t)s4 output_vec_t;
|
|
272
|
-
#define NUM_WORKITEMS %(num_work_items)d
|
|
273
|
-
#define RANLUX_FUNC ranluxcl%(rlflavor)s
|
|
274
|
-
#define GET_RANDOM_NUM(gen) %(rng_expr)s
|
|
275
|
-
|
|
276
|
-
kernel void generate(
|
|
277
|
-
global ranluxcl_state_t *ranluxcltab,
|
|
278
|
-
global output_t *output,
|
|
279
|
-
unsigned long out_size,
|
|
280
|
-
output_t scale,
|
|
281
|
-
output_t shift)
|
|
282
|
-
{
|
|
283
|
-
|
|
284
|
-
ranluxcl_state_t ranluxclstate;
|
|
285
|
-
ranluxcl_download_seed(&ranluxclstate, ranluxcltab);
|
|
286
|
-
|
|
287
|
-
// output bulk
|
|
288
|
-
unsigned long idx = get_global_id(0)*4;
|
|
289
|
-
while (idx + 4 < out_size)
|
|
290
|
-
{
|
|
291
|
-
output_vec_t ran = GET_RANDOM_NUM(RANLUX_FUNC(&ranluxclstate));
|
|
292
|
-
vstore4(ran, 0, &output[idx]);
|
|
293
|
-
idx += 4*NUM_WORKITEMS;
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
// output tail
|
|
297
|
-
output_vec_t tail_ran = GET_RANDOM_NUM(RANLUX_FUNC(&ranluxclstate));
|
|
298
|
-
if (idx < out_size)
|
|
299
|
-
output[idx] = tail_ran.x;
|
|
300
|
-
if (idx+1 < out_size)
|
|
301
|
-
output[idx+1] = tail_ran.y;
|
|
302
|
-
if (idx+2 < out_size)
|
|
303
|
-
output[idx+2] = tail_ran.z;
|
|
304
|
-
if (idx+3 < out_size)
|
|
305
|
-
output[idx+3] = tail_ran.w;
|
|
306
|
-
|
|
307
|
-
ranluxcl_upload_seed(&ranluxclstate, ranluxcltab);
|
|
308
|
-
}
|
|
309
|
-
""" % {
|
|
310
|
-
"defines": self.generate_settings_defines(),
|
|
311
|
-
"rlflavor": rl_flavor,
|
|
312
|
-
"output_t": c_type,
|
|
313
|
-
"num_work_items": self.num_work_items,
|
|
314
|
-
"rng_expr": rng_expr
|
|
315
|
-
}
|
|
316
|
-
|
|
317
|
-
prg = cl.Program(self.context, src).build()
|
|
318
|
-
knl = prg.generate
|
|
319
|
-
knl.set_scalar_arg_dtypes([None, None, np.uint64, arg_dtype, arg_dtype])
|
|
320
|
-
|
|
321
|
-
return knl, size_multiplier
|
|
322
|
-
|
|
323
|
-
def fill_uniform(self, ary, a=0, b=1, queue=None):
|
|
324
|
-
"""Fill *ary* with uniformly distributed random numbers in the interval
|
|
325
|
-
*(a, b)*, endpoints excluded.
|
|
326
|
-
|
|
327
|
-
:return: a :class:`pyopencl.Event`
|
|
328
|
-
|
|
329
|
-
.. versionchanged:: 2014.1.1
|
|
330
|
-
|
|
331
|
-
Added return value.
|
|
332
|
-
"""
|
|
333
|
-
|
|
334
|
-
if queue is None:
|
|
335
|
-
queue = ary.queue
|
|
336
|
-
|
|
337
|
-
knl, size_multiplier = self.get_gen_kernel(ary.dtype, "uniform")
|
|
338
|
-
evt = knl(queue,
|
|
339
|
-
(self.num_work_items,), None,
|
|
340
|
-
self.state.data, ary.data, ary.size*size_multiplier,
|
|
341
|
-
b-a, a, wait_for=ary.events)
|
|
342
|
-
ary.add_event(evt)
|
|
343
|
-
self.state.add_event(evt)
|
|
344
|
-
return ary
|
|
345
|
-
|
|
346
|
-
def uniform(self, *args, **kwargs):
|
|
347
|
-
"""Make a new empty array, apply :meth:`fill_uniform` to it.
|
|
348
|
-
"""
|
|
349
|
-
a = kwargs.pop("a", 0)
|
|
350
|
-
b = kwargs.pop("b", 1)
|
|
351
|
-
|
|
352
|
-
result = cl_array.empty(*args, **kwargs)
|
|
353
|
-
self.fill_uniform(result, queue=result.queue, a=a, b=b)
|
|
354
|
-
return result
|
|
355
|
-
|
|
356
|
-
def fill_normal(self, ary, mu=0, sigma=1, queue=None):
|
|
357
|
-
"""Fill *ary* with normally distributed numbers with mean *mu* and
|
|
358
|
-
standard deviation *sigma*.
|
|
359
|
-
|
|
360
|
-
.. versionchanged:: 2014.1.1
|
|
361
|
-
|
|
362
|
-
Added return value.
|
|
363
|
-
"""
|
|
364
|
-
|
|
365
|
-
if queue is None:
|
|
366
|
-
queue = ary.queue
|
|
367
|
-
|
|
368
|
-
knl, size_multiplier = self.get_gen_kernel(ary.dtype, "normal")
|
|
369
|
-
evt = knl(queue,
|
|
370
|
-
(self.num_work_items,), self.wg_size,
|
|
371
|
-
self.state.data, ary.data, ary.size*size_multiplier, sigma, mu,
|
|
372
|
-
wait_for=ary.events)
|
|
373
|
-
ary.add_event(evt)
|
|
374
|
-
self.state.add_event(evt)
|
|
375
|
-
return evt
|
|
376
|
-
|
|
377
|
-
def normal(self, *args, **kwargs):
|
|
378
|
-
"""Make a new empty array, apply :meth:`fill_normal` to it.
|
|
379
|
-
"""
|
|
380
|
-
mu = kwargs.pop("mu", 0)
|
|
381
|
-
sigma = kwargs.pop("sigma", 1)
|
|
382
|
-
|
|
383
|
-
result = cl_array.empty(*args, **kwargs)
|
|
384
|
-
self.fill_normal(result, queue=result.queue, mu=mu, sigma=sigma)
|
|
385
|
-
return result
|
|
386
|
-
|
|
387
|
-
@memoize_method
|
|
388
|
-
def get_sync_kernel(self):
|
|
389
|
-
src = """//CL//
|
|
390
|
-
{defines}
|
|
391
|
-
|
|
392
|
-
#include <pyopencl-ranluxcl.cl>
|
|
393
|
-
|
|
394
|
-
kernel void sync(
|
|
395
|
-
global ranluxcl_state_t *ranluxcltab)
|
|
396
|
-
{{
|
|
397
|
-
ranluxcl_state_t ranluxclstate;
|
|
398
|
-
ranluxcl_download_seed(&ranluxclstate, ranluxcltab);
|
|
399
|
-
ranluxcl_synchronize(&ranluxclstate);
|
|
400
|
-
ranluxcl_upload_seed(&ranluxclstate, ranluxcltab);
|
|
401
|
-
}}
|
|
402
|
-
""".format(
|
|
403
|
-
defines=self.generate_settings_defines(),
|
|
404
|
-
)
|
|
405
|
-
prg = cl.Program(self.context, src).build()
|
|
406
|
-
return prg.sync
|
|
407
|
-
|
|
408
|
-
def synchronize(self, queue):
|
|
409
|
-
"""The generator gets inefficient when different work items invoke the
|
|
410
|
-
generator a differing number of times. This function ensures
|
|
411
|
-
efficiency.
|
|
412
|
-
"""
|
|
413
|
-
|
|
414
|
-
self.get_sync_kernel()(queue, (self.num_work_items,),
|
|
415
|
-
self.wg_size, self.state.data)
|
|
416
|
-
|
|
417
|
-
# }}}
|
|
418
59
|
|
|
419
60
|
|
|
420
61
|
# {{{ Random123 generators
|
|
@@ -738,14 +379,9 @@ def _get_generator(context):
|
|
|
738
379
|
return gen
|
|
739
380
|
|
|
740
381
|
|
|
741
|
-
def fill_rand(result, queue=None, luxury=None, a=0, b=1):
|
|
382
|
+
def fill_rand(result, queue=None, a=0, b=1):
|
|
742
383
|
"""Fill *result* with random values in the range :math:`[0, 1)`.
|
|
743
384
|
"""
|
|
744
|
-
if luxury is not None:
|
|
745
|
-
from warnings import warn
|
|
746
|
-
warn("Specifying the 'luxury' argument is deprecated and will stop being "
|
|
747
|
-
"supported in PyOpenCL 2018.x", stacklevel=2)
|
|
748
|
-
|
|
749
385
|
if queue is None:
|
|
750
386
|
queue = result.queue
|
|
751
387
|
gen = _get_generator(queue.context)
|
pyopencl/cltypes.py
CHANGED
|
@@ -18,9 +18,12 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
18
18
|
THE SOFTWARE.
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
+
import warnings
|
|
22
|
+
|
|
21
23
|
import numpy as np
|
|
24
|
+
|
|
22
25
|
from pyopencl.tools import get_or_register_dtype
|
|
23
|
-
|
|
26
|
+
|
|
24
27
|
|
|
25
28
|
if __file__.endswith("array.py"):
|
|
26
29
|
warnings.warn(
|
pyopencl/compyte/dtypes.py
CHANGED
|
@@ -111,8 +111,8 @@ class DTypeRegistry:
|
|
|
111
111
|
# {{{ C types
|
|
112
112
|
|
|
113
113
|
def fill_registry_with_c_types(reg, respect_windows, include_bool=True):
|
|
114
|
-
from sys import platform
|
|
115
114
|
import struct
|
|
115
|
+
from sys import platform
|
|
116
116
|
|
|
117
117
|
if include_bool:
|
|
118
118
|
# bool is of unspecified size in the OpenCL spec and may in fact be
|
|
@@ -7,9 +7,9 @@ that ndim is 0 as with all scalar type.
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
import numpy
|
|
10
|
+
import pygpu_ndarray as gpu_ndarray
|
|
10
11
|
import StringIO
|
|
11
12
|
|
|
12
|
-
import pygpu_ndarray as gpu_ndarray
|
|
13
13
|
_CL_MODE = hasattr(gpu_ndarray, "set_opencl_context")
|
|
14
14
|
|
|
15
15
|
|
|
@@ -20,6 +20,7 @@ if _CL_MODE:
|
|
|
20
20
|
from pyopencl.tools import dtype_to_ctype
|
|
21
21
|
# import pyopencl._mymako as mako
|
|
22
22
|
from pyopencl._cluda import CLUDA_PREAMBLE
|
|
23
|
+
|
|
23
24
|
# TODO: use mako to get rid of the %if
|
|
24
25
|
CLUDA_PREAMBLE = CLUDA_PREAMBLE[:455]
|
|
25
26
|
CLUDA_PREAMBLE += """
|
|
@@ -51,12 +52,12 @@ else:
|
|
|
51
52
|
#define GDIM_2 gridDim.z
|
|
52
53
|
"""
|
|
53
54
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
from theano.tensor import TensorType
|
|
55
|
+
import logging
|
|
56
|
+
|
|
57
57
|
import theano
|
|
58
|
+
from theano import Apply, scalar
|
|
59
|
+
from theano.tensor import TensorType
|
|
58
60
|
|
|
59
|
-
import logging
|
|
60
61
|
_logger_name = 'compyte.gen_elemwise'
|
|
61
62
|
_logger = logging.getLogger(_logger_name)
|
|
62
63
|
_logger.setLevel(logging.INFO)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import numpy
|
|
2
2
|
import StringIO
|
|
3
3
|
|
|
4
|
-
|
|
5
4
|
_CL_MODE = False # "pyopencl" in __name__
|
|
6
5
|
|
|
7
6
|
|
|
@@ -12,6 +11,7 @@ if _CL_MODE:
|
|
|
12
11
|
from pyopencl.tools import dtype_to_ctype
|
|
13
12
|
# import pyopencl._mymako as mako
|
|
14
13
|
from pyopencl._cluda import CLUDA_PREAMBLE
|
|
14
|
+
|
|
15
15
|
# TODO: use mako to get rid of the %if
|
|
16
16
|
CLUDA_PREAMBLE = CLUDA_PREAMBLE[:455]
|
|
17
17
|
CLUDA_PREAMBLE += """
|
|
@@ -43,13 +43,13 @@ else:
|
|
|
43
43
|
#define GDIM_2 gridDim.z
|
|
44
44
|
"""
|
|
45
45
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
from theano.tensor import TensorType
|
|
49
|
-
from theano.sandbox.cuda import CudaNdarrayType
|
|
46
|
+
import logging
|
|
47
|
+
|
|
50
48
|
import theano
|
|
49
|
+
from theano import Apply, scalar
|
|
50
|
+
from theano.sandbox.cuda import CudaNdarrayType
|
|
51
|
+
from theano.tensor import TensorType
|
|
51
52
|
|
|
52
|
-
import logging
|
|
53
53
|
_logger_name = 'compyte.gen_reduction'
|
|
54
54
|
_logger = logging.getLogger(_logger_name)
|
|
55
55
|
_logger.setLevel(logging.INFO)
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
|
-
|
|
3
|
-
from distutils.core import setup, Extension
|
|
4
2
|
from distutils.command.build_ext import build_ext
|
|
3
|
+
from distutils.core import Extension, setup
|
|
5
4
|
from distutils.dep_util import newer
|
|
5
|
+
|
|
6
6
|
import numpy as np
|
|
7
7
|
|
|
8
8
|
|
|
@@ -82,6 +82,7 @@ class build_ext_nvcc(build_ext):
|
|
|
82
82
|
self.build_extension(ext)
|
|
83
83
|
|
|
84
84
|
import sys
|
|
85
|
+
|
|
85
86
|
if sys.platform == 'darwin':
|
|
86
87
|
libcl_args = {'extra_link_args': ['-framework', 'OpenCL']}
|
|
87
88
|
else:
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# TODO: test other dtype
|
|
2
|
+
from functools import reduce
|
|
3
|
+
|
|
2
4
|
import numpy
|
|
5
|
+
import pygpu_ndarray as gpu_ndarray
|
|
3
6
|
import theano
|
|
4
7
|
|
|
5
|
-
import pygpu_ndarray as gpu_ndarray
|
|
6
8
|
from .gen_elemwise import MyGpuNdArray, elemwise_collapses
|
|
7
|
-
from .test_gpu_ndarray import (dtypes_all, enable_double,
|
|
8
|
-
|
|
9
|
-
from functools import reduce
|
|
9
|
+
from .test_gpu_ndarray import (dtypes_all, enable_double, gen_gpu_nd_array,
|
|
10
|
+
product)
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def rand(shape, dtype):
|
pyopencl/elementwise.py
CHANGED
|
@@ -31,14 +31,12 @@ import enum
|
|
|
31
31
|
from typing import Any, List, Optional, Tuple, Union
|
|
32
32
|
|
|
33
33
|
import numpy as np
|
|
34
|
+
from pytools import memoize_method
|
|
34
35
|
|
|
35
36
|
import pyopencl as cl
|
|
36
|
-
from pyopencl.tools import context_dependent_memoize
|
|
37
37
|
from pyopencl.tools import (
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
from pytools import memoize_method
|
|
38
|
+
DtypedArgument, KernelTemplateBase, ScalarArg, VectorArg,
|
|
39
|
+
context_dependent_memoize, dtype_to_c_struct, dtype_to_ctype)
|
|
42
40
|
|
|
43
41
|
|
|
44
42
|
# {{{ elementwise kernel code generator
|
|
@@ -121,7 +119,7 @@ def get_elwise_kernel_and_types(
|
|
|
121
119
|
use_range: bool = False,
|
|
122
120
|
**kwargs: Any) -> Tuple[cl.Kernel, List[DtypedArgument]]:
|
|
123
121
|
|
|
124
|
-
from pyopencl.tools import parse_arg_list, get_arg_offset_adjuster_code
|
|
122
|
+
from pyopencl.tools import get_arg_offset_adjuster_code, parse_arg_list
|
|
125
123
|
parsed_args = parse_arg_list(arguments, with_offset=True)
|
|
126
124
|
|
|
127
125
|
auto_preamble = kwargs.pop("auto_preamble", True)
|
pyopencl/invoker.py
CHANGED
|
@@ -22,14 +22,16 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
22
22
|
THE SOFTWARE.
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
|
-
import
|
|
26
|
-
|
|
25
|
+
from typing import Any, Tuple
|
|
27
26
|
from warnings import warn
|
|
28
|
-
|
|
27
|
+
|
|
28
|
+
import numpy as np
|
|
29
29
|
from pytools.persistent_dict import WriteOncePersistentDict
|
|
30
30
|
from pytools.py_codegen import Indentation, PythonCodeGenerator
|
|
31
|
-
|
|
31
|
+
|
|
32
32
|
import pyopencl as cl
|
|
33
|
+
import pyopencl._cl as _cl
|
|
34
|
+
from pyopencl.tools import VectorArg, _NumpyTypesKeyBuilder
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
# {{{ arg packing helpers
|
|
@@ -373,10 +375,13 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
|
|
|
373
375
|
# }}}
|
|
374
376
|
|
|
375
377
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
378
|
+
if not cl._PYOPENCL_NO_CACHE:
|
|
379
|
+
from pytools.py_codegen import PicklableModule
|
|
380
|
+
invoker_cache: WriteOncePersistentDict[Any, Tuple[PicklableModule, str]] \
|
|
381
|
+
= WriteOncePersistentDict(
|
|
382
|
+
"pyopencl-invoker-cache-v42-nano",
|
|
383
|
+
key_builder=_NumpyTypesKeyBuilder(),
|
|
384
|
+
in_mem_cache_size=0)
|
|
380
385
|
|
|
381
386
|
|
|
382
387
|
def generate_enqueue_and_set_args(function_name,
|
|
@@ -400,7 +405,8 @@ def generate_enqueue_and_set_args(function_name,
|
|
|
400
405
|
|
|
401
406
|
if not from_cache:
|
|
402
407
|
pmod, enqueue_name = _generate_enqueue_and_set_args_module(*cache_key)
|
|
403
|
-
|
|
408
|
+
if not cl._PYOPENCL_NO_CACHE:
|
|
409
|
+
invoker_cache.store_if_not_present(cache_key, (pmod, enqueue_name))
|
|
404
410
|
|
|
405
411
|
return (
|
|
406
412
|
pmod.mod_globals[enqueue_name],
|
pyopencl/ipython_ext.py
CHANGED
pyopencl/reduction.py
CHANGED
|
@@ -35,9 +35,8 @@ import numpy as np
|
|
|
35
35
|
|
|
36
36
|
import pyopencl as cl
|
|
37
37
|
from pyopencl.tools import (
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
_process_code_for_macro)
|
|
38
|
+
DtypedArgument, KernelTemplateBase, _process_code_for_macro,
|
|
39
|
+
context_dependent_memoize, dtype_to_ctype)
|
|
41
40
|
|
|
42
41
|
|
|
43
42
|
# {{{ kernel source
|
|
@@ -177,6 +176,7 @@ def _get_reduction_source(
|
|
|
177
176
|
# }}}
|
|
178
177
|
|
|
179
178
|
from mako.template import Template
|
|
179
|
+
|
|
180
180
|
from pyopencl.characterize import has_double_support
|
|
181
181
|
|
|
182
182
|
arguments = ", ".join(arg.declarator() for arg in parsed_args)
|
|
@@ -219,8 +219,8 @@ def get_reduction_kernel(
|
|
|
219
219
|
map_expr = "pyopencl_reduction_inp[i]" if stage == 2 else "in[i]"
|
|
220
220
|
|
|
221
221
|
from pyopencl.tools import (
|
|
222
|
-
|
|
223
|
-
|
|
222
|
+
VectorArg, get_arg_list_scalar_arg_dtypes, get_arg_offset_adjuster_code,
|
|
223
|
+
parse_arg_list)
|
|
224
224
|
|
|
225
225
|
if arguments is None:
|
|
226
226
|
raise ValueError("arguments must not be None")
|