pyopencl 2024.1__cp311-cp311-macosx_11_0_arm64.whl → 2024.2__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (107)
  1. pyopencl/__init__.py +82 -80
  2. pyopencl/_cl.cpython-311-darwin.so +0 -0
  3. pyopencl/algorithm.py +8 -10
  4. pyopencl/array.py +16 -12
  5. pyopencl/bitonic_sort.py +5 -4
  6. pyopencl/cache.py +22 -22
  7. pyopencl/capture_call.py +4 -3
  8. pyopencl/characterize/__init__.py +4 -2
  9. pyopencl/characterize/performance.py +2 -1
  10. pyopencl/clmath.py +2 -1
  11. pyopencl/clrandom.py +5 -369
  12. pyopencl/cltypes.py +4 -1
  13. pyopencl/compyte/dtypes.py +1 -1
  14. pyopencl/compyte/ndarray/gen_elemwise.py +6 -5
  15. pyopencl/compyte/ndarray/gen_reduction.py +6 -6
  16. pyopencl/compyte/ndarray/setup_opencl.py +3 -2
  17. pyopencl/compyte/ndarray/test_gpu_elemwise.py +5 -4
  18. pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -1
  19. pyopencl/elementwise.py +4 -6
  20. pyopencl/invoker.py +15 -9
  21. pyopencl/ipython_ext.py +1 -1
  22. pyopencl/reduction.py +5 -5
  23. pyopencl/scan.py +17 -21
  24. pyopencl/tools.py +13 -16
  25. pyopencl/version.py +1 -1
  26. pyopencl-2024.2.data/data/CITATION.cff +74 -0
  27. {pyopencl-2024.1.dist-info → pyopencl-2024.2.data/data}/LICENSE +0 -23
  28. pyopencl-2024.2.data/data/Makefile.in +21 -0
  29. pyopencl-2024.2.data/data/README.rst +70 -0
  30. pyopencl-2024.2.data/data/README_SETUP.txt +34 -0
  31. pyopencl-2024.2.data/data/aksetup_helper.py +1013 -0
  32. pyopencl-2024.2.data/data/configure.py +6 -0
  33. pyopencl-2024.2.data/data/contrib/cldis.py +91 -0
  34. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/README +29 -0
  35. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/translate.py +1441 -0
  36. pyopencl-2024.2.data/data/contrib/pyopencl.vim +84 -0
  37. pyopencl-2024.2.data/data/doc/Makefile +23 -0
  38. pyopencl-2024.2.data/data/doc/algorithm.rst +214 -0
  39. pyopencl-2024.2.data/data/doc/array.rst +305 -0
  40. pyopencl-2024.2.data/data/doc/conf.py +26 -0
  41. pyopencl-2024.2.data/data/doc/howto.rst +105 -0
  42. pyopencl-2024.2.data/data/doc/index.rst +137 -0
  43. pyopencl-2024.2.data/data/doc/make_constants.py +561 -0
  44. pyopencl-2024.2.data/data/doc/misc.rst +885 -0
  45. pyopencl-2024.2.data/data/doc/runtime.rst +51 -0
  46. pyopencl-2024.2.data/data/doc/runtime_const.rst +30 -0
  47. pyopencl-2024.2.data/data/doc/runtime_gl.rst +78 -0
  48. pyopencl-2024.2.data/data/doc/runtime_memory.rst +527 -0
  49. pyopencl-2024.2.data/data/doc/runtime_platform.rst +184 -0
  50. pyopencl-2024.2.data/data/doc/runtime_program.rst +364 -0
  51. pyopencl-2024.2.data/data/doc/runtime_queue.rst +182 -0
  52. pyopencl-2024.2.data/data/doc/subst.rst +36 -0
  53. pyopencl-2024.2.data/data/doc/tools.rst +4 -0
  54. pyopencl-2024.2.data/data/doc/types.rst +42 -0
  55. pyopencl-2024.2.data/data/examples/black-hole-accretion.py +2227 -0
  56. pyopencl-2024.2.data/data/examples/demo-struct-reduce.py +75 -0
  57. pyopencl-2024.2.data/data/examples/demo.py +39 -0
  58. pyopencl-2024.2.data/data/examples/demo_array.py +32 -0
  59. pyopencl-2024.2.data/data/examples/demo_array_svm.py +37 -0
  60. pyopencl-2024.2.data/data/examples/demo_elementwise.py +34 -0
  61. pyopencl-2024.2.data/data/examples/demo_elementwise_complex.py +53 -0
  62. pyopencl-2024.2.data/data/examples/demo_mandelbrot.py +183 -0
  63. pyopencl-2024.2.data/data/examples/demo_meta_codepy.py +56 -0
  64. pyopencl-2024.2.data/data/examples/demo_meta_template.py +55 -0
  65. pyopencl-2024.2.data/data/examples/dump-performance.py +38 -0
  66. pyopencl-2024.2.data/data/examples/dump-properties.py +86 -0
  67. pyopencl-2024.2.data/data/examples/gl_interop_demo.py +84 -0
  68. pyopencl-2024.2.data/data/examples/gl_particle_animation.py +218 -0
  69. pyopencl-2024.2.data/data/examples/ipython-demo.ipynb +203 -0
  70. pyopencl-2024.2.data/data/examples/median-filter.py +99 -0
  71. pyopencl-2024.2.data/data/examples/n-body.py +1070 -0
  72. pyopencl-2024.2.data/data/examples/narray.py +37 -0
  73. pyopencl-2024.2.data/data/examples/noisyImage.jpg +0 -0
  74. pyopencl-2024.2.data/data/examples/pi-monte-carlo.py +1166 -0
  75. pyopencl-2024.2.data/data/examples/svm.py +82 -0
  76. pyopencl-2024.2.data/data/examples/transpose.py +229 -0
  77. pyopencl-2024.2.data/data/pytest.ini +3 -0
  78. pyopencl-2024.2.data/data/src/bitlog.cpp +51 -0
  79. pyopencl-2024.2.data/data/src/bitlog.hpp +83 -0
  80. pyopencl-2024.2.data/data/src/clinfo_ext.h +134 -0
  81. pyopencl-2024.2.data/data/src/mempool.hpp +444 -0
  82. pyopencl-2024.2.data/data/src/pyopencl_ext.h +77 -0
  83. pyopencl-2024.2.data/data/src/tools.hpp +90 -0
  84. pyopencl-2024.2.data/data/src/wrap_cl.cpp +61 -0
  85. pyopencl-2024.2.data/data/src/wrap_cl.hpp +5853 -0
  86. pyopencl-2024.2.data/data/src/wrap_cl_part_1.cpp +369 -0
  87. pyopencl-2024.2.data/data/src/wrap_cl_part_2.cpp +702 -0
  88. pyopencl-2024.2.data/data/src/wrap_constants.cpp +1274 -0
  89. pyopencl-2024.2.data/data/src/wrap_helpers.hpp +213 -0
  90. pyopencl-2024.2.data/data/src/wrap_mempool.cpp +731 -0
  91. pyopencl-2024.2.data/data/test/add-vectors-32.spv +0 -0
  92. pyopencl-2024.2.data/data/test/add-vectors-64.spv +0 -0
  93. pyopencl-2024.2.data/data/test/empty-header.h +1 -0
  94. pyopencl-2024.2.data/data/test/test_algorithm.py +1180 -0
  95. pyopencl-2024.2.data/data/test/test_array.py +2392 -0
  96. pyopencl-2024.2.data/data/test/test_arrays_in_structs.py +100 -0
  97. pyopencl-2024.2.data/data/test/test_clmath.py +529 -0
  98. pyopencl-2024.2.data/data/test/test_clrandom.py +75 -0
  99. pyopencl-2024.2.data/data/test/test_enqueue_copy.py +271 -0
  100. pyopencl-2024.2.data/data/test/test_wrapper.py +1554 -0
  101. pyopencl-2024.2.dist-info/LICENSE +282 -0
  102. {pyopencl-2024.1.dist-info → pyopencl-2024.2.dist-info}/METADATA +12 -12
  103. pyopencl-2024.2.dist-info/RECORD +122 -0
  104. {pyopencl-2024.1.dist-info → pyopencl-2024.2.dist-info}/WHEEL +1 -1
  105. pyopencl/cl/pyopencl-ranluxcl.cl +0 -957
  106. pyopencl-2024.1.dist-info/RECORD +0 -48
  107. {pyopencl-2024.1.dist-info → pyopencl-2024.2.dist-info}/top_level.txt +0 -0
pyopencl/clrandom.py CHANGED
@@ -24,7 +24,7 @@ THE SOFTWARE.
24
24
  # {{{ documentation
25
25
 
26
26
  __doc__ = """
27
- PyOpenCL now includes and uses some of the `Random123 random number generators
27
+ PyOpenCL includes and uses some of the `Random123 random number generators
28
28
  <https://www.deshawresearch.com/resources.html>`__ by D.E. Shaw
29
29
  Research. In addition to being usable through the convenience functions above,
30
30
  they are available in any piece of code compiled through PyOpenCL by::
@@ -38,15 +38,6 @@ and the `Threefry source
38
38
  <https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/threefry.cl>`__
39
39
  for some documentation if you're planning on using Random123 directly.
40
40
 
41
- .. note::
42
-
43
- PyOpenCL previously had documented support for the RANLUXCL random number
44
- generator (``https://bitbucket.org/ivarun/ranluxcl``) by Ivar Ursin
45
- Nikolaisen. This support is now deprecated because of the general slowness
46
- of these generators and will be removed from PyOpenCL in the 2018.x series.
47
- All users are encouraged to switch to one of the Random123 generators,
48
- :class:`PhiloxGenerator` or :class:`ThreefryGenerator`.
49
-
50
41
  .. autoclass:: PhiloxGenerator
51
42
 
52
43
  .. autoclass:: ThreefryGenerator
@@ -58,363 +49,13 @@ for some documentation if you're planning on using Random123 directly.
58
49
 
59
50
  # }}}
60
51
 
52
+ import numpy as np
53
+ from pytools import memoize_method
54
+
61
55
  import pyopencl as cl
62
56
  import pyopencl.array as cl_array
63
57
  import pyopencl.cltypes as cltypes
64
58
  from pyopencl.tools import first_arg_dependent_memoize
65
- from pytools import memoize_method
66
-
67
- import numpy as np
68
-
69
-
70
- # {{{ RanluxGenerator (deprecated)
71
-
72
- class RanluxGenerator:
73
- """
74
- .. warning::
75
-
76
- This class is deprecated, to be removed in PyOpenCL 2018.x.
77
-
78
- .. versionadded:: 2011.2
79
-
80
- .. attribute:: state
81
-
82
- A :class:`pyopencl.array.Array` containing the state of the generator.
83
-
84
- .. attribute:: nskip
85
-
86
- nskip is an integer which can (optionally) be defined in the kernel
87
- code as RANLUXCL_NSKIP. If this is done the generator will be faster
88
- for luxury setting 0 and 1, or when the p-value is manually set to a
89
- multiple of 24.
90
- """
91
-
92
- def __init__(self, queue, num_work_items=None,
93
- luxury=None, seed=None, no_warmup=False,
94
- use_legacy_init=False, max_work_items=None):
95
- """
96
- :param queue: :class:`pyopencl.CommandQueue`, only used for initialization
97
- :param luxury: the "luxury value" of the generator, and should be 0-4,
98
- where 0 is fastest and 4 produces the best numbers. It can also be
99
- >=24, in which case it directly sets the p-value of RANLUXCL.
100
- :param num_work_items: is the number of generators to initialize,
101
- usually corresponding to the number of work-items in the NDRange
102
- RANLUXCL will be used with. May be *None*, in which case a default
103
- value is used.
104
- :param max_work_items: should reflect the maximum number of work-items
105
- that will be used on any parallel instance of RANLUXCL. So for
106
- instance if we are launching 5120 work-items on GPU1 and 10240
107
- work-items on GPU2, GPU1's RANLUXCLTab would be generated by
108
- calling ranluxcl_intialization with numWorkitems = 5120 while
109
- GPU2's RANLUXCLTab would use numWorkitems = 10240. However
110
- maxWorkitems must be at least 10240 for both GPU1 and GPU2, and it
111
- must be set to the same value for both. (may be *None*)
112
-
113
- .. versionchanged:: 2013.1
114
-
115
- Added default value for ``num_work_items``.
116
- """
117
-
118
- from warnings import warn
119
- warn("Ranlux random number generation is deprecated and will go away "
120
- "in 2022.", DeprecationWarning, stacklevel=2)
121
-
122
- if luxury is None:
123
- luxury = 4
124
-
125
- if num_work_items is None:
126
- if queue.device.type & cl.device_type.CPU:
127
- num_work_items = 8 * queue.device.max_compute_units
128
- else:
129
- num_work_items = 64 * queue.device.max_compute_units
130
-
131
- if seed is None:
132
- from time import time
133
- seed = int(time()*1e6) % 2 << 30
134
-
135
- self.context = queue.context
136
- self.luxury = luxury
137
- self.num_work_items = num_work_items
138
-
139
- from pyopencl.characterize import has_double_support
140
- self.support_double = has_double_support(queue.device)
141
-
142
- self.no_warmup = no_warmup
143
- self.use_legacy_init = use_legacy_init
144
- self.max_work_items = max_work_items
145
-
146
- src = """
147
- %(defines)s
148
-
149
- #include <pyopencl-ranluxcl.cl>
150
-
151
- kernel void init_ranlux(unsigned seeds,
152
- global ranluxcl_state_t *ranluxcltab)
153
- {
154
- if (get_global_id(0) < %(num_work_items)d)
155
- ranluxcl_initialization(seeds, ranluxcltab);
156
- }
157
- """ % {
158
- "defines": self.generate_settings_defines(),
159
- "num_work_items": num_work_items
160
- }
161
- prg = cl.Program(queue.context, src).build()
162
-
163
- # {{{ compute work group size
164
-
165
- wg_size = None
166
-
167
- import sys
168
- import platform
169
- if ("darwin" in sys.platform
170
- and "Apple" in queue.device.platform.vendor
171
- and platform.mac_ver()[0].startswith("10.7")
172
- and queue.device.type & cl.device_type.CPU):
173
- wg_size = (1,)
174
-
175
- self.wg_size = wg_size
176
-
177
- # }}}
178
-
179
- self.state = cl_array.empty(queue, (num_work_items, 112), dtype=np.uint8)
180
- self.state.fill(17)
181
-
182
- prg.init_ranlux(queue, (num_work_items,), self.wg_size, np.uint32(seed),
183
- self.state.data)
184
-
185
- def generate_settings_defines(self, include_double_pragma=True):
186
- lines = []
187
- if include_double_pragma and self.support_double:
188
- lines.append("""
189
- #if __OPENCL_C_VERSION__ < 120
190
- #pragma OPENCL EXTENSION cl_khr_fp64: enable
191
- #endif
192
- """)
193
-
194
- lines.append("#define RANLUXCL_LUX %d" % self.luxury)
195
-
196
- if self.no_warmup:
197
- lines.append("#define RANLUXCL_NO_WARMUP")
198
-
199
- if self.support_double:
200
- lines.append("#define RANLUXCL_SUPPORT_DOUBLE")
201
-
202
- if self.use_legacy_init:
203
- lines.append("#define RANLUXCL_USE_LEGACY_INITIALIZATION")
204
-
205
- if self.max_work_items:
206
- lines.append(
207
- "#define RANLUXCL_MAXWORKITEMS %d" % self.max_work_items)
208
-
209
- return "\n".join(lines)
210
-
211
- @memoize_method
212
- def get_gen_kernel(self, dtype, distribution="uniform"):
213
- size_multiplier = 1
214
- arg_dtype = dtype
215
-
216
- if dtype == np.float64:
217
- bits = 64
218
- c_type = "double"
219
- rng_expr = "(shift + scale * gen)"
220
- elif dtype == np.float32:
221
- bits = 32
222
- c_type = "float"
223
- rng_expr = "(shift + scale * gen)"
224
- elif dtype == cltypes.float2:
225
- bits = 32
226
- c_type = "float"
227
- rng_expr = "(shift + scale * gen)"
228
- size_multiplier = 2
229
- arg_dtype = np.float32
230
- elif dtype in [cltypes.float3, cltypes.float4]:
231
- bits = 32
232
- c_type = "float"
233
- rng_expr = "(shift + scale * gen)"
234
- size_multiplier = 4
235
- arg_dtype = np.float32
236
- elif dtype == np.int32:
237
- assert distribution == "uniform"
238
- bits = 32
239
- c_type = "int"
240
- rng_expr = ("(shift "
241
- "+ convert_int4((float) scale * gen) "
242
- "+ convert_int4(((float) scale / (1<<24)) * gen))")
243
-
244
- elif dtype == np.int64:
245
- assert distribution == "uniform"
246
- if self.support_double:
247
- bits = 64
248
- else:
249
- bits = 32
250
- c_type = "long"
251
- rng_expr = ("(shift "
252
- "+ convert_long4((float) scale * gen) "
253
- "+ convert_long4(((float) scale / (1l<<24)) * gen)"
254
- "+ convert_long4(((float) scale / (1l<<48)) * gen)"
255
- ")")
256
-
257
- else:
258
- raise TypeError("unsupported RNG data type '%s'" % dtype)
259
-
260
- rl_flavor = "%d%s" % (bits, {
261
- "uniform": "",
262
- "normal": "norm"
263
- }[distribution])
264
-
265
- src = """//CL//
266
- %(defines)s
267
-
268
- #include <pyopencl-ranluxcl.cl>
269
-
270
- typedef %(output_t)s output_t;
271
- typedef %(output_t)s4 output_vec_t;
272
- #define NUM_WORKITEMS %(num_work_items)d
273
- #define RANLUX_FUNC ranluxcl%(rlflavor)s
274
- #define GET_RANDOM_NUM(gen) %(rng_expr)s
275
-
276
- kernel void generate(
277
- global ranluxcl_state_t *ranluxcltab,
278
- global output_t *output,
279
- unsigned long out_size,
280
- output_t scale,
281
- output_t shift)
282
- {
283
-
284
- ranluxcl_state_t ranluxclstate;
285
- ranluxcl_download_seed(&ranluxclstate, ranluxcltab);
286
-
287
- // output bulk
288
- unsigned long idx = get_global_id(0)*4;
289
- while (idx + 4 < out_size)
290
- {
291
- output_vec_t ran = GET_RANDOM_NUM(RANLUX_FUNC(&ranluxclstate));
292
- vstore4(ran, 0, &output[idx]);
293
- idx += 4*NUM_WORKITEMS;
294
- }
295
-
296
- // output tail
297
- output_vec_t tail_ran = GET_RANDOM_NUM(RANLUX_FUNC(&ranluxclstate));
298
- if (idx < out_size)
299
- output[idx] = tail_ran.x;
300
- if (idx+1 < out_size)
301
- output[idx+1] = tail_ran.y;
302
- if (idx+2 < out_size)
303
- output[idx+2] = tail_ran.z;
304
- if (idx+3 < out_size)
305
- output[idx+3] = tail_ran.w;
306
-
307
- ranluxcl_upload_seed(&ranluxclstate, ranluxcltab);
308
- }
309
- """ % {
310
- "defines": self.generate_settings_defines(),
311
- "rlflavor": rl_flavor,
312
- "output_t": c_type,
313
- "num_work_items": self.num_work_items,
314
- "rng_expr": rng_expr
315
- }
316
-
317
- prg = cl.Program(self.context, src).build()
318
- knl = prg.generate
319
- knl.set_scalar_arg_dtypes([None, None, np.uint64, arg_dtype, arg_dtype])
320
-
321
- return knl, size_multiplier
322
-
323
- def fill_uniform(self, ary, a=0, b=1, queue=None):
324
- """Fill *ary* with uniformly distributed random numbers in the interval
325
- *(a, b)*, endpoints excluded.
326
-
327
- :return: a :class:`pyopencl.Event`
328
-
329
- .. versionchanged:: 2014.1.1
330
-
331
- Added return value.
332
- """
333
-
334
- if queue is None:
335
- queue = ary.queue
336
-
337
- knl, size_multiplier = self.get_gen_kernel(ary.dtype, "uniform")
338
- evt = knl(queue,
339
- (self.num_work_items,), None,
340
- self.state.data, ary.data, ary.size*size_multiplier,
341
- b-a, a, wait_for=ary.events)
342
- ary.add_event(evt)
343
- self.state.add_event(evt)
344
- return ary
345
-
346
- def uniform(self, *args, **kwargs):
347
- """Make a new empty array, apply :meth:`fill_uniform` to it.
348
- """
349
- a = kwargs.pop("a", 0)
350
- b = kwargs.pop("b", 1)
351
-
352
- result = cl_array.empty(*args, **kwargs)
353
- self.fill_uniform(result, queue=result.queue, a=a, b=b)
354
- return result
355
-
356
- def fill_normal(self, ary, mu=0, sigma=1, queue=None):
357
- """Fill *ary* with normally distributed numbers with mean *mu* and
358
- standard deviation *sigma*.
359
-
360
- .. versionchanged:: 2014.1.1
361
-
362
- Added return value.
363
- """
364
-
365
- if queue is None:
366
- queue = ary.queue
367
-
368
- knl, size_multiplier = self.get_gen_kernel(ary.dtype, "normal")
369
- evt = knl(queue,
370
- (self.num_work_items,), self.wg_size,
371
- self.state.data, ary.data, ary.size*size_multiplier, sigma, mu,
372
- wait_for=ary.events)
373
- ary.add_event(evt)
374
- self.state.add_event(evt)
375
- return evt
376
-
377
- def normal(self, *args, **kwargs):
378
- """Make a new empty array, apply :meth:`fill_normal` to it.
379
- """
380
- mu = kwargs.pop("mu", 0)
381
- sigma = kwargs.pop("sigma", 1)
382
-
383
- result = cl_array.empty(*args, **kwargs)
384
- self.fill_normal(result, queue=result.queue, mu=mu, sigma=sigma)
385
- return result
386
-
387
- @memoize_method
388
- def get_sync_kernel(self):
389
- src = """//CL//
390
- {defines}
391
-
392
- #include <pyopencl-ranluxcl.cl>
393
-
394
- kernel void sync(
395
- global ranluxcl_state_t *ranluxcltab)
396
- {{
397
- ranluxcl_state_t ranluxclstate;
398
- ranluxcl_download_seed(&ranluxclstate, ranluxcltab);
399
- ranluxcl_synchronize(&ranluxclstate);
400
- ranluxcl_upload_seed(&ranluxclstate, ranluxcltab);
401
- }}
402
- """.format(
403
- defines=self.generate_settings_defines(),
404
- )
405
- prg = cl.Program(self.context, src).build()
406
- return prg.sync
407
-
408
- def synchronize(self, queue):
409
- """The generator gets inefficient when different work items invoke the
410
- generator a differing number of times. This function ensures
411
- efficiency.
412
- """
413
-
414
- self.get_sync_kernel()(queue, (self.num_work_items,),
415
- self.wg_size, self.state.data)
416
-
417
- # }}}
418
59
 
419
60
 
420
61
  # {{{ Random123 generators
@@ -738,14 +379,9 @@ def _get_generator(context):
738
379
  return gen
739
380
 
740
381
 
741
- def fill_rand(result, queue=None, luxury=None, a=0, b=1):
382
+ def fill_rand(result, queue=None, a=0, b=1):
742
383
  """Fill *result* with random values in the range :math:`[0, 1)`.
743
384
  """
744
- if luxury is not None:
745
- from warnings import warn
746
- warn("Specifying the 'luxury' argument is deprecated and will stop being "
747
- "supported in PyOpenCL 2018.x", stacklevel=2)
748
-
749
385
  if queue is None:
750
386
  queue = result.queue
751
387
  gen = _get_generator(queue.context)
pyopencl/cltypes.py CHANGED
@@ -18,9 +18,12 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
18
18
  THE SOFTWARE.
19
19
  """
20
20
 
21
+ import warnings
22
+
21
23
  import numpy as np
24
+
22
25
  from pyopencl.tools import get_or_register_dtype
23
- import warnings
26
+
24
27
 
25
28
  if __file__.endswith("array.py"):
26
29
  warnings.warn(
pyopencl/compyte/dtypes.py CHANGED
@@ -111,8 +111,8 @@ class DTypeRegistry:
111
111
  # {{{ C types
112
112
 
113
113
  def fill_registry_with_c_types(reg, respect_windows, include_bool=True):
114
- from sys import platform
115
114
  import struct
115
+ from sys import platform
116
116
 
117
117
  if include_bool:
118
118
  # bool is of unspecified size in the OpenCL spec and may in fact be
pyopencl/compyte/ndarray/gen_elemwise.py CHANGED
@@ -7,9 +7,9 @@ that ndim is 0 as with all scalar type.
7
7
 
8
8
 
9
9
  import numpy
10
+ import pygpu_ndarray as gpu_ndarray
10
11
  import StringIO
11
12
 
12
- import pygpu_ndarray as gpu_ndarray
13
13
  _CL_MODE = hasattr(gpu_ndarray, "set_opencl_context")
14
14
 
15
15
 
@@ -20,6 +20,7 @@ if _CL_MODE:
20
20
  from pyopencl.tools import dtype_to_ctype
21
21
  # import pyopencl._mymako as mako
22
22
  from pyopencl._cluda import CLUDA_PREAMBLE
23
+
23
24
  # TODO: use mako to get rid of the %if
24
25
  CLUDA_PREAMBLE = CLUDA_PREAMBLE[:455]
25
26
  CLUDA_PREAMBLE += """
@@ -51,12 +52,12 @@ else:
51
52
  #define GDIM_2 gridDim.z
52
53
  """
53
54
 
54
- from theano import Apply
55
- from theano import scalar
56
- from theano.tensor import TensorType
55
+ import logging
56
+
57
57
  import theano
58
+ from theano import Apply, scalar
59
+ from theano.tensor import TensorType
58
60
 
59
- import logging
60
61
  _logger_name = 'compyte.gen_elemwise'
61
62
  _logger = logging.getLogger(_logger_name)
62
63
  _logger.setLevel(logging.INFO)
pyopencl/compyte/ndarray/gen_reduction.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import numpy
2
2
  import StringIO
3
3
 
4
-
5
4
  _CL_MODE = False # "pyopencl" in __name__
6
5
 
7
6
 
@@ -12,6 +11,7 @@ if _CL_MODE:
12
11
  from pyopencl.tools import dtype_to_ctype
13
12
  # import pyopencl._mymako as mako
14
13
  from pyopencl._cluda import CLUDA_PREAMBLE
14
+
15
15
  # TODO: use mako to get rid of the %if
16
16
  CLUDA_PREAMBLE = CLUDA_PREAMBLE[:455]
17
17
  CLUDA_PREAMBLE += """
@@ -43,13 +43,13 @@ else:
43
43
  #define GDIM_2 gridDim.z
44
44
  """
45
45
 
46
- from theano import Apply
47
- from theano import scalar
48
- from theano.tensor import TensorType
49
- from theano.sandbox.cuda import CudaNdarrayType
46
+ import logging
47
+
50
48
  import theano
49
+ from theano import Apply, scalar
50
+ from theano.sandbox.cuda import CudaNdarrayType
51
+ from theano.tensor import TensorType
51
52
 
52
- import logging
53
53
  _logger_name = 'compyte.gen_reduction'
54
54
  _logger = logging.getLogger(_logger_name)
55
55
  _logger.setLevel(logging.INFO)
pyopencl/compyte/ndarray/setup_opencl.py CHANGED
@@ -1,8 +1,8 @@
1
1
  import os
2
-
3
- from distutils.core import setup, Extension
4
2
  from distutils.command.build_ext import build_ext
3
+ from distutils.core import Extension, setup
5
4
  from distutils.dep_util import newer
5
+
6
6
  import numpy as np
7
7
 
8
8
 
@@ -82,6 +82,7 @@ class build_ext_nvcc(build_ext):
82
82
  self.build_extension(ext)
83
83
 
84
84
  import sys
85
+
85
86
  if sys.platform == 'darwin':
86
87
  libcl_args = {'extra_link_args': ['-framework', 'OpenCL']}
87
88
  else:
pyopencl/compyte/ndarray/test_gpu_elemwise.py CHANGED
@@ -1,12 +1,13 @@
1
1
  # TODO: test other dtype
2
+ from functools import reduce
3
+
2
4
  import numpy
5
+ import pygpu_ndarray as gpu_ndarray
3
6
  import theano
4
7
 
5
- import pygpu_ndarray as gpu_ndarray
6
8
  from .gen_elemwise import MyGpuNdArray, elemwise_collapses
7
- from .test_gpu_ndarray import (dtypes_all, enable_double,
8
- gen_gpu_nd_array, product)
9
- from functools import reduce
9
+ from .test_gpu_ndarray import (dtypes_all, enable_double, gen_gpu_nd_array,
10
+ product)
10
11
 
11
12
 
12
13
  def rand(shape, dtype):
pyopencl/compyte/ndarray/test_gpu_ndarray.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import copy
2
2
 
3
3
  import numpy
4
-
5
4
  import pygpu_ndarray as gpu_ndarray
6
5
 
7
6
  enable_double = True
pyopencl/elementwise.py CHANGED
@@ -31,14 +31,12 @@ import enum
31
31
  from typing import Any, List, Optional, Tuple, Union
32
32
 
33
33
  import numpy as np
34
+ from pytools import memoize_method
34
35
 
35
36
  import pyopencl as cl
36
- from pyopencl.tools import context_dependent_memoize
37
37
  from pyopencl.tools import (
38
- dtype_to_ctype, DtypedArgument, VectorArg, ScalarArg,
39
- KernelTemplateBase, dtype_to_c_struct)
40
-
41
- from pytools import memoize_method
38
+ DtypedArgument, KernelTemplateBase, ScalarArg, VectorArg,
39
+ context_dependent_memoize, dtype_to_c_struct, dtype_to_ctype)
42
40
 
43
41
 
44
42
  # {{{ elementwise kernel code generator
@@ -121,7 +119,7 @@ def get_elwise_kernel_and_types(
121
119
  use_range: bool = False,
122
120
  **kwargs: Any) -> Tuple[cl.Kernel, List[DtypedArgument]]:
123
121
 
124
- from pyopencl.tools import parse_arg_list, get_arg_offset_adjuster_code
122
+ from pyopencl.tools import get_arg_offset_adjuster_code, parse_arg_list
125
123
  parsed_args = parse_arg_list(arguments, with_offset=True)
126
124
 
127
125
  auto_preamble = kwargs.pop("auto_preamble", True)
pyopencl/invoker.py CHANGED
@@ -22,14 +22,16 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- import numpy as np
26
-
25
+ from typing import Any, Tuple
27
26
  from warnings import warn
28
- import pyopencl._cl as _cl
27
+
28
+ import numpy as np
29
29
  from pytools.persistent_dict import WriteOncePersistentDict
30
30
  from pytools.py_codegen import Indentation, PythonCodeGenerator
31
- from pyopencl.tools import _NumpyTypesKeyBuilder, VectorArg
31
+
32
32
  import pyopencl as cl
33
+ import pyopencl._cl as _cl
34
+ from pyopencl.tools import VectorArg, _NumpyTypesKeyBuilder
33
35
 
34
36
 
35
37
  # {{{ arg packing helpers
@@ -373,10 +375,13 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
373
375
  # }}}
374
376
 
375
377
 
376
- invoker_cache = WriteOncePersistentDict(
377
- "pyopencl-invoker-cache-v41",
378
- key_builder=_NumpyTypesKeyBuilder(),
379
- in_mem_cache_size=0)
378
+ if not cl._PYOPENCL_NO_CACHE:
379
+ from pytools.py_codegen import PicklableModule
380
+ invoker_cache: WriteOncePersistentDict[Any, Tuple[PicklableModule, str]] \
381
+ = WriteOncePersistentDict(
382
+ "pyopencl-invoker-cache-v42-nano",
383
+ key_builder=_NumpyTypesKeyBuilder(),
384
+ in_mem_cache_size=0)
380
385
 
381
386
 
382
387
  def generate_enqueue_and_set_args(function_name,
@@ -400,7 +405,8 @@ def generate_enqueue_and_set_args(function_name,
400
405
 
401
406
  if not from_cache:
402
407
  pmod, enqueue_name = _generate_enqueue_and_set_args_module(*cache_key)
403
- invoker_cache.store_if_not_present(cache_key, (pmod, enqueue_name))
408
+ if not cl._PYOPENCL_NO_CACHE:
409
+ invoker_cache.store_if_not_present(cache_key, (pmod, enqueue_name))
404
410
 
405
411
  return (
406
412
  pmod.mod_globals[enqueue_name],
pyopencl/ipython_ext.py CHANGED
@@ -1,4 +1,4 @@
1
- from IPython.core.magic import (magics_class, Magics, cell_magic, line_magic)
1
+ from IPython.core.magic import Magics, cell_magic, line_magic, magics_class
2
2
 
3
3
  import pyopencl as cl
4
4
 
pyopencl/reduction.py CHANGED
@@ -35,9 +35,8 @@ import numpy as np
35
35
 
36
36
  import pyopencl as cl
37
37
  from pyopencl.tools import (
38
- DtypedArgument, KernelTemplateBase,
39
- context_dependent_memoize, dtype_to_ctype,
40
- _process_code_for_macro)
38
+ DtypedArgument, KernelTemplateBase, _process_code_for_macro,
39
+ context_dependent_memoize, dtype_to_ctype)
41
40
 
42
41
 
43
42
  # {{{ kernel source
@@ -177,6 +176,7 @@ def _get_reduction_source(
177
176
  # }}}
178
177
 
179
178
  from mako.template import Template
179
+
180
180
  from pyopencl.characterize import has_double_support
181
181
 
182
182
  arguments = ", ".join(arg.declarator() for arg in parsed_args)
@@ -219,8 +219,8 @@ def get_reduction_kernel(
219
219
  map_expr = "pyopencl_reduction_inp[i]" if stage == 2 else "in[i]"
220
220
 
221
221
  from pyopencl.tools import (
222
- parse_arg_list, get_arg_list_scalar_arg_dtypes,
223
- get_arg_offset_adjuster_code, VectorArg)
222
+ VectorArg, get_arg_list_scalar_arg_dtypes, get_arg_offset_adjuster_code,
223
+ parse_arg_list)
224
224
 
225
225
  if arguments is None:
226
226
  raise ValueError("arguments must not be None")