pyopencl 2024.3__cp39-cp39-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (43) hide show
  1. pyopencl/.libs/libOpenCL-1ef0e16e.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +2410 -0
  3. pyopencl/_cl.cpython-39-x86_64-linux-gnu.so +0 -0
  4. pyopencl/_cluda.py +54 -0
  5. pyopencl/_mymako.py +14 -0
  6. pyopencl/algorithm.py +1449 -0
  7. pyopencl/array.py +3437 -0
  8. pyopencl/bitonic_sort.py +242 -0
  9. pyopencl/bitonic_sort_templates.py +594 -0
  10. pyopencl/cache.py +535 -0
  11. pyopencl/capture_call.py +177 -0
  12. pyopencl/characterize/__init__.py +456 -0
  13. pyopencl/characterize/performance.py +237 -0
  14. pyopencl/cl/pyopencl-airy.cl +324 -0
  15. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  16. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  17. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  18. pyopencl/cl/pyopencl-complex.h +303 -0
  19. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  20. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  21. pyopencl/cl/pyopencl-random123/array.h +325 -0
  22. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  23. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  24. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  25. pyopencl/clmath.py +280 -0
  26. pyopencl/clrandom.py +409 -0
  27. pyopencl/cltypes.py +137 -0
  28. pyopencl/compyte/.gitignore +21 -0
  29. pyopencl/compyte/__init__.py +0 -0
  30. pyopencl/compyte/array.py +214 -0
  31. pyopencl/compyte/dtypes.py +290 -0
  32. pyopencl/compyte/pyproject.toml +54 -0
  33. pyopencl/elementwise.py +1171 -0
  34. pyopencl/invoker.py +421 -0
  35. pyopencl/ipython_ext.py +68 -0
  36. pyopencl/reduction.py +786 -0
  37. pyopencl/scan.py +1915 -0
  38. pyopencl/tools.py +1527 -0
  39. pyopencl/version.py +9 -0
  40. pyopencl-2024.3.dist-info/METADATA +108 -0
  41. pyopencl-2024.3.dist-info/RECORD +43 -0
  42. pyopencl-2024.3.dist-info/WHEEL +5 -0
  43. pyopencl-2024.3.dist-info/licenses/LICENSE +104 -0
pyopencl/invoker.py ADDED
@@ -0,0 +1,421 @@
1
+ __copyright__ = """
2
+ Copyright (C) 2017 Andreas Kloeckner
3
+ """
4
+
5
+ __license__ = """
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ from typing import Any, Tuple
26
+ from warnings import warn
27
+
28
+ import numpy as np
29
+
30
+ from pytools.persistent_dict import WriteOncePersistentDict
31
+ from pytools.py_codegen import Indentation, PythonCodeGenerator
32
+
33
+ import pyopencl as cl
34
+ import pyopencl._cl as _cl
35
+ from pyopencl.tools import VectorArg, _NumpyTypesKeyBuilder
36
+
37
+
38
+ # {{{ arg packing helpers
39
+
40
# Unsigned struct format character whose width matches the size reported by
# _cl._sizeof_size_t().
_size_t_char = {1: "B", 2: "H", 4: "L", 8: "Q"}[_cl._sizeof_size_t()]

# Type chars "n"/"N" are replaced by the lowercase/uppercase struct format
# character of that width before an argument is packed.
_type_char_map = dict(n=_size_t_char.lower(), N=_size_t_char)
del _size_t_char
51
+
52
+ # }}}
53
+
54
+
55
+ # {{{ generic arg handling body
56
+
57
def generate_generic_arg_handling_body(num_args):
    """Generate a function body that sets *num_args* arguments generically.

    The body consists of ``pass`` when *num_args* is zero, and otherwise of a
    single ``self._set_arg_multi((0, arg0, 1, arg1, ...,), )`` call.

    :returns: the :class:`PythonCodeGenerator` holding the generated body.
    """
    body = PythonCodeGenerator()

    if num_args == 0:
        body("pass")
        return body

    # Flat sequence of index/name pairs: 0, arg0, 1, arg1, ...
    flat_pairs = [
        item
        for idx in range(num_args)
        for item in (str(idx), f"arg{idx}")]

    body(f"self._set_arg_multi(({', '.join(flat_pairs)},), )")
    return body
73
+
74
+ # }}}
75
+
76
+
77
+ # {{{ specific arg handling body
78
+
79
# Type chars whose arguments are handed to ``_set_arg_buf_pack_multi`` together
# with the (encoded) type char; any other type char is packed by a ``pack()``
# call in the generated code and handed to ``_set_arg_buf_multi`` instead.
BUF_PACK_TYPECHARS = list("cbBhHiIlLfd")
80
+
81
+
82
def generate_specific_arg_handling_body(function_name, num_cl_args, arg_types, *,
        work_around_arg_count_bug, warn_about_arg_count_bug,
        in_enqueue, include_debug_code):
    """Generate an argument-setting function body specialized to *arg_types*.

    :arg function_name: kernel name; only used in the warning issued for
        complex-valued arguments.
    :arg num_cl_args: expected number of CL-side arguments; compared against
        the number of CL arguments generated here.
    :arg arg_types: one entry per passed argument. An entry is *None* (the
        argument is handed to ``_set_arg_multi`` unchanged), a
        :class:`VectorArg`, or something accepted by :class:`numpy.dtype`.
    :arg work_around_arg_count_bug: must not be *None*. The values ``"pocl"``
        and ``"apple"`` select special handling of ``complex128`` arguments.
    :arg warn_about_arg_count_bug: must not be *None*. If true, a warning is
        issued for every complex-valued argument.
    :arg in_enqueue: if true, the body is generated for use inside an enqueue
        function: a ``queue`` consistency assertion is emitted for arrays
        (only with *include_debug_code*) and the arrays' ``events`` are
        collected.
    :arg include_debug_code: if true, emit run-time checks on array arguments.

    :returns: the :class:`PythonCodeGenerator` holding the body. If
        *in_enqueue* is true, a tuple of that generator and a list of
        expression strings (``"argN.events"``) instead.
    :raises TypeError: for a complex dtype other than ``complex64`` and
        ``complex128``, or if the number of generated CL arguments differs
        from *num_cl_args*.
    :raises NotImplementedError: for a ``complex128`` argument that would need
        the ``"apple"`` work-around.
    """

    assert work_around_arg_count_bug is not None
    assert warn_about_arg_count_bug is not None

    # Running count of floating-point scalars seen so far; a complex number
    # counts twice. The complex128 work-around branches below only apply while
    # this count (including the current argument) does not exceed 8.
    fp_arg_count = 0

    # Index of the next CL-side argument. May advance faster than the index
    # into arg_types, since one passed argument can become two CL arguments.
    cl_arg_idx = 0

    gen = PythonCodeGenerator()

    if not arg_types:
        gen("pass")

    # Flat lists that end up as the tuple argument of the respective
    # self._set_arg*_multi call:
    #   gen:      index, expression
    #   buf:      index, expression
    #   buf_pack: index, repr of the encoded type char, expression
    gen_indices_and_args = []
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    def add_buf_arg(arg_idx, typechar, expr_str):
        """Register *expr_str* as a by-value argument of type *typechar*."""
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            buf_indices_and_args.append(arg_idx)
            buf_indices_and_args.append(f"pack('{typechar}', {expr_str})")

    wait_for_parts = []

    for arg_idx, arg_type in enumerate(arg_types):
        arg_var = f"arg{arg_idx}"

        if arg_type is None:
            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(arg_var)
            cl_arg_idx += 1
            gen("")
            continue

        elif isinstance(arg_type, VectorArg):
            if include_debug_code:
                gen(f"if not {arg_var}.flags.forc:")
                with Indentation(gen):
                    gen("raise RuntimeError('only contiguous arrays may '")
                    gen(" 'be used as arguments to this operation')")
                    gen("")

            if in_enqueue and include_debug_code:
                gen(f"assert {arg_var}.queue is None or {arg_var}.queue == queue, "
                    "'queues for all arrays must match the queue supplied "
                    "to enqueue'")

            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(f"{arg_var}.base_data")
            cl_arg_idx += 1

            if arg_type.with_offset:
                # The offset occupies a CL argument of its own (int64).
                add_buf_arg(cl_arg_idx, np.dtype(np.int64).char, f"{arg_var}.offset")
                cl_arg_idx += 1

            if in_enqueue:
                wait_for_parts.append(f"{arg_var}.events")

            continue

        arg_dtype = np.dtype(arg_type)

        if arg_dtype.char == "V":
            # dtype char "V": the argument is handed over as a buffer as-is.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(arg_var)
            cl_arg_idx += 1

        elif arg_dtype.kind == "c":
            if warn_about_arg_count_bug:
                warn(f"{function_name}: arguments include complex numbers, and "
                     "some (but not all) of the target devices mishandle "
                     "struct kernel arguments (hence the workaround is "
                     "disabled)",
                     stacklevel=2)

            if arg_dtype == np.complex64:
                arg_char = "f"
            elif arg_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError(f"unexpected complex type: {arg_dtype}")

            if (work_around_arg_count_bug == "pocl"
                    and arg_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # Pass real and imaginary part as two separate CL arguments.
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.real")
                cl_arg_idx += 1
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.imag")
                cl_arg_idx += 1

            elif (work_around_arg_count_bug == "apple"
                    and arg_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                raise NotImplementedError("No work-around to "
                        "Apple's broken structs-as-kernel arg "
                        "handling has been found. "
                        "Cannot pass complex numbers to kernels.")

            else:
                # Pack both parts into a single CL argument.
                buf_indices_and_args.append(cl_arg_idx)
                buf_indices_and_args.append(
                    f"pack('{arg_char}{arg_char}', {arg_var}.real, {arg_var}.imag)")
                cl_arg_idx += 1

            fp_arg_count += 2

        else:
            if arg_dtype.kind == "f":
                fp_arg_count += 1

            arg_char = arg_dtype.char
            arg_char = _type_char_map.get(arg_char, arg_char)
            add_buf_arg(cl_arg_idx, arg_char, arg_var)
            cl_arg_idx += 1

        gen("")

    for arg_kind, args_and_indices, entry_length in [
            ("", gen_indices_and_args, 2),
            ("_buf", buf_indices_and_args, 2),
            ("_buf_pack", buf_pack_indices_and_args, 3),
            ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(f"self._set_arg{arg_kind}_multi("
                    f"({', '.join(str(i) for i in args_and_indices)},), "
                    ")")

    if cl_arg_idx != num_cl_args:
        raise TypeError(
            "length of argument list (%d) and "
            "CL-generated number of arguments (%d) do not agree"
            % (cl_arg_idx, num_cl_args))

    if in_enqueue:
        return gen, wait_for_parts
    else:
        return gen
226
+
227
+ # }}}
228
+
229
+
230
def _generate_enqueue_and_set_args_module(function_name,
        num_passed_args, num_cl_args,
        arg_types, include_debug_code,
        work_around_arg_count_bug, warn_about_arg_count_bug):
    """Generate a module defining an enqueue function and ``set_args``.

    The argument-setting code is specialized to *arg_types* unless
    *arg_types* is *None*, in which case the generic variant is used.

    :returns: a tuple of the picklable module obtained from the code
        generator and the name of the enqueue function defined in it.
    """

    arg_names = [f"arg{i}" for i in range(num_passed_args)]

    def gen_arg_setting(in_enqueue):
        if arg_types is not None:
            return generate_specific_arg_handling_body(
                    function_name, num_cl_args, arg_types,
                    warn_about_arg_count_bug=warn_about_arg_count_bug,
                    work_around_arg_count_bug=work_around_arg_count_bug,
                    in_enqueue=in_enqueue, include_debug_code=include_debug_code)

        # Generic handling has no events to wait for.
        generic_body = generate_generic_arg_handling_body(num_passed_args)
        if in_enqueue:
            return generic_body, []
        return generic_body

    gen = PythonCodeGenerator()

    for header_line in (
            "from struct import pack",
            "from pyopencl import status_code",
            "import numpy as np",
            "import pyopencl._cl as _cl",
            ""):
        gen(header_line)

    # {{{ generate _enqueue

    from pytools import to_identifier
    enqueue_name = f"enqueue_knl_{to_identifier(function_name)}"

    enqueue_params = ", ".join([
        "self", "queue", "global_size", "local_size",
        *arg_names,
        "global_offset=None",
        "g_times_l=False",
        "allow_empty_ndrange=False",
        "wait_for=None"])
    gen(f"def {enqueue_name}({enqueue_params}):")

    with Indentation(gen):
        subgen, wait_for_parts = gen_arg_setting(in_enqueue=True)
        gen.extend(subgen)

        if not wait_for_parts:
            wait_for_expr = "wait_for"
        else:
            # Merge the caller's wait_for with the events of all array arguments.
            starred_parts = ", ".join(f"*{part}" for part in wait_for_parts)
            wait_for_expr = (
                f"[*(() if wait_for is None else wait_for), {starred_parts}]")

        # Using positional args here because pybind is slow with keyword args
        gen(f"""
            return _cl.enqueue_nd_range_kernel(queue, self,
                    global_size, local_size, global_offset,
                    {wait_for_expr},
                    g_times_l, allow_empty_ndrange)
            """)

    # }}}

    # {{{ generate set_args

    set_args_params = ", ".join(["self", *arg_names])
    gen("")
    gen(f"def set_args({set_args_params}):")

    with Indentation(gen):
        gen.extend(gen_arg_setting(in_enqueue=False))

    # }}}

    module = gen.get_picklable_module(
            name=f"<pyopencl invoker for '{function_name}'>")
    return module, enqueue_name
311
+
312
+
313
+ # {{{ Helper functions related to argument sizes and device limits
314
+
315
def _get_max_parameter_size(dev):
    """Return the maximum parameter size of *dev*, corrected for PoCL.

    The value reported by the device is returned unchanged unless the PoCL
    version found for the device's platform is below 3.0 and the reported
    value is 1024.
    """
    from pyopencl.characterize import get_pocl_version

    reported_limit = dev.max_parameter_size
    pocl_version = get_pocl_version(dev.platform, fallback_value=(1, 8))

    if pocl_version is None or not (pocl_version < (3, 0)):
        return reported_limit

    # Current PoCL versions (as of 04/2022) have an incorrect parameter
    # size limit of 1024; see e.g. https://github.com/pocl/pocl/pull/1046
    if reported_limit != 1024:
        return reported_limit

    if dev.type & cl.device_type.CPU:
        return 1024*1024

    if dev.type & cl.device_type.GPU:
        # All modern Nvidia GPUs (starting from Compute Capability 2)
        # have this limit
        return 4352

    return reported_limit
333
+
334
+
335
def _check_arg_size(function_name, num_cl_args, arg_types, devs):
    """Warn if the kernel argument size exceeds or nears a device's limit.

    For every device in *devs*, the total size of the kernel arguments is
    computed and compared against :func:`_get_max_parameter_size`. Arguments
    of unknown type (*None* entries, or *arg_types* being empty or *None*)
    are assumed to have the size of a device pointer, which makes the total
    an estimate.

    :arg function_name: kernel name, used in the warning text.
    :arg num_cl_args: number of CL-side arguments, used in the warning text
        and for the estimate when no *arg_types* are available.
    :arg arg_types: *None* or a sequence whose entries are *None*, a
        :class:`VectorArg`, or something accepted by :class:`numpy.dtype`.
    :arg devs: iterable of devices to check.

    A warning is issued when the total exceeds the limit, or when an
    estimated total reaches 75% of it.
    """

    # An array argument with an offset is passed as a pointer plus a separate
    # int64 offset argument; account for both.
    offset_size = np.dtype(np.int64).itemsize

    for dev in devs:
        dev_ptr_size = dev.address_bits // 8
        dev_limit = _get_max_parameter_size(dev)

        if arg_types:
            total_arg_size = 0
            is_estimate = False

            for arg_type in arg_types:
                if arg_type is None:
                    is_estimate = True
                    total_arg_size += dev_ptr_size
                elif isinstance(arg_type, VectorArg):
                    total_arg_size += dev_ptr_size
                    if arg_type.with_offset:
                        total_arg_size += offset_size
                else:
                    total_arg_size += np.dtype(arg_type).itemsize
        else:
            # Estimate that each argument has the size of a pointer on average
            is_estimate = True
            total_arg_size = dev_ptr_size * num_cl_args

        if total_arg_size > dev_limit:
            relation = "is higher than"
        elif is_estimate and total_arg_size >= dev_limit * 0.75:
            # Since total_arg_size is just an estimate, also warn in case we are
            # just below the actual limit.
            relation = "approaches"
        else:
            continue

        warn(f"Kernel '{function_name}' has {num_cl_args} arguments with "
             f"a total size of {total_arg_size} bytes, which {relation} "
             f"the limit of {dev_limit} bytes on {dev}. This might "
             "lead to compilation errors, especially on GPU devices.",
             stacklevel=3)
376
+
377
+ # }}}
378
+
379
+
380
if not cl._PYOPENCL_NO_CACHE:
    from pytools.py_codegen import PicklableModule

    # Cache of generated invoker modules; values are
    # (picklable module, enqueue function name) tuples.
    invoker_cache: WriteOncePersistentDict[Any, Tuple[PicklableModule, str]] = (
        WriteOncePersistentDict(
            "pyopencl-invoker-cache-v42-nano",
            key_builder=_NumpyTypesKeyBuilder(),
            in_mem_cache_size=0,
            safe_sync=False))
388
+
389
+
390
def generate_enqueue_and_set_args(function_name,
        num_passed_args, num_cl_args,
        arg_types,
        work_around_arg_count_bug, warn_about_arg_count_bug, devs):
    """Return the generated enqueue and ``set_args`` functions for a kernel.

    Argument sizes are checked against the limits of *devs* first. The
    generated module is looked up in (and stored to) ``invoker_cache`` unless
    ``cl._PYOPENCL_NO_CACHE`` is set.

    :returns: a tuple ``(enqueue, set_args)`` of functions taken from the
        generated module's globals.
    """

    _check_arg_size(function_name, num_cl_args, arg_types, devs)

    # The key doubles as the positional argument list of the module
    # generator; __debug__ fills the include_debug_code slot.
    cache_key = (function_name, num_passed_args, num_cl_args,
                 arg_types, __debug__,
                 work_around_arg_count_bug, warn_about_arg_count_bug)

    cache_hit = False
    if not cl._PYOPENCL_NO_CACHE:
        try:
            module, enqueue_fn_name = invoker_cache[cache_key]
        except KeyError:
            pass
        else:
            cache_hit = True

    if not cache_hit:
        module, enqueue_fn_name = _generate_enqueue_and_set_args_module(*cache_key)
        if not cl._PYOPENCL_NO_CACHE:
            invoker_cache.store_if_not_present(
                cache_key, (module, enqueue_fn_name))

    enqueue_fn = module.mod_globals[enqueue_fn_name]
    set_args_fn = module.mod_globals["set_args"]
    return enqueue_fn, set_args_fn
417
+
418
+ # }}}
419
+
420
+
421
+ # vim: foldmethod=marker
pyopencl/ipython_ext.py ADDED
@@ -0,0 +1,68 @@
1
+ from IPython.core.magic import Magics, cell_magic, line_magic, magics_class
2
+
3
+ import pyopencl as cl
4
+
5
+
6
@magics_class
class PyOpenCLMagics(Magics):
    """IPython magics that build OpenCL kernels into the user namespace."""

    def _find_context(self):
        """Return the :class:`pyopencl.Context` found in the user namespace.

        The names ``cl_ctx`` and ``ctx`` are tried in this order; a name that
        is missing or not bound to a context is skipped.

        :raises RuntimeError: if neither name is bound to a context.
        """
        user_ns = self.shell.user_ns

        for name in ("cl_ctx", "ctx"):
            try:
                candidate = user_ns[name]
            except KeyError:
                continue

            if isinstance(candidate, cl.Context):
                return candidate

        raise RuntimeError("unable to locate cl context, which must be "
                           "present in namespace as 'cl_ctx' or 'ctx'")

    def _run_kernel(self, kernel, options):
        """Build *kernel* and bind each resulting kernel in the user namespace.

        :arg kernel: OpenCL source code.
        :arg options: whitespace-separated build options, or *None* or an
            empty string for no options.
        :raises RuntimeError: if no context is found in the user namespace.
        """
        ctx = self._find_context()

        # The file-based magics pass None when no -o option was given.
        build_options = options.split() if options else []
        prg = cl.Program(ctx, kernel).build(options=build_options)

        for knl in prg.all_kernels():
            self.shell.user_ns[knl.function_name] = knl

    @cell_magic
    def cl_kernel(self, line, cell):
        """Build the cell body as OpenCL source; ``-o`` gives build options."""
        kernel = cell

        opts, _args = self.parse_options(line, "o:")
        build_options = opts.get("o", "")

        self._run_kernel(kernel, build_options)

    def _load_kernel_and_options(self, line):
        """Parse *line* and return ``(kernel source, build options)``.

        The source is located via ``find_user_code`` from the ``-f`` option
        or, failing that, from the remaining arguments. The build options are
        *None* if no ``-o`` option was given.
        """
        opts, args = self.parse_options(line, "o:f:")

        build_options = opts.get("o")
        kernel = self.shell.find_user_code(opts.get("f") or args)

        return kernel, build_options

    @line_magic
    def cl_kernel_from_file(self, line):
        """Load OpenCL source as described by *line* and build it."""
        kernel, build_options = self._load_kernel_and_options(line)
        self._run_kernel(kernel, build_options)

    @line_magic
    def cl_load_edit_kernel(self, line):
        """Load OpenCL source into the next input as a ``%%cl_kernel`` cell."""
        kernel, build_options = self._load_kernel_and_options(line)
        header = "%%cl_kernel"

        if build_options:
            header = f'{header} -o "{build_options}"'

        content = f"{header}\n\n{kernel}"

        self.shell.set_next_input(content)
65
+
66
+
67
def load_ipython_extension(ip):
    """Register :class:`PyOpenCLMagics` with *ip*."""
    ip.register_magics(PyOpenCLMagics)