pyopencl 2026.1.1__cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. pyopencl/.libs/libOpenCL-34a55fe4.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +1995 -0
  3. pyopencl/_cl.cpython-314t-aarch64-linux-gnu.so +0 -0
  4. pyopencl/_cl.pyi +2009 -0
  5. pyopencl/_cluda.py +57 -0
  6. pyopencl/_monkeypatch.py +1104 -0
  7. pyopencl/_mymako.py +17 -0
  8. pyopencl/algorithm.py +1454 -0
  9. pyopencl/array.py +3530 -0
  10. pyopencl/bitonic_sort.py +245 -0
  11. pyopencl/bitonic_sort_templates.py +597 -0
  12. pyopencl/cache.py +553 -0
  13. pyopencl/capture_call.py +200 -0
  14. pyopencl/characterize/__init__.py +461 -0
  15. pyopencl/characterize/performance.py +240 -0
  16. pyopencl/cl/pyopencl-airy.cl +324 -0
  17. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  18. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  19. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  20. pyopencl/cl/pyopencl-complex.h +303 -0
  21. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  22. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  23. pyopencl/cl/pyopencl-random123/array.h +325 -0
  24. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  25. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  26. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  27. pyopencl/clmath.py +281 -0
  28. pyopencl/clrandom.py +412 -0
  29. pyopencl/cltypes.py +217 -0
  30. pyopencl/compyte/.gitignore +21 -0
  31. pyopencl/compyte/__init__.py +0 -0
  32. pyopencl/compyte/array.py +211 -0
  33. pyopencl/compyte/dtypes.py +314 -0
  34. pyopencl/compyte/pyproject.toml +49 -0
  35. pyopencl/elementwise.py +1288 -0
  36. pyopencl/invoker.py +417 -0
  37. pyopencl/ipython_ext.py +70 -0
  38. pyopencl/py.typed +0 -0
  39. pyopencl/reduction.py +829 -0
  40. pyopencl/scan.py +1921 -0
  41. pyopencl/tools.py +1680 -0
  42. pyopencl/typing.py +61 -0
  43. pyopencl/version.py +11 -0
  44. pyopencl-2026.1.1.dist-info/METADATA +108 -0
  45. pyopencl-2026.1.1.dist-info/RECORD +47 -0
  46. pyopencl-2026.1.1.dist-info/WHEEL +6 -0
  47. pyopencl-2026.1.1.dist-info/licenses/LICENSE +104 -0
pyopencl/invoker.py ADDED
@@ -0,0 +1,417 @@
1
+ from __future__ import annotations
2
+
3
+
4
+ __copyright__ = """
5
+ Copyright (C) 2017 Andreas Kloeckner
6
+ """
7
+
8
+ __license__ = """
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in
17
+ all copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
+ THE SOFTWARE.
26
+ """
27
+
28
+ from typing import Any
29
+ from warnings import warn
30
+
31
+ import numpy as np
32
+
33
+ from pytools.persistent_dict import WriteOncePersistentDict
34
+ from pytools.py_codegen import Indentation, PythonCodeGenerator
35
+
36
+ import pyopencl as cl
37
+ import pyopencl._cl as _cl
38
+ from pyopencl.tools import VectorArg, _NumpyTypesKeyBuilder
39
+
40
+
41
+ # {{{ arg packing helpers
42
+
43
# Translate the pointer-sized dtype characters "n" (signed) and "N" (unsigned)
# into the fixed-width type character whose byte size equals what
# _cl._sizeof_size_t() reports. A size outside 1/2/4/8 raises KeyError at
# import time.
_size_t_char = {1: "B", 2: "H", 4: "L", 8: "Q"}[_cl._sizeof_size_t()]
_type_char_map = dict(n=_size_t_char.lower(), N=_size_t_char)
# The intermediate character is only needed to build the map.
del _size_t_char
54
+
55
+ # }}}
56
+
57
+
58
+ # {{{ generic arg handling body
59
+
60
def generate_generic_arg_handling_body(num_args):
    """Generate the argument-setting body for a kernel without type info.

    The emitted code hands every argument ``arg0`` ... ``arg<num_args-1>``,
    paired with its index, to ``self._set_arg_multi`` in a single call.
    When *num_args* is zero, the body consists of a lone ``pass``.

    :returns: a :class:`PythonCodeGenerator` holding the generated lines.
    """
    body = PythonCodeGenerator()

    if num_args == 0:
        body("pass")
        return body

    # Flat "index, name" sequence: 0, arg0, 1, arg1, ...
    flat_entries = ", ".join(f"{idx}, arg{idx}" for idx in range(num_args))
    body(f"self._set_arg_multi(({flat_entries},), )")

    return body
76
+
77
+ # }}}
78
+
79
+
80
+ # {{{ specific arg handling body
81
+
82
# Type characters whose values are passed, together with the type character
# itself, to ``_set_arg_buf_pack_multi``. Everything else is packed by a
# generated ``pack(...)`` call and passed to ``_set_arg_buf_multi``.
BUF_PACK_TYPECHARS = ["c", "b", "B", "h", "H", "i", "I", "l", "L", "f", "d"]


def generate_specific_arg_handling_body(function_name, num_cl_args, arg_types, *,
        work_around_arg_count_bug, warn_about_arg_count_bug,
        in_enqueue, include_debug_code):
    """Generate the argument-setting body for a kernel with known arg types.

    :arg function_name: kernel name; only used in the complex-number warning.
    :arg num_cl_args: number of arguments the CL kernel reports. It must
        equal the number of CL argument slots produced from *arg_types*.
    :arg arg_types: one entry per passed argument. An entry is *None*
        (argument forwarded unchanged via ``_set_arg_multi``), a
        :class:`VectorArg` (forwards ``.base_data`` and, if ``with_offset``
        is set, an additional ``.offset`` slot), or anything accepted by
        :class:`numpy.dtype`.
    :arg work_around_arg_count_bug: must not be *None*. The values
        ``"pocl"`` and ``"apple"`` select special handling of
        ``complex128`` arguments.
    :arg warn_about_arg_count_bug: must not be *None*. If true, a warning is
        issued for every complex-valued argument.
    :arg in_enqueue: whether the body is generated for the enqueue function.
        If so, each :class:`VectorArg` contributes ``<arg>.events`` to the
        returned wait list.
    :arg include_debug_code: whether to emit contiguity and queue checks for
        :class:`VectorArg` arguments.

    :returns: the :class:`PythonCodeGenerator` with the generated lines, or,
        if *in_enqueue* is true, a tuple ``(generator, wait_for_parts)``.
    :raises TypeError: for a complex dtype other than complex64/complex128,
        or if the produced slot count differs from *num_cl_args*.
    :raises NotImplementedError: for the ``"apple"`` work-around with a
        ``complex128`` argument.
    """

    assert work_around_arg_count_bug is not None
    assert warn_about_arg_count_bug is not None

    # Running count of floating point values passed so far; complex numbers
    # count twice. Used to limit the complex128 work-arounds below.
    fp_arg_count = 0
    # Index of the next CL-side argument slot. This can run ahead of the
    # Python-side argument index (offsets, split complex numbers).
    cl_arg_idx = 0

    gen = PythonCodeGenerator()

    if not arg_types:
        gen("pass")

    # Flat sequences that become the tuple handed to the respective
    # _set_arg*_multi call: (index, value) pairs for the first two,
    # (index, typechar, value) triples for the last one.
    gen_indices_and_args = []
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    def add_buf_arg(arg_idx, typechar, expr_str):
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.append(arg_idx)
            buf_pack_indices_and_args.append(repr(typechar.encode()))
            buf_pack_indices_and_args.append(expr_str)
        else:
            buf_indices_and_args.append(arg_idx)
            buf_indices_and_args.append(f"pack('{typechar}', {expr_str})")

    wait_for_parts = []

    for arg_idx, arg_type in enumerate(arg_types):
        arg_var = f"arg{arg_idx}"

        if arg_type is None:
            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(arg_var)
            cl_arg_idx += 1
            gen("")
            continue

        elif isinstance(arg_type, VectorArg):
            if include_debug_code:
                gen(f"if not {arg_var}.flags.forc:")
                with Indentation(gen):
                    gen("raise RuntimeError('only contiguous arrays may '")
                    gen(" 'be used as arguments to this operation')")
                    gen("")

            if in_enqueue and include_debug_code:
                gen(f"assert {arg_var}.queue is None or {arg_var}.queue == queue, "
                    "'queues for all arrays must match the queue supplied "
                    "to enqueue'")

            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(f"{arg_var}.base_data")
            cl_arg_idx += 1

            if arg_type.with_offset:
                # The offset occupies its own CL argument slot.
                add_buf_arg(cl_arg_idx, np.dtype(np.int64).char, f"{arg_var}.offset")
                cl_arg_idx += 1

            if in_enqueue:
                wait_for_parts.append(f"{arg_var}.events")

            continue

        arg_dtype = np.dtype(arg_type)

        if arg_dtype.char == "V":
            # Void (structured) dtypes are passed on as-is via the buffer path.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(arg_var)
            cl_arg_idx += 1

        elif arg_dtype.kind == "c":
            if warn_about_arg_count_bug:
                warn(f"{function_name}: arguments include complex numbers, and "
                     "some (but not all) of the target devices mishandle "
                     "struct kernel arguments (hence the workaround is "
                     "disabled)",
                     stacklevel=2)

            if arg_dtype == np.complex64:
                arg_char = "f"
            elif arg_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError(f"unexpected complex type: {arg_dtype}")

            if (work_around_arg_count_bug == "pocl"
                    and arg_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # Pass real and imaginary part as two separate CL arguments.
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.real")
                cl_arg_idx += 1
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.imag")
                cl_arg_idx += 1

            elif (work_around_arg_count_bug == "apple"
                    and arg_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                raise NotImplementedError("No work-around to "
                        "Apple's broken structs-as-kernel arg "
                        "handling has been found. "
                        "Cannot pass complex numbers to kernels.")

            else:
                # Regular case: one CL argument holding both components.
                buf_indices_and_args.append(cl_arg_idx)
                buf_indices_and_args.append(
                    f"pack('{arg_char}{arg_char}', {arg_var}.real, {arg_var}.imag)")
                cl_arg_idx += 1

            fp_arg_count += 2

        else:
            if arg_dtype.kind == "f":
                fp_arg_count += 1

            arg_char = arg_dtype.char
            # Replace pointer-sized type characters by fixed-width ones.
            arg_char = _type_char_map.get(arg_char, arg_char)
            add_buf_arg(cl_arg_idx, arg_char, arg_var)
            cl_arg_idx += 1

        gen("")

    # Emit at most one bulk call per argument category.
    for arg_kind, args_and_indices, entry_length in [
            ("", gen_indices_and_args, 2),
            ("_buf", buf_indices_and_args, 2),
            ("_buf_pack", buf_pack_indices_and_args, 3),
            ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(f"self._set_arg{arg_kind}_multi("
                f"({', '.join(str(i) for i in args_and_indices)},), "
                ")")

    if cl_arg_idx != num_cl_args:
        raise TypeError(
            f"length of argument list ({cl_arg_idx}) and "
            f"CL-generated number of arguments ({num_cl_args}) do not agree")

    if in_enqueue:
        return gen, wait_for_parts
    else:
        return gen
229
+
230
+ # }}}
231
+
232
+
233
def _generate_enqueue_and_set_args_module(function_name,
        num_passed_args, num_cl_args,
        arg_types, include_debug_code,
        work_around_arg_count_bug, warn_about_arg_count_bug):
    """Generate the source module containing a kernel's invoker functions.

    The module defines ``enqueue_knl_<identifier>`` (sets the arguments and
    calls ``_cl.enqueue_nd_range_kernel``) and ``set_args`` (sets the
    arguments only). If *arg_types* is *None*, the generic argument handling
    is generated, otherwise the type-specific one.

    :returns: a tuple ``(picklable_module, enqueue_name)``.
    """
    from pytools import to_identifier

    arg_names = [f"arg{i}" for i in range(num_passed_args)]

    def gen_arg_setting(in_enqueue):
        # Yields (generator, wait_for_parts) for the enqueue function and
        # just the generator for set_args.
        if arg_types is not None:
            return generate_specific_arg_handling_body(
                    function_name, num_cl_args, arg_types,
                    warn_about_arg_count_bug=warn_about_arg_count_bug,
                    work_around_arg_count_bug=work_around_arg_count_bug,
                    in_enqueue=in_enqueue, include_debug_code=include_debug_code)

        generic_body = generate_generic_arg_handling_body(num_passed_args)
        if in_enqueue:
            return generic_body, []
        return generic_body

    gen = PythonCodeGenerator()

    for preamble_line in (
            "from struct import pack",
            "from pyopencl import status_code",
            "import numpy as np",
            "import pyopencl._cl as _cl",
            "",
            ):
        gen(preamble_line)

    # {{{ generate _enqueue

    enqueue_name = f"enqueue_knl_{to_identifier(function_name)}"
    enqueue_params = ", ".join([
        "self", "queue", "global_size", "local_size",
        *arg_names,
        "global_offset=None",
        "g_times_l=False",
        "allow_empty_ndrange=False",
        "wait_for=None"])
    gen(f"def {enqueue_name}({enqueue_params}):")

    with Indentation(gen):
        arg_setting_body, wait_for_parts = gen_arg_setting(in_enqueue=True)
        gen.extend(arg_setting_body)

        if not wait_for_parts:
            wait_for_expr = "wait_for"
        else:
            # Merge the caller's wait list with the per-array event lists.
            extra_events = ", ".join("*"+wfp for wfp in wait_for_parts)
            wait_for_expr = (
                "[*(() if wait_for is None else wait_for), "
                + extra_events
                + "]")

        # Using positional args here because pybind is slow with keyword args
        gen(f"""
            return _cl.enqueue_nd_range_kernel(queue, self,
                    global_size, local_size, global_offset,
                    {wait_for_expr},
                    g_times_l, allow_empty_ndrange)
            """)

    # }}}

    # {{{ generate set_args

    gen("")
    set_args_params = ", ".join(["self", *arg_names])
    gen(f"def set_args({set_args_params}):")

    with Indentation(gen):
        gen.extend(gen_arg_setting(in_enqueue=False))

    # }}}

    module = gen.get_picklable_module(
            name_prefix=f"pyopencl invoker for '{function_name}'")
    return module, enqueue_name
314
+
315
+
316
+ # {{{ Helper functions related to argument sizes and device limits
317
+
318
def _get_max_parameter_size(dev):
    """Return the maximum kernel parameter size of *dev*, corrected for PoCL.

    The value reported by the device is returned unless the platform is a
    PoCL older than 3.0 and the reported limit is exactly 1024, in which
    case a replacement value is chosen based on the device type.
    """
    from pyopencl.characterize import get_pocl_version

    reported_limit = dev.max_parameter_size
    pocl_version = get_pocl_version(dev.platform, fallback_value=(1, 8))

    # Older PoCL versions (<3.0) have an incorrect parameter
    # size limit of 1024; see e.g. https://github.com/pocl/pocl/pull/1046
    affected_pocl = pocl_version is not None and pocl_version < (3, 0)
    if not affected_pocl or reported_limit != 1024:
        return reported_limit

    if dev.type & cl.device_type.CPU:
        return 1024*1024

    if dev.type & cl.device_type.GPU:
        # All modern Nvidia GPUs (starting from Compute Capability 2)
        # have this limit
        return 4352

    # Neither CPU nor GPU: nothing better than the reported value is known.
    return reported_limit
336
+
337
+
338
def _check_arg_size(function_name, num_cl_args, arg_types, devs):
    """Warn if the kernel's argument size exceeds a device's parameter limit.

    Only devices for which ``nv_compute_capability`` returns a value are
    examined. If *arg_types* is empty or *None*, the total size is estimated
    as one pointer per CL argument.
    """
    from pyopencl.characterize import nv_compute_capability

    for dev in devs:
        if nv_compute_capability(dev) is None:
            # Only warn on Nvidia GPUs, because actual failures related to
            # the device limit have been observed only on such devices.
            continue

        ptr_size = int(dev.address_bits / 8)
        limit = _get_max_parameter_size(dev)

        def size_of(arg_type, ptr_size=ptr_size):
            # Untyped arguments and arrays are counted as one pointer each.
            if arg_type is None or isinstance(arg_type, VectorArg):
                return ptr_size
            return np.dtype(arg_type).itemsize

        if arg_types:
            total_arg_size = sum(size_of(arg_type) for arg_type in arg_types)
        else:
            # Estimate that each argument has the size of a pointer on average
            total_arg_size = ptr_size * num_cl_args

        if total_arg_size > limit:
            warn(f"Kernel '{function_name}' has {num_cl_args} arguments with "
                f"a total size of {total_arg_size} bytes, which is higher than "
                f"the limit of {limit} bytes on {dev}. This might "
                "lead to compilation errors, especially on GPU devices.",
                stacklevel=3)
372
+
373
+ # }}}
374
+
375
+
376
if not cl._PYOPENCL_NO_CACHE:
    from pytools.py_codegen import PicklableModule

    # Persistent cache of generated invoker modules. Values are
    # (module, enqueue function name) tuples; the name is only defined
    # when caching has not been disabled.
    invoker_cache: WriteOncePersistentDict[Any, tuple[PicklableModule, str]] = (
        WriteOncePersistentDict(
            "pyopencl-invoker-cache-v42-nano",
            key_builder=_NumpyTypesKeyBuilder(),
            in_mem_cache_size=0,
            safe_sync=False,
        )
    )
384
+
385
+
386
def generate_enqueue_and_set_args(function_name,
        num_passed_args, num_cl_args,
        arg_types,
        work_around_arg_count_bug, warn_about_arg_count_bug, devs):
    """Return the ``(enqueue, set_args)`` functions for a kernel.

    The argument size is checked against the limits of *devs* first. The
    generated module is then taken from ``invoker_cache`` when caching is
    enabled and an entry exists; otherwise it is generated and, with caching
    enabled, stored for later use.
    """
    _check_arg_size(function_name, num_cl_args, arg_types, devs)

    # The key doubles as the positional argument list of the generator;
    # __debug__ takes the place of include_debug_code.
    cache_key = (function_name, num_passed_args, num_cl_args,
            arg_types, __debug__,
            work_around_arg_count_bug, warn_about_arg_count_bug)

    caching_enabled = not cl._PYOPENCL_NO_CACHE

    entry = None
    if caching_enabled:
        try:
            entry = invoker_cache[cache_key]
        except KeyError:
            entry = None

    if entry is None:
        entry = _generate_enqueue_and_set_args_module(*cache_key)
        if caching_enabled:
            invoker_cache.store_if_not_present(cache_key, entry)

    pmod, enqueue_name = entry
    module_globals = pmod.mod_globals
    return module_globals[enqueue_name], module_globals["set_args"]
413
+
414
+ # }}}
415
+
416
+
417
+ # vim: foldmethod=marker
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ from IPython.core.magic import Magics, cell_magic, line_magic, magics_class
4
+
5
+ import pyopencl as cl
6
+
7
+
8
@magics_class
class PyOpenCLMagics(Magics):
    """IPython magics that build OpenCL programs and publish their kernels.

    All magics require a :class:`pyopencl.Context` in the user namespace
    under the name ``cl_ctx`` or ``ctx``.
    """

    def _find_context(self):
        """Return the context stored as ``cl_ctx`` or ``ctx`` in the user
        namespace, preferring ``cl_ctx``.

        :raises RuntimeError: if neither name refers to a
            :class:`pyopencl.Context`.
        """
        user_ns = self.shell.user_ns
        for name in ("cl_ctx", "ctx"):
            candidate = user_ns.get(name)
            if isinstance(candidate, cl.Context):
                return candidate

        raise RuntimeError("unable to locate cl context, which must be "
                "present in namespace as 'cl_ctx' or 'ctx'")

    def _run_kernel(self, kernel, options):
        """Build *kernel* (program source) and bind every resulting kernel
        to its function name in the user namespace.

        :arg options: whitespace-separated build options. *None* and the
            empty string both mean "no options".
        """
        ctx = self._find_context()

        # Tolerate None so that callers without an -o option do not crash
        # on None.split().
        build_options = (options or "").split()
        prg = cl.Program(ctx, kernel).build(options=build_options)

        for knl in prg.all_kernels():
            self.shell.user_ns[knl.function_name] = knl

    @cell_magic
    def cl_kernel(self, line, cell):
        """Build the cell body as an OpenCL program.

        Build options may be given on the magic line with ``-o``.
        """
        opts, _args = self.parse_options(line, "o:")
        self._run_kernel(cell, opts.get("o", ""))

    def _load_kernel_and_options(self, line):
        """Parse ``-o`` (build options) and ``-f`` (code target) from *line*.

        The target, or the remaining arguments if ``-f`` is absent, is
        resolved with ``shell.find_user_code``.

        :returns: a tuple ``(kernel_source, build_options)``, where
            *build_options* is an empty string if ``-o`` was not given.
        """
        opts, args = self.parse_options(line, "o:f:")

        # Default to "" (as cl_kernel does) rather than None.
        build_options = opts.get("o", "")
        kernel = self.shell.find_user_code(opts.get("f") or args)

        return kernel, build_options

    @line_magic
    def cl_kernel_from_file(self, line):
        """Load program source as described by *line* and build it."""
        kernel, build_options = self._load_kernel_and_options(line)
        self._run_kernel(kernel, build_options)

    @line_magic
    def cl_load_edit_kernel(self, line):
        """Load program source as described by *line* and place it in the
        next input cell, prefixed by a ``%%cl_kernel`` header.
        """
        kernel, build_options = self._load_kernel_and_options(line)
        header = "%%cl_kernel"

        if build_options:
            header = f'{header} -o "{build_options}"'

        content = f"{header}\n\n{kernel}"

        self.shell.set_next_input(content)
67
+
68
+
69
def load_ipython_extension(ip):
    """Register :class:`PyOpenCLMagics` on the shell object *ip*.

    NOTE(review): presumably invoked by IPython's extension loader
    (``%load_ext``) -- confirm against the IPython documentation.
    """
    magics_cls = PyOpenCLMagics
    ip.register_magics(magics_cls)
pyopencl/py.typed ADDED
File without changes