pyopencl 2025.2.7__cp314-cp314t-musllinux_1_2_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/.libs/libOpenCL-4a7ed9fc.so.1.0.0 +0 -0
- pyopencl/__init__.py +1995 -0
- pyopencl/_cl.cpython-314t-x86_64-linux-musl.so +0 -0
- pyopencl/_cl.pyi +2009 -0
- pyopencl/_cluda.py +57 -0
- pyopencl/_monkeypatch.py +1104 -0
- pyopencl/_mymako.py +17 -0
- pyopencl/algorithm.py +1454 -0
- pyopencl/array.py +3530 -0
- pyopencl/bitonic_sort.py +245 -0
- pyopencl/bitonic_sort_templates.py +597 -0
- pyopencl/cache.py +535 -0
- pyopencl/capture_call.py +200 -0
- pyopencl/characterize/__init__.py +461 -0
- pyopencl/characterize/performance.py +240 -0
- pyopencl/cl/pyopencl-airy.cl +324 -0
- pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
- pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
- pyopencl/cl/pyopencl-bessel-y.cl +435 -0
- pyopencl/cl/pyopencl-complex.h +303 -0
- pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
- pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
- pyopencl/cl/pyopencl-random123/array.h +325 -0
- pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
- pyopencl/cl/pyopencl-random123/philox.cl +486 -0
- pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
- pyopencl/clmath.py +281 -0
- pyopencl/clrandom.py +412 -0
- pyopencl/cltypes.py +217 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/__init__.py +0 -0
- pyopencl/compyte/array.py +211 -0
- pyopencl/compyte/dtypes.py +314 -0
- pyopencl/compyte/pyproject.toml +49 -0
- pyopencl/elementwise.py +1288 -0
- pyopencl/invoker.py +417 -0
- pyopencl/ipython_ext.py +70 -0
- pyopencl/py.typed +0 -0
- pyopencl/reduction.py +815 -0
- pyopencl/scan.py +1921 -0
- pyopencl/tools.py +1680 -0
- pyopencl/typing.py +61 -0
- pyopencl/version.py +11 -0
- pyopencl-2025.2.7.dist-info/METADATA +108 -0
- pyopencl-2025.2.7.dist-info/RECORD +47 -0
- pyopencl-2025.2.7.dist-info/WHEEL +5 -0
- pyopencl-2025.2.7.dist-info/licenses/LICENSE +104 -0
pyopencl/invoker.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
__copyright__ = """
|
|
5
|
+
Copyright (C) 2017 Andreas Kloeckner
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__license__ = """
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
+
furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in
|
|
17
|
+
all copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
25
|
+
THE SOFTWARE.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from typing import Any
|
|
29
|
+
from warnings import warn
|
|
30
|
+
|
|
31
|
+
import numpy as np
|
|
32
|
+
|
|
33
|
+
from pytools.persistent_dict import WriteOncePersistentDict
|
|
34
|
+
from pytools.py_codegen import Indentation, PythonCodeGenerator
|
|
35
|
+
|
|
36
|
+
import pyopencl as cl
|
|
37
|
+
import pyopencl._cl as _cl
|
|
38
|
+
from pyopencl.tools import VectorArg, _NumpyTypesKeyBuilder
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# {{{ arg packing helpers
|
|
42
|
+
|
|
43
|
+
# Translate the pointer-sized type characters "n"/"N" into fixed-width
# type characters matching the size that ``_cl._sizeof_size_t()`` reports:
# the lowercase (signed) variant for "n", the uppercase one for "N".
# NOTE(review): presumably these are the ssize_t/size_t codes -- confirm.
_size_t_char = {1: "B", 2: "H", 4: "L", 8: "Q"}[_cl._sizeof_size_t()]
_type_char_map = {
    "n": _size_t_char.lower(),
    "N": _size_t_char,
}
# The temporary is not part of the module's namespace.
del _size_t_char
|
|
54
|
+
|
|
55
|
+
# }}}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# {{{ generic arg handling body
|
|
59
|
+
|
|
60
|
+
def generate_generic_arg_handling_body(num_args):
    """Generate the argument-setting body for a kernel without type information.

    For ``num_args == 0`` the body is a lone ``pass``. Otherwise it is one
    call of the form ``self._set_arg_multi((0, arg0, 1, arg1, ...,), )``
    that hands every positional argument through unchanged.

    :arg num_args: number of positional kernel arguments.
    :returns: the :class:`PythonCodeGenerator` holding the generated lines.
    """
    body = PythonCodeGenerator()

    if num_args == 0:
        body("pass")
        return body

    # Flat, alternating sequence: index, argument name, index, ...
    flat_pairs = ", ".join(f"{idx}, arg{idx}" for idx in range(num_args))
    body(f"self._set_arg_multi(({flat_pairs},), )")

    return body
|
|
76
|
+
|
|
77
|
+
# }}}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# {{{ specific arg handling body
|
|
81
|
+
|
|
82
|
+
# Type characters that add_buf_arg (inside
# generate_specific_arg_handling_body) emits as (index, typechar, value)
# triples for ``_set_arg_buf_pack_multi``. Any other type character is
# packed by a ``pack(...)`` call in the generated code and passed through
# ``_set_arg_buf_multi`` instead.
BUF_PACK_TYPECHARS = ["c", "b", "B", "h", "H", "i", "I", "l", "L", "f", "d"]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def generate_specific_arg_handling_body(function_name, num_cl_args, arg_types, *,
        work_around_arg_count_bug, warn_about_arg_count_bug,
        in_enqueue, include_debug_code):
    """Generate the argument-setting body for a kernel with known argument types.

    The generated code refers to the positional arguments as ``arg0``,
    ``arg1``, ... and sets them through up to three calls:
    ``self._set_arg_multi`` (objects passed through unchanged),
    ``self._set_arg_buf_multi`` (values already packed into bytes) and
    ``self._set_arg_buf_pack_multi`` (values packed by the callee from a
    type character).

    :arg function_name: kernel name; only used in the warning text.
    :arg num_cl_args: number of arguments of the CL kernel. It must equal
        the number of CL arguments derived from *arg_types*.
    :arg arg_types: sequence with one entry per passed argument. An entry
        is *None* (pass the argument through), a :class:`VectorArg`
        (pass ``.base_data`` and, if ``with_offset`` is set, ``.offset``
        as an additional int64 argument) or anything ``numpy.dtype``
        accepts (scalar or struct).
    :arg work_around_arg_count_bug: must not be *None*. ``"pocl"`` passes
        a complex128 argument as two separate floating point arguments
        while at most 8 floating point arguments have been used;
        ``"apple"`` raises in the same situation. Otherwise complex
        values are packed as a single two-component argument.
    :arg warn_about_arg_count_bug: must not be *None*. If true, warn
        whenever a complex argument is encountered.
    :arg in_enqueue: whether the code is generated for the enqueue
        function (as opposed to ``set_args``).
    :arg include_debug_code: whether to emit contiguity and queue checks
        for :class:`VectorArg` arguments.
    :returns: the :class:`PythonCodeGenerator` if *in_enqueue* is false,
        otherwise a tuple ``(generator, wait_for_parts)`` where
        *wait_for_parts* lists the ``argN.events`` expressions of all
        :class:`VectorArg` arguments.
    :raises TypeError: for complex types other than complex64/complex128,
        or if the derived argument count disagrees with *num_cl_args*.
    :raises NotImplementedError: if the ``"apple"`` work-around would be
        needed for a complex128 argument.
    """

    assert work_around_arg_count_bug is not None
    assert warn_about_arg_count_bug is not None

    fp_arg_count = 0
    cl_arg_idx = 0

    gen = PythonCodeGenerator()

    if not arg_types:
        gen("pass")

    # Flat lists: (index, expr) pairs for the first two, and
    # (index, typechar, expr) triples for the last one.
    gen_indices_and_args = []
    buf_indices_and_args = []
    buf_pack_indices_and_args = []

    def add_buf_arg(arg_idx, typechar, expr_str):
        # Route the value either to the "pack in callee" path or pre-pack
        # it with struct.pack in the generated code.
        if typechar in BUF_PACK_TYPECHARS:
            buf_pack_indices_and_args.extend(
                (arg_idx, repr(typechar.encode()), expr_str))
        else:
            buf_indices_and_args.extend(
                (arg_idx, f"pack('{typechar}', {expr_str})"))

    wait_for_parts = []

    for arg_idx, arg_type in enumerate(arg_types):
        arg_var = f"arg{arg_idx}"

        if arg_type is None:
            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(arg_var)
            cl_arg_idx += 1
            gen("")
            continue

        elif isinstance(arg_type, VectorArg):
            if include_debug_code:
                gen(f"if not {arg_var}.flags.forc:")
                with Indentation(gen):
                    gen("raise RuntimeError('only contiguous arrays may '")
                    gen(" 'be used as arguments to this operation')")
                    gen("")

            if in_enqueue and include_debug_code:
                gen(f"assert {arg_var}.queue is None or {arg_var}.queue == queue, "
                    "'queues for all arrays must match the queue supplied "
                    "to enqueue'")

            gen_indices_and_args.append(cl_arg_idx)
            gen_indices_and_args.append(f"{arg_var}.base_data")
            cl_arg_idx += 1

            if arg_type.with_offset:
                # The offset occupies a CL argument slot of its own.
                add_buf_arg(cl_arg_idx, np.dtype(np.int64).char, f"{arg_var}.offset")
                cl_arg_idx += 1

            if in_enqueue:
                wait_for_parts.append(f"{arg_var}.events")

            continue

        arg_dtype = np.dtype(arg_type)

        if arg_dtype.char == "V":
            # Struct (void) types: the argument is handed over as-is
            # through the buffer interface.
            buf_indices_and_args.append(cl_arg_idx)
            buf_indices_and_args.append(arg_var)
            cl_arg_idx += 1

        elif arg_dtype.kind == "c":
            if warn_about_arg_count_bug:
                warn(f"{function_name}: arguments include complex numbers, and "
                        "some (but not all) of the target devices mishandle "
                        "struct kernel arguments (hence the workaround is "
                        "disabled)",
                        stacklevel=2)

            if arg_dtype == np.complex64:
                arg_char = "f"
            elif arg_dtype == np.complex128:
                arg_char = "d"
            else:
                raise TypeError(f"unexpected complex type: {arg_dtype}")

            if (work_around_arg_count_bug == "pocl"
                    and arg_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                # Real and imaginary part become two separate CL arguments.
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.real")
                cl_arg_idx += 1
                add_buf_arg(cl_arg_idx, arg_char, f"{arg_var}.imag")
                cl_arg_idx += 1

            elif (work_around_arg_count_bug == "apple"
                    and arg_dtype == np.complex128
                    and fp_arg_count + 2 <= 8):
                raise NotImplementedError("No work-around to "
                        "Apple's broken structs-as-kernel arg "
                        "handling has been found. "
                        "Cannot pass complex numbers to kernels.")

            else:
                buf_indices_and_args.append(cl_arg_idx)
                buf_indices_and_args.append(
                    f"pack('{arg_char}{arg_char}', {arg_var}.real, {arg_var}.imag)")
                cl_arg_idx += 1

            fp_arg_count += 2

        else:
            if arg_dtype.kind == "f":
                fp_arg_count += 1

            arg_char = arg_dtype.char
            # Replace pointer-sized type characters by fixed-width ones.
            arg_char = _type_char_map.get(arg_char, arg_char)
            add_buf_arg(cl_arg_idx, arg_char, arg_var)
            cl_arg_idx += 1

        gen("")

    for arg_kind, args_and_indices, entry_length in [
            ("", gen_indices_and_args, 2),
            ("_buf", buf_indices_and_args, 2),
            ("_buf_pack", buf_pack_indices_and_args, 3),
            ]:
        assert len(args_and_indices) % entry_length == 0
        if args_and_indices:
            gen(f"self._set_arg{arg_kind}_multi("
                    f"({', '.join(str(i) for i in args_and_indices)},), "
                    ")")

    if cl_arg_idx != num_cl_args:
        raise TypeError(
            "length of argument list (%d) and "
            "CL-generated number of arguments (%d) do not agree"
            % (cl_arg_idx, num_cl_args))

    if in_enqueue:
        return gen, wait_for_parts
    else:
        return gen
|
|
229
|
+
|
|
230
|
+
# }}}
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _generate_enqueue_and_set_args_module(function_name,
        num_passed_args, num_cl_args,
        arg_types, include_debug_code,
        work_around_arg_count_bug, warn_about_arg_count_bug):
    """Generate the invoker module for one kernel.

    The generated module defines two functions: an enqueue function named
    ``enqueue_knl_<identifier of function_name>`` that sets the arguments
    and then calls ``_cl.enqueue_nd_range_kernel``, and ``set_args``, which
    only sets the arguments.

    :returns: a tuple ``(picklable_module, enqueue_name)``.
    """
    from pytools import to_identifier

    positional_names = [f"arg{idx}" for idx in range(num_passed_args)]

    def emit_arg_setting(in_enqueue):
        # With type information use the specialized generator, otherwise
        # fall back to the generic one (which has nothing to wait for).
        if arg_types is not None:
            return generate_specific_arg_handling_body(
                    function_name, num_cl_args, arg_types,
                    warn_about_arg_count_bug=warn_about_arg_count_bug,
                    work_around_arg_count_bug=work_around_arg_count_bug,
                    in_enqueue=in_enqueue, include_debug_code=include_debug_code)

        generic_body = generate_generic_arg_handling_body(num_passed_args)
        return (generic_body, []) if in_enqueue else generic_body

    module_gen = PythonCodeGenerator()

    for preamble_line in (
            "from struct import pack",
            "from pyopencl import status_code",
            "import numpy as np",
            "import pyopencl._cl as _cl",
            "",
            ):
        module_gen(preamble_line)

    # {{{ generate _enqueue

    enqueue_name = f"enqueue_knl_{to_identifier(function_name)}"
    enqueue_params = ", ".join([
        "self", "queue", "global_size", "local_size",
        *positional_names,
        "global_offset=None",
        "g_times_l=False",
        "allow_empty_ndrange=False",
        "wait_for=None"])
    module_gen(f"def {enqueue_name}({enqueue_params}):")

    with Indentation(module_gen):
        arg_code, wait_for_parts = emit_arg_setting(in_enqueue=True)
        module_gen.extend(arg_code)

        if wait_for_parts:
            # Merge the caller's wait list with the events of all arrays.
            starred = ", ".join(f"*{part}" for part in wait_for_parts)
            wait_for_expr = (
                "[*(() if wait_for is None else wait_for), "
                f"{starred}]")
        else:
            wait_for_expr = "wait_for"

        # Using positional args here because pybind is slow with keyword args
        module_gen(f"""
            return _cl.enqueue_nd_range_kernel(queue, self,
                    global_size, local_size, global_offset,
                    {wait_for_expr},
                    g_times_l, allow_empty_ndrange)
            """)

    # }}}

    # {{{ generate set_args

    module_gen("")
    module_gen(f"def set_args({', '.join(['self', *positional_names])}):")

    with Indentation(module_gen):
        module_gen.extend(emit_arg_setting(in_enqueue=False))

    # }}}

    picklable = module_gen.get_picklable_module(
            name_prefix=f"pyopencl invoker for '{function_name}'")
    return picklable, enqueue_name
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# {{{ Helper functions related to argument sizes and device limits
|
|
317
|
+
|
|
318
|
+
def _get_max_parameter_size(dev):
    """Return the maximum kernel parameter size of *dev*, corrected for PoCL.

    The value reported by the device is returned unchanged unless the
    platform is a PoCL older than 3.0 that reports a limit of 1024; in
    that case a replacement is returned for CPU and GPU devices.
    """
    from pyopencl.characterize import get_pocl_version

    reported_limit = dev.max_parameter_size
    pocl_version = get_pocl_version(dev.platform, fallback_value=(1, 8))

    affected_pocl = pocl_version is not None and pocl_version < (3, 0)

    # Older PoCL versions (<3.0) have an incorrect parameter
    # size limit of 1024; see e.g. https://github.com/pocl/pocl/pull/1046
    if not affected_pocl or reported_limit != 1024:
        return reported_limit

    dev_type = dev.type

    if dev_type & cl.device_type.CPU:
        return 1024*1024

    if dev_type & cl.device_type.GPU:
        # All modern Nvidia GPUs (starting from Compute Capability 2)
        # have this limit
        return 4352

    return reported_limit
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _check_arg_size(function_name, num_cl_args, arg_types, devs):
    """Warn if the kernel's argument sizes exceed an OpenCL device limit.

    Devices for which ``nv_compute_capability`` returns *None* are skipped.

    :arg function_name: kernel name, used in the warning text.
    :arg num_cl_args: number of CL kernel arguments; used in the warning
        text and, without *arg_types*, to estimate the total size.
    :arg arg_types: *None*/empty, or a sequence whose entries are *None*,
        a :class:`VectorArg` or anything accepted by ``numpy.dtype``.
    :arg devs: iterable of devices to check.
    """
    from pyopencl.characterize import nv_compute_capability

    # A VectorArg with an offset is passed as a pointer plus an int64
    # offset argument, so both contribute to the total.
    offset_size = np.dtype(np.int64).itemsize

    for dev in devs:
        if nv_compute_capability(dev) is None:
            # Only warn on Nvidia GPUs, because actual failures related to
            # the device limit have been observed only on such devices.
            continue

        dev_ptr_size = dev.address_bits // 8
        dev_limit = _get_max_parameter_size(dev)

        if arg_types:
            total_arg_size = 0
            for arg_type in arg_types:
                if arg_type is None:
                    total_arg_size += dev_ptr_size
                elif isinstance(arg_type, VectorArg):
                    total_arg_size += dev_ptr_size
                    if arg_type.with_offset:
                        total_arg_size += offset_size
                else:
                    total_arg_size += np.dtype(arg_type).itemsize
        else:
            # Estimate that each argument has the size of a pointer on average
            total_arg_size = dev_ptr_size * num_cl_args

        if total_arg_size > dev_limit:
            warn(f"Kernel '{function_name}' has {num_cl_args} arguments with "
                f"a total size of {total_arg_size} bytes, which is higher than "
                f"the limit of {dev_limit} bytes on {dev}. This might "
                "lead to compilation errors, especially on GPU devices.",
                stacklevel=3)
|
|
372
|
+
|
|
373
|
+
# }}}
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
if not cl._PYOPENCL_NO_CACHE:
    from pytools.py_codegen import PicklableModule

    # Persistent cache of generated invoker modules. It is looked up and
    # filled by generate_enqueue_and_set_args; values are
    # (picklable module, name of the generated enqueue function) pairs.
    # The name is left undefined when caching is disabled, so every use
    # must be guarded by the same cl._PYOPENCL_NO_CACHE check.
    invoker_cache: WriteOncePersistentDict[Any, tuple[PicklableModule, str]] \
        = WriteOncePersistentDict(
            "pyopencl-invoker-cache-v42-nano",
            key_builder=_NumpyTypesKeyBuilder(),
            in_mem_cache_size=0,
            safe_sync=False)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def generate_enqueue_and_set_args(function_name,
        num_passed_args, num_cl_args,
        arg_types,
        work_around_arg_count_bug, warn_about_arg_count_bug, devs):
    """Return the generated ``(enqueue, set_args)`` functions for a kernel.

    The argument sizes are first checked against the limits of *devs*.
    The invoker module is then taken from the persistent cache if caching
    is enabled and an entry exists; otherwise it is generated and, with
    caching enabled, stored for later use.
    """
    _check_arg_size(function_name, num_cl_args, arg_types, devs)

    # The key doubles as the positional argument list of the generator;
    # __debug__ takes the place of include_debug_code.
    cache_key = (function_name, num_passed_args, num_cl_args,
            arg_types, __debug__,
            work_around_arg_count_bug, warn_about_arg_count_bug)

    caching_enabled = not cl._PYOPENCL_NO_CACHE

    cached_entry = None
    if caching_enabled:
        try:
            cached_entry = invoker_cache[cache_key]
        except KeyError:
            pass

    if cached_entry is None:
        cached_entry = _generate_enqueue_and_set_args_module(*cache_key)
        if caching_enabled:
            invoker_cache.store_if_not_present(cache_key, cached_entry)

    pmod, enqueue_name = cached_entry
    generated_globals = pmod.mod_globals

    return (
            generated_globals[enqueue_name],
            generated_globals["set_args"])
|
|
413
|
+
|
|
414
|
+
# }}}
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
# vim: foldmethod=marker
|
pyopencl/ipython_ext.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from IPython.core.magic import Magics, cell_magic, line_magic, magics_class
|
|
4
|
+
|
|
5
|
+
import pyopencl as cl
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@magics_class
class PyOpenCLMagics(Magics):
    """IPython magics that build OpenCL source and publish the kernels.

    All magics need a :class:`pyopencl.Context` stored in the user
    namespace under the name ``cl_ctx`` or ``ctx``.
    """

    def _run_kernel(self, kernel, options):
        """Build *kernel* and store each resulting kernel in the user namespace.

        :arg kernel: OpenCL source code.
        :arg options: whitespace-separated build options; *None* or an
            empty string means no options.
        :raises RuntimeError: if neither ``cl_ctx`` nor ``ctx`` in the
            user namespace is a :class:`pyopencl.Context`.
        """
        user_ns = self.shell.user_ns

        # Prefer 'cl_ctx'; fall back to 'ctx'. Entries of the wrong type
        # are ignored.
        ctx = None
        for name in ("cl_ctx", "ctx"):
            candidate = user_ns.get(name)
            if isinstance(candidate, cl.Context):
                ctx = candidate
                break

        if ctx is None:
            raise RuntimeError("unable to locate cl context, which must be "
                        "present in namespace as 'cl_ctx' or 'ctx'")

        # options is None when a line magic was invoked without '-o'.
        build_options = options.split() if options else []
        prg = cl.Program(ctx, kernel).build(options=build_options)

        for knl in prg.all_kernels():
            user_ns[knl.function_name] = knl

    @cell_magic
    def cl_kernel(self, line, cell):
        """Build the cell contents; ``-o`` on the magic line gives build options."""
        kernel = cell

        opts, _args = self.parse_options(line, "o:")
        build_options = opts.get("o", "")

        self._run_kernel(kernel, build_options)

    def _load_kernel_and_options(self, line):
        """Parse ``-o``/``-f`` from *line* and load the kernel source.

        The source is located with ``self.shell.find_user_code`` from the
        ``-f`` option if given, otherwise from the remaining arguments.

        :returns: a tuple ``(kernel, build_options)``; *build_options* is
            *None* if ``-o`` was not given.
        """
        opts, args = self.parse_options(line, "o:f:")

        build_options = opts.get("o")
        kernel = self.shell.find_user_code(opts.get("f") or args)

        return kernel, build_options

    @line_magic
    def cl_kernel_from_file(self, line):
        """Load kernel source as described by *line* and build it."""
        kernel, build_options = self._load_kernel_and_options(line)
        self._run_kernel(kernel, build_options)

    @line_magic
    def cl_load_edit_kernel(self, line):
        """Load kernel source and place it in the next input as a ``%%cl_kernel`` cell."""
        kernel, build_options = self._load_kernel_and_options(line)
        header = "%%cl_kernel"

        if build_options:
            header = f'{header} -o "{build_options}"'

        content = f"{header}\n\n{kernel}"

        self.shell.set_next_input(content)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_ipython_extension(ip):
    """Register :class:`PyOpenCLMagics` on *ip* via ``ip.register_magics``."""
    ip.register_magics(PyOpenCLMagics)
|
pyopencl/py.typed
ADDED
|
File without changes
|