pyopencl 2024.2.6__cp39-cp39-win_amd64.whl → 2024.3__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic.
- pyopencl/__init__.py +127 -122
- pyopencl/_cl.cp39-win_amd64.pyd +0 -0
- pyopencl/_mymako.py +3 -3
- pyopencl/algorithm.py +10 -7
- pyopencl/array.py +50 -40
- pyopencl/bitonic_sort.py +3 -1
- pyopencl/bitonic_sort_templates.py +1 -1
- pyopencl/cache.py +23 -22
- pyopencl/capture_call.py +5 -4
- pyopencl/clrandom.py +1 -0
- pyopencl/compyte/dtypes.py +4 -4
- pyopencl/compyte/pyproject.toml +54 -0
- pyopencl/elementwise.py +9 -2
- pyopencl/invoker.py +11 -9
- pyopencl/ipython_ext.py +1 -1
- pyopencl/reduction.py +16 -10
- pyopencl/scan.py +38 -22
- pyopencl/tools.py +23 -13
- {pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/METADATA +11 -8
- pyopencl-2024.3.dist-info/RECORD +42 -0
- {pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/WHEEL +1 -1
- pyopencl/compyte/.git +0 -1
- pyopencl/compyte/ndarray/Makefile +0 -31
- pyopencl/compyte/ndarray/__init__.py +0 -0
- pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
- pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
- pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
- pyopencl/compyte/ndarray/pygpu_language.h +0 -207
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
- pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
- pyopencl/compyte/ndarray/setup_opencl.py +0 -101
- pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
- pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
- pyopencl-2024.2.6.dist-info/RECORD +0 -56
- {pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/licenses/LICENSE +0 -0
pyopencl/array.py
CHANGED
@@ -42,9 +42,12 @@ import pyopencl.elementwise as elementwise
  from pyopencl import cltypes
  from pyopencl.characterize import has_double_support
  from pyopencl.compyte.array import (
- ArrayFlags as _ArrayFlags,
-
-
+ ArrayFlags as _ArrayFlags,
+ as_strided as _as_strided,
+ c_contiguous_strides as _c_contiguous_strides,
+ equal_strides as _equal_strides,
+ f_contiguous_strides as _f_contiguous_strides,
+ )


  SCALAR_CLASSES = (Number, np.bool_, bool)

@@ -259,7 +262,7 @@ def _splay(device, n, kernel_specific_max_wg_size=None):
  group_count = max_groups
  work_items_per_group = max_work_items

- #print
+ # print("n:%d gc:%d wipg:%d" % (n, group_count, work_items_per_group))
  return (group_count*work_items_per_group,), (work_items_per_group,)


@@ -292,7 +295,7 @@ def elwise_kernel_runner(kernel_getter):
  queue.device)
  gs, ls = out._get_sizes(queue, work_group_info)

- args = (out,
+ args = (out, *args, out.size)
  if ARRAY_KERNEL_EXEC_HOOK is not None:
  return ARRAY_KERNEL_EXEC_HOOK( # pylint: disable=not-callable
  knl, queue, gs, ls, *args, wait_for=wait_for)

@@ -587,7 +590,7 @@ class Array:
  # FIXME It would be nice to check this. But it would require
  # changing the allocator interface. Trust the user for now.

- #assert allocator.context == context
+ # assert allocator.context == context
  pass

  # Queue-less arrays do have a purpose in life.

@@ -608,11 +611,11 @@ class Array:

  try:
  shape = tuple(shape) # type: ignore[arg-type]
- except TypeError:
+ except TypeError as err:
  if not isinstance(shape, (int, np.integer)):
  raise TypeError(
  "shape must either be iterable or castable to an integer: "
- f"got a '{type(shape).__name__}'")
+ f"got a '{type(shape).__name__}'") from err

  shape = (shape,)


@@ -654,7 +657,7 @@ class Array:
  # }}}

  assert dtype != object, \
- "object arrays on the compute device are not allowed"
+ "object arrays on the compute device are not allowed" # noqa: E721
  assert isinstance(shape, tuple)
  assert isinstance(strides, tuple)


@@ -922,7 +925,7 @@ class Array:
  "device-to-host transfers",
  DeprecationWarning, stacklevel=2)

- ary,
+ ary, _event1 = self._get(queue=queue, ary=ary, async_=async_, **kwargs)

  return ary


@@ -2047,9 +2050,10 @@ class Array:

  .. versionadded:: 2015.2
  """
- new_shape = tuple(
- new_strides = tuple(
- for i, dim in enumerate(self.shape)
+ new_shape = tuple(dim for dim in self.shape if dim > 1)
+ new_strides = tuple(
+ self.strides[i] for i, dim in enumerate(self.shape)
+ if dim > 1)

  return self._new_with_changes(
  self.base_data, self.offset,

@@ -2595,14 +2599,16 @@ def multi_take(arrays, indices, out=None, queue=None):
  cl.kernel_work_group_info.WORK_GROUP_SIZE,
  queue.device))

- wait_for_this = (
-
-
+ wait_for_this = (
+ *indices.events,
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
+ *[evt for o in out[chunk_slice] for evt in o.events])
  evt = knl(queue, gs, ls,
  indices.data,
- *
-
-
+ *[o.data for o in out[chunk_slice]],
+ *[i.data for i in arrays[chunk_slice]],
+ *[indices.size],
+ wait_for=wait_for_this)
  for o in out[chunk_slice]:
  o.add_event(evt)


@@ -2673,15 +2679,19 @@ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None,
  cl.kernel_work_group_info.WORK_GROUP_SIZE,
  queue.device))

- wait_for_this = (
-
-
+ wait_for_this = (
+ *dest_indices.events,
+ *src_indices.events,
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
+ *[evt for o in out[chunk_slice] for evt in o.events])
  evt = knl(queue, gs, ls,
-
-
-
-
-
+ *out[chunk_slice],
+ dest_indices,
+ src_indices,
+ *arrays[chunk_slice],
+ *src_offsets_list[chunk_slice],
+ src_indices.size,
+ wait_for=wait_for_this)
  for o in out[chunk_slice]:
  o.add_event(evt)


@@ -2750,16 +2760,16 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None,
  cl.kernel_work_group_info.WORK_GROUP_SIZE,
  queue.device))

- wait_for_this = (
-
-
+ wait_for_this = (
+ *wait_for,
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
+ *[evt for o in out[chunk_slice] for evt in o.events])
  evt = knl(queue, gs, ls,
-
-
-
-
-
- wait_for=wait_for_this)
+ *out[chunk_slice],
+ dest_indices,
+ *arrays[chunk_slice],
+ use_fill_cla, array_lengths_cla, dest_indices.size,
+ wait_for=wait_for_this)

  for o in out[chunk_slice]:
  o.add_event(evt)

@@ -2874,7 +2884,7 @@ def hstack(arrays, queue=None):

  lead_shape = single_valued(ary.shape[:-1] for ary in arrays)

- w = builtins.sum(
+ w = builtins.sum(ary.shape[-1] for ary in arrays)

  if __debug__:
  if builtins.any(type(ary) != type(arrays[0]) # noqa: E721

@@ -2883,7 +2893,7 @@ def hstack(arrays, queue=None):
  "an instance of the type of arrays[0]",
  stacklevel=2)

- result = arrays[0].__class__(queue, lead_shape
+ result = arrays[0].__class__(queue, (*lead_shape, w), arrays[0].dtype,
  allocator=arrays[0].allocator)
  index = 0
  for ary in arrays:

@@ -3150,8 +3160,8 @@ def _logical_op(x1, x2, out, operator, queue=None):
  else:
  out[:] = np.logical_or(x1, x2)
  elif np.isscalar(x1) or np.isscalar(x2):
- scalar_arg, =
- ary_arg, =
+ scalar_arg, = (x for x in (x1, x2) if np.isscalar(x))
+ ary_arg, = (x for x in (x1, x2) if not np.isscalar(x))
  queue = queue or ary_arg.queue
  allocator = ary_arg.allocator

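The multi_take / multi_take_put / multi_put hunks above now gather every pending event on the index, input, and output arrays before each chunked kernel launch. A minimal, pure-Python sketch of that gathering pattern (the FakeArray class and the event strings are illustrative stand-ins, not pyopencl objects):

# Illustrative sketch only: mimics the wait_for_this construction above with
# plain Python objects instead of pyopencl arrays and cl.Event instances.
class FakeArray:
    def __init__(self, events):
        self.events = list(events)


def gather_wait_events(indices, arrays, outputs):
    # One flat tuple containing the index array's events plus every event
    # pending on each input and each output array.
    return (
        *indices.events,
        *[evt for a in arrays for evt in a.events],
        *[evt for o in outputs for evt in o.events])


indices = FakeArray(["evt-idx"])
inputs = [FakeArray(["evt-in0"]), FakeArray(["evt-in1"])]
outputs = [FakeArray([])]
print(gather_wait_events(indices, inputs, outputs))
# ('evt-idx', 'evt-in0', 'evt-in1')

Passing one combined tuple as the wait list lets a single launch depend on all producers and prior consumers of the buffers involved.
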
pyopencl/bitonic_sort.py
CHANGED
@@ -35,8 +35,10 @@ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  from functools import reduce
  from operator import mul
+ from typing import ClassVar, Dict

  from mako.template import Template
+
  from pytools import memoize_method

  import pyopencl as cl

@@ -62,7 +64,7 @@ class BitonicSort:
  .. automethod:: __call__
  """

- kernels_srcs = {
+ kernels_srcs: ClassVar[Dict[str, str]] = {
  "B2": _tmpl.ParallelBitonic_B2,
  "B4": _tmpl.ParallelBitonic_B4,
  "B8": _tmpl.ParallelBitonic_B8,
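
The bitonic_sort.py change annotates the class-level kernel-source table with ClassVar. A small sketch of the annotation, using a made-up catalog class rather than BitonicSort itself:

from typing import ClassVar, Dict


class KernelCatalog:
    # ClassVar documents that this mutable dict is shared class-level state,
    # not a per-instance attribute; ruff's RUF012 check asks for exactly this
    # annotation on mutable class attributes.
    sources: ClassVar[Dict[str, str]] = {
        "B2": "// kernel source ...",
        "B4": "// kernel source ...",
    }


print(sorted(KernelCatalog.sources))  # ['B2', 'B4']
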
pyopencl/cache.py
CHANGED
@@ -42,12 +42,14 @@ import hashlib
  new_hash = hashlib.md5


- def _erase_dir(
+ def _erase_dir(directory):
  from os import listdir, rmdir, unlink
  from os.path import join
-
-
-
+
+ for name in listdir(directory):
+ unlink(join(directory, name))
+
+ rmdir(directory)


  def update_checksum(checksum, obj):

@@ -213,7 +215,7 @@ def get_dependencies(src, include_path):

  _inner(src)

- result = [(name,
+ result = [(name, *vals) for name, vals in result.items()]
  result.sort()

  return result

@@ -266,7 +268,7 @@ def get_cache_key(device, options_bytes, src):


  def retrieve_from_cache(cache_dir, cache_key):
- class
+ class _InvalidInfoFileError(RuntimeError):
  pass

  from os.path import isdir, join

@@ -290,18 +292,18 @@ def retrieve_from_cache(cache_dir, cache_key):

  try:
  info_file = open(info_path, "rb")
- except OSError:
- raise
+ except OSError as err:
+ raise _InvalidInfoFileError() from err

  try:
  try:
  info = load(info_file)
- except EOFError:
- raise
+ except EOFError as err:
+ raise _InvalidInfoFileError() from err
  finally:
  info_file.close()

- except
+ except _InvalidInfoFileError:
  mod_cache_dir_m.reset()
  from warnings import warn
  warn(

@@ -375,13 +377,13 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
  cache_result = retrieve_from_cache(cache_dir, cache_key)

  if cache_result is None:
- logger.debug("build program: binary cache miss (key: %s)"
+ logger.debug("build program: binary cache miss (key: %s)", cache_key)

  to_be_built_indices.append(i)
  binaries.append(None)
  logs.append(None)
  else:
- logger.debug("build program: binary cache hit (key: %s)"
+ logger.debug("build program: binary cache hit (key: %s)", cache_key)

  binary, log = cache_result
  binaries.append(binary)

@@ -410,8 +412,9 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
  src = src + "\n\n__constant int pyopencl_defeat_cache_%s = 0;" % (
  uuid4().hex)

- logger.debug(
-
+ logger.debug(
+ "build program: start building program from source on %s",
+ ", ".join(str(devices[i]) for i in to_be_built_indices))

  prg = _cl._Program(ctx, src)
  prg.build(options_bytes, [devices[i] for i in to_be_built_indices])

@@ -459,13 +462,11 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
  binary_path = mod_cache_dir_m.sub("binary")
  source_path = mod_cache_dir_m.sub("source.cl")

-
-
- outf.close()
+ with open(source_path, "w") as outf:
+ outf.write(src)

-
-
- outf.close()
+ with open(binary_path, "wb") as outf:
+ outf.write(binary)

  from pickle import dump
  info_file = open(info_path, "wb")

@@ -504,7 +505,7 @@ def create_built_program_from_source_cached(ctx, src, options_bytes, devices=Non
  except Exception as e:
  from pyopencl import Error
  build_program_failure = (isinstance(e, Error)
- and e.code == _cl.status_code.BUILD_PROGRAM_FAILURE) #
+ and e.code == _cl.status_code.BUILD_PROGRAM_FAILURE) # pylint:disable=no-member

  # Mac error on intel CPU driver: can't build from cached version.
  # If we get a build_program_failure from the cached version then
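
Several hunks in cache.py (and in capture_call.py below) replace bare re-raises with explicit exception chaining. A self-contained sketch of the pattern, with hypothetical names that only mirror the structure above:

class InvalidInfoFileError(RuntimeError):
    """Hypothetical marker exception, standing in for _InvalidInfoFileError."""


def read_info(path):
    try:
        with open(path, "rb") as info_file:
            return info_file.read()
    except OSError as err:
        # "raise ... from err" records the original error as __cause__, so the
        # traceback shows both the low-level OSError and the wrapper exception.
        raise InvalidInfoFileError(f"cannot read cache info file: {path}") from err


try:
    read_info("/nonexistent/cache/info")
except InvalidInfoFileError as exc:
    print(type(exc.__cause__).__name__)  # FileNotFoundError
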
pyopencl/capture_call.py
CHANGED
@@ -22,6 +22,7 @@ THE SOFTWARE.


  import numpy as np
+
  from pytools.py_codegen import Indentation, PythonCodeGenerator

  import pyopencl as cl

@@ -30,8 +31,8 @@ import pyopencl as cl
  def capture_kernel_call(kernel, output_file, queue, g_size, l_size, *args, **kwargs):
  try:
  source = kernel._source
- except AttributeError:
- raise RuntimeError("cannot capture call, kernel source not available")
+ except AttributeError as err:
+ raise RuntimeError("cannot capture call, kernel source not available") from err

  if source is None:
  raise RuntimeError("cannot capture call, kernel source not available")

@@ -91,9 +92,9 @@ def capture_kernel_call(kernel, output_file, queue, g_size, l_size, *args, **kwa
  else:
  try:
  arg_buf = memoryview(arg)
- except Exception:
+ except Exception as err:
  raise RuntimeError("cannot capture: "
- "unsupported arg nr %d (0-based)" % i)
+ "unsupported arg nr %d (0-based)" % i) from err

  arg_data.append(("arg%d_data" % i, arg_buf))
  kernel_args.append("decompress(b64decode(arg%d_data))" % i)
pyopencl/clrandom.py
CHANGED
pyopencl/compyte/dtypes.py
CHANGED
@@ -29,7 +29,7 @@ OTHER DEALINGS IN THE SOFTWARE.
  import numpy as np


- class TypeNameNotKnown(RuntimeError):
+ class TypeNameNotKnown(RuntimeError): # noqa: N818
  pass



@@ -89,7 +89,7 @@ class DTypeRegistry:

  if not existed:
  self.dtype_to_name[dtype] = c_names[0]
- if
+ if str(dtype) not in self.dtype_to_name:
  self.dtype_to_name[str(dtype)] = c_names[0]

  return dtype

@@ -103,7 +103,7 @@ class DTypeRegistry:
  try:
  return self.dtype_to_name[dtype]
  except KeyError:
- raise ValueError("unable to map dtype '%s'" % dtype)
+ raise ValueError("unable to map dtype '%s'" % dtype) from None

  # }}}


@@ -260,7 +260,7 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
  try:
  dtype = name_to_dtype(tp)
  except KeyError:
- raise ValueError("unknown type '%s'" % tp)
+ raise ValueError("unknown type '%s'" % tp) from None

  return arg_class(dtype, name)

pyopencl/compyte/pyproject.toml
ADDED

@@ -0,0 +1,54 @@
+ [tool.ruff]
+ preview = true
+
+ [tool.ruff.lint]
+ extend-select = [
+ "B", # flake8-bugbear
+ "C", # flake8-comprehensions
+ "E", # pycodestyle
+ "F", # pyflakes
+
+ "I", # flake8-isort
+
+ "N", # pep8-naming
+ "NPY", # numpy
+ "Q", # flake8-quotes
+ "W", # pycodestyle
+
+ # TODO
+ # "UP", # pyupgrade
+ # "RUF", # ruff
+ ]
+ extend-ignore = [
+ "C90", # McCabe complexity
+ "E221", # multiple spaces before operator
+ "E241", # multiple spaces after comma
+ "E402", # module level import not at the top of file
+ "E226", # missing whitespace around operator
+ "N817", # CamelCase `SubstitutionRuleMappingContext` imported as acronym `SRMC`
+
+ # FIXME
+ "NPY002", # numpy rng
+ "C408", # unnecssary dict() -> literal
+ "E265", # block comment should start with
+ "F841", # local variable unused
+ ]
+
+ [tool.ruff.lint.per-file-ignores]
+ "ndarray/**/*.py" = ["Q", "B", "E", "F", "N", "C4"]
+
+ [tool.ruff.lint.flake8-quotes]
+ docstring-quotes = "double"
+ inline-quotes = "double"
+ multiline-quotes = "double"
+
+ [tool.ruff.lint.isort]
+ combine-as-imports = true
+ known-first-party = [
+ "pytools",
+ "pymbolic",
+ ]
+ known-local-folder = [
+ "modepy",
+ ]
+ lines-after-imports = 2

pyopencl/elementwise.py
CHANGED
@@ -31,12 +31,19 @@ import enum
  from typing import Any, List, Optional, Tuple, Union

  import numpy as np
+
  from pytools import memoize_method

  import pyopencl as cl
  from pyopencl.tools import (
- DtypedArgument,
-
+ DtypedArgument,
+ KernelTemplateBase,
+ ScalarArg,
+ VectorArg,
+ context_dependent_memoize,
+ dtype_to_c_struct,
+ dtype_to_ctype,
+ )


  # {{{ elementwise kernel code generator
pyopencl/invoker.py
CHANGED
@@ -26,6 +26,7 @@ from typing import Any, Tuple
  from warnings import warn

  import numpy as np
+
  from pytools.persistent_dict import WriteOncePersistentDict
  from pytools.py_codegen import Indentation, PythonCodeGenerator


@@ -258,16 +259,17 @@ def _generate_enqueue_and_set_args_module(function_name,

  # {{{ generate _enqueue

-
+ from pytools import to_identifier
+ enqueue_name = f"enqueue_knl_{to_identifier(function_name)}"
  gen("def %s(%s):"
  % (enqueue_name,
- ", ".join(
-
-
-
-
-
-
+ ", ".join([
+ "self", "queue", "global_size", "local_size",
+ *arg_names,
+ "global_offset=None",
+ "g_times_l=False",
+ "allow_empty_ndrange=False",
+ "wait_for=None"])))

  with Indentation(gen):
  subgen, wait_for_parts = gen_arg_setting(in_enqueue=True)

@@ -295,7 +297,7 @@ def _generate_enqueue_and_set_args_module(function_name,

  gen("")
  gen("def set_args(%s):"
- % (", ".join(["self"
+ % (", ".join(["self", *arg_names])))

  with Indentation(gen):
  gen.extend(gen_arg_setting(in_enqueue=False))
pyopencl/ipython_ext.py
CHANGED
@@ -33,7 +33,7 @@ class PyOpenCLMagics(Magics):
  def cl_kernel(self, line, cell):
  kernel = cell

- opts,
+ opts, _args = self.parse_options(line, "o:")
  build_options = opts.get("o", "")

  self._run_kernel(kernel, build_options)
pyopencl/reduction.py
CHANGED
@@ -35,8 +35,12 @@ import numpy as np

  import pyopencl as cl
  from pyopencl.tools import (
- DtypedArgument,
-
+ DtypedArgument,
+ KernelTemplateBase,
+ _process_code_for_macro,
+ context_dependent_memoize,
+ dtype_to_ctype,
+ )


  # {{{ kernel source

@@ -219,8 +223,11 @@ def get_reduction_kernel(
  map_expr = "pyopencl_reduction_inp[i]" if stage == 2 else "in[i]"

  from pyopencl.tools import (
- VectorArg,
-
+ VectorArg,
+ get_arg_list_scalar_arg_dtypes,
+ get_arg_offset_adjuster_code,
+ parse_arg_list,
+ )

  if arguments is None:
  raise ValueError("arguments must not be None")

@@ -229,9 +236,9 @@
  arg_prep = get_arg_offset_adjuster_code(arguments)

  if stage == 2 and arguments is not None:
- arguments =
-
-
+ arguments = [
+ VectorArg(dtype_out, "pyopencl_reduction_inp"),
+ *arguments]

  source, group_size = _get_reduction_source(
  ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,

@@ -512,8 +519,7 @@ class ReductionKernel:
  use_queue,
  (group_count*stage_inf.group_size,),
  (stage_inf.group_size,),
- *([result.base_data, result.offset]
- + invocation_args + size_args),
+ *([result.base_data, result.offset, *invocation_args, *size_args]),
  wait_for=wait_for)
  wait_for = [last_evt]


@@ -526,7 +532,7 @@ class ReductionKernel:
  return result
  else:
  stage_inf = self.stage_2_inf
- args = (result,
+ args = (result, *stage1_args)

  range_ = slice_ = None

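The argument-building changes in reduction.py (and the invoker.py hunks above) replace list concatenation with star-unpacking inside a single literal. A tiny equivalence check with made-up argument names:

# Made-up placeholders for the kernel arguments assembled above.
base_args = ["result_base", "result_offset"]
invocation_args = ["in_buf"]
size_args = ["seq_count", "n"]

# Older style: build the sequence by concatenating lists.
old_style = tuple(base_args + invocation_args + size_args)

# Newer style, as in the hunk above: one literal with * unpacking.
new_style = ("result_base", "result_offset", *invocation_args, *size_args)

assert old_style == new_style
print(new_style)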