pyopencl 2024.2.7__cp312-cp312-win_amd64.whl → 2025.1__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +127 -122
- pyopencl/_cl.cp312-win_amd64.pyd +0 -0
- pyopencl/_mymako.py +3 -3
- pyopencl/algorithm.py +10 -7
- pyopencl/array.py +58 -123
- pyopencl/bitonic_sort.py +3 -1
- pyopencl/bitonic_sort_templates.py +1 -1
- pyopencl/cache.py +23 -22
- pyopencl/capture_call.py +5 -4
- pyopencl/clrandom.py +1 -0
- pyopencl/cltypes.py +2 -2
- pyopencl/compyte/dtypes.py +4 -4
- pyopencl/compyte/pyproject.toml +54 -0
- pyopencl/elementwise.py +9 -2
- pyopencl/invoker.py +11 -9
- pyopencl/ipython_ext.py +1 -1
- pyopencl/reduction.py +16 -10
- pyopencl/scan.py +38 -22
- pyopencl/tools.py +23 -13
- pyopencl/version.py +1 -1
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/METADATA +11 -8
- pyopencl-2025.1.dist-info/RECORD +42 -0
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/WHEEL +1 -1
- pyopencl/compyte/.git +0 -1
- pyopencl/compyte/ndarray/Makefile +0 -31
- pyopencl/compyte/ndarray/__init__.py +0 -0
- pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
- pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
- pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
- pyopencl/compyte/ndarray/pygpu_language.h +0 -207
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
- pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
- pyopencl/compyte/ndarray/setup_opencl.py +0 -101
- pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
- pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
- pyopencl-2024.2.7.dist-info/RECORD +0 -56
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/licenses/LICENSE +0 -0
pyopencl/array.py
CHANGED
|
@@ -32,7 +32,7 @@ import builtins
|
|
|
32
32
|
from dataclasses import dataclass
|
|
33
33
|
from functools import reduce
|
|
34
34
|
from numbers import Number
|
|
35
|
-
from typing import Any, Dict,
|
|
35
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
36
36
|
from warnings import warn
|
|
37
37
|
|
|
38
38
|
import numpy as np
|
|
@@ -42,9 +42,12 @@ import pyopencl.elementwise as elementwise
|
|
|
42
42
|
from pyopencl import cltypes
|
|
43
43
|
from pyopencl.characterize import has_double_support
|
|
44
44
|
from pyopencl.compyte.array import (
|
|
45
|
-
ArrayFlags as _ArrayFlags,
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
ArrayFlags as _ArrayFlags,
|
|
46
|
+
as_strided as _as_strided,
|
|
47
|
+
c_contiguous_strides as _c_contiguous_strides,
|
|
48
|
+
equal_strides as _equal_strides,
|
|
49
|
+
f_contiguous_strides as _f_contiguous_strides,
|
|
50
|
+
)
|
|
48
51
|
|
|
49
52
|
|
|
50
53
|
SCALAR_CLASSES = (Number, np.bool_, bool)
|
|
@@ -55,20 +58,14 @@ else:
|
|
|
55
58
|
_SVMPointer_or_nothing = ()
|
|
56
59
|
|
|
57
60
|
|
|
58
|
-
_NUMPY_PRE_2 = np.__version__.startswith("1.")
|
|
59
|
-
|
|
60
|
-
|
|
61
61
|
# {{{ _get_common_dtype
|
|
62
62
|
|
|
63
|
-
_COMMON_DTYPE_CACHE: Dict[Tuple[Hashable, ...], np.dtype] = {}
|
|
64
|
-
|
|
65
|
-
|
|
66
63
|
class DoubleDowncastWarning(UserWarning):
|
|
67
64
|
pass
|
|
68
65
|
|
|
69
66
|
|
|
70
67
|
_DOUBLE_DOWNCAST_WARNING = (
|
|
71
|
-
"The operation you requested would result in a double-
|
|
68
|
+
"The operation you requested would result in a double-precision "
|
|
72
69
|
"quantity according to numpy semantics. Since your device does not "
|
|
73
70
|
"support double precision, a single-precision quantity is being returned.")
|
|
74
71
|
|
|
@@ -78,78 +75,12 @@ def _get_common_dtype(obj1, obj2, queue):
|
|
|
78
75
|
raise ValueError("PyOpenCL array has no queue; call .with_queue() to "
|
|
79
76
|
"add one in order to be able to perform operations")
|
|
80
77
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if o1_is_array and o2_is_array:
|
|
87
|
-
o1_dtype = obj1.dtype
|
|
88
|
-
o2_dtype = obj2.dtype
|
|
89
|
-
cache_key = (obj1.dtype, obj2.dtype, allow_double)
|
|
90
|
-
else:
|
|
91
|
-
o1_dtype = getattr(obj1, "dtype", type(obj1))
|
|
92
|
-
o2_dtype = getattr(obj2, "dtype", type(obj2))
|
|
93
|
-
|
|
94
|
-
o1_is_integral = np.issubdtype(o1_dtype, np.integer)
|
|
95
|
-
o2_is_integral = np.issubdtype(o1_dtype, np.integer)
|
|
96
|
-
|
|
97
|
-
o1_key = obj1 if o1_is_integral and not o1_is_array else o1_dtype
|
|
98
|
-
o2_key = obj2 if o2_is_integral and not o2_is_array else o2_dtype
|
|
99
|
-
|
|
100
|
-
cache_key = (o1_key, o2_key, o1_is_array, o2_is_array, allow_double)
|
|
101
|
-
|
|
102
|
-
try:
|
|
103
|
-
return _COMMON_DTYPE_CACHE[cache_key]
|
|
104
|
-
except KeyError:
|
|
105
|
-
pass
|
|
78
|
+
# Note: We are calling np.result_type with pyopencl arrays here.
|
|
79
|
+
# Luckily, np.result_type only looks at the dtype of input arrays up until
|
|
80
|
+
# at least numpy v2.1.
|
|
81
|
+
result = np.result_type(obj1, obj2)
|
|
106
82
|
|
|
107
|
-
|
|
108
|
-
# and not just type-sensitive when it comes to scalars. We'll just do our
|
|
109
|
-
# best to emulate it.
|
|
110
|
-
#
|
|
111
|
-
# Some samples that are true as of numpy 1.23.1.
|
|
112
|
-
#
|
|
113
|
-
# >>> a = np.zeros(1, dtype=np.int16)
|
|
114
|
-
# >>> (a + 123123123312).dtype
|
|
115
|
-
# dtype('int64')
|
|
116
|
-
# >>> (a + 12312).dtype
|
|
117
|
-
# dtype('int16')
|
|
118
|
-
# >>> (a + 12312444).dtype
|
|
119
|
-
# dtype('int32')
|
|
120
|
-
# >>> (a + np.int32(12312444)).dtype
|
|
121
|
-
# dtype('int32')
|
|
122
|
-
# >>> (a + np.int32(1234)).dtype
|
|
123
|
-
# dtype('int16')
|
|
124
|
-
#
|
|
125
|
-
# Note that np.find_common_type, while appealing, won't be able to tell
|
|
126
|
-
# the full story.
|
|
127
|
-
|
|
128
|
-
if (_NUMPY_PRE_2
|
|
129
|
-
and not (o1_is_array and o2_is_array)
|
|
130
|
-
and o1_is_integral and o2_is_integral):
|
|
131
|
-
if o1_is_array:
|
|
132
|
-
obj1 = np.zeros(1, dtype=o1_dtype)
|
|
133
|
-
if o2_is_array:
|
|
134
|
-
obj2 = np.zeros(1, dtype=o2_dtype)
|
|
135
|
-
|
|
136
|
-
result = (obj1 + obj2).dtype
|
|
137
|
-
else:
|
|
138
|
-
array_types = []
|
|
139
|
-
scalars = []
|
|
140
|
-
|
|
141
|
-
if o1_is_array:
|
|
142
|
-
array_types.append(o1_dtype)
|
|
143
|
-
else:
|
|
144
|
-
scalars.append(obj1)
|
|
145
|
-
if o2_is_array:
|
|
146
|
-
array_types.append(o2_dtype)
|
|
147
|
-
else:
|
|
148
|
-
scalars.append(obj2)
|
|
149
|
-
|
|
150
|
-
result = np.result_type(*array_types, *scalars)
|
|
151
|
-
|
|
152
|
-
if not allow_double:
|
|
83
|
+
if not has_double_support(queue.device):
|
|
153
84
|
if result == np.float64:
|
|
154
85
|
result = np.dtype(np.float32)
|
|
155
86
|
warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
|
|
@@ -157,9 +88,6 @@ def _get_common_dtype(obj1, obj2, queue):
|
|
|
157
88
|
result = np.dtype(np.complex64)
|
|
158
89
|
warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
|
|
159
90
|
|
|
160
|
-
if cache_key is not None:
|
|
161
|
-
_COMMON_DTYPE_CACHE[cache_key] = result
|
|
162
|
-
|
|
163
91
|
return result
|
|
164
92
|
|
|
165
93
|
# }}}
|
|
@@ -259,7 +187,7 @@ def _splay(device, n, kernel_specific_max_wg_size=None):
|
|
|
259
187
|
group_count = max_groups
|
|
260
188
|
work_items_per_group = max_work_items
|
|
261
189
|
|
|
262
|
-
#print
|
|
190
|
+
# print("n:%d gc:%d wipg:%d" % (n, group_count, work_items_per_group))
|
|
263
191
|
return (group_count*work_items_per_group,), (work_items_per_group,)
|
|
264
192
|
|
|
265
193
|
|
|
@@ -292,7 +220,7 @@ def elwise_kernel_runner(kernel_getter):
|
|
|
292
220
|
queue.device)
|
|
293
221
|
gs, ls = out._get_sizes(queue, work_group_info)
|
|
294
222
|
|
|
295
|
-
args = (out,
|
|
223
|
+
args = (out, *args, out.size)
|
|
296
224
|
if ARRAY_KERNEL_EXEC_HOOK is not None:
|
|
297
225
|
return ARRAY_KERNEL_EXEC_HOOK( # pylint: disable=not-callable
|
|
298
226
|
knl, queue, gs, ls, *args, wait_for=wait_for)
|
|
@@ -587,7 +515,7 @@ class Array:
|
|
|
587
515
|
# FIXME It would be nice to check this. But it would require
|
|
588
516
|
# changing the allocator interface. Trust the user for now.
|
|
589
517
|
|
|
590
|
-
#assert allocator.context == context
|
|
518
|
+
# assert allocator.context == context
|
|
591
519
|
pass
|
|
592
520
|
|
|
593
521
|
# Queue-less arrays do have a purpose in life.
|
|
@@ -608,11 +536,11 @@ class Array:
|
|
|
608
536
|
|
|
609
537
|
try:
|
|
610
538
|
shape = tuple(shape) # type: ignore[arg-type]
|
|
611
|
-
except TypeError:
|
|
539
|
+
except TypeError as err:
|
|
612
540
|
if not isinstance(shape, (int, np.integer)):
|
|
613
541
|
raise TypeError(
|
|
614
542
|
"shape must either be iterable or castable to an integer: "
|
|
615
|
-
f"got a '{type(shape).__name__}'")
|
|
543
|
+
f"got a '{type(shape).__name__}'") from err
|
|
616
544
|
|
|
617
545
|
shape = (shape,)
|
|
618
546
|
|
|
@@ -654,7 +582,7 @@ class Array:
|
|
|
654
582
|
# }}}
|
|
655
583
|
|
|
656
584
|
assert dtype != object, \
|
|
657
|
-
"object arrays on the compute device are not allowed"
|
|
585
|
+
"object arrays on the compute device are not allowed" # noqa: E721
|
|
658
586
|
assert isinstance(shape, tuple)
|
|
659
587
|
assert isinstance(strides, tuple)
|
|
660
588
|
|
|
@@ -922,7 +850,7 @@ class Array:
|
|
|
922
850
|
"device-to-host transfers",
|
|
923
851
|
DeprecationWarning, stacklevel=2)
|
|
924
852
|
|
|
925
|
-
ary,
|
|
853
|
+
ary, _event1 = self._get(queue=queue, ary=ary, async_=async_, **kwargs)
|
|
926
854
|
|
|
927
855
|
return ary
|
|
928
856
|
|
|
@@ -2047,9 +1975,10 @@ class Array:
|
|
|
2047
1975
|
|
|
2048
1976
|
.. versionadded:: 2015.2
|
|
2049
1977
|
"""
|
|
2050
|
-
new_shape = tuple(
|
|
2051
|
-
new_strides = tuple(
|
|
2052
|
-
for i, dim in enumerate(self.shape)
|
|
1978
|
+
new_shape = tuple(dim for dim in self.shape if dim > 1)
|
|
1979
|
+
new_strides = tuple(
|
|
1980
|
+
self.strides[i] for i, dim in enumerate(self.shape)
|
|
1981
|
+
if dim > 1)
|
|
2053
1982
|
|
|
2054
1983
|
return self._new_with_changes(
|
|
2055
1984
|
self.base_data, self.offset,
|
|
@@ -2514,7 +2443,7 @@ def arange(queue, *args, **kwargs):
|
|
|
2514
2443
|
raise TypeError("arange requires a dtype argument")
|
|
2515
2444
|
|
|
2516
2445
|
from math import ceil
|
|
2517
|
-
size =
|
|
2446
|
+
size = ceil((stop-start)/step)
|
|
2518
2447
|
|
|
2519
2448
|
result = Array(queue, (size,), dtype, allocator=inf.allocator)
|
|
2520
2449
|
result.add_event(_arange_knl(result, start, step, queue=queue))
|
|
@@ -2595,14 +2524,16 @@ def multi_take(arrays, indices, out=None, queue=None):
|
|
|
2595
2524
|
cl.kernel_work_group_info.WORK_GROUP_SIZE,
|
|
2596
2525
|
queue.device))
|
|
2597
2526
|
|
|
2598
|
-
wait_for_this = (
|
|
2599
|
-
|
|
2600
|
-
|
|
2527
|
+
wait_for_this = (
|
|
2528
|
+
*indices.events,
|
|
2529
|
+
*[evt for i in arrays[chunk_slice] for evt in i.events],
|
|
2530
|
+
*[evt for o in out[chunk_slice] for evt in o.events])
|
|
2601
2531
|
evt = knl(queue, gs, ls,
|
|
2602
2532
|
indices.data,
|
|
2603
|
-
*
|
|
2604
|
-
|
|
2605
|
-
|
|
2533
|
+
*[o.data for o in out[chunk_slice]],
|
|
2534
|
+
*[i.data for i in arrays[chunk_slice]],
|
|
2535
|
+
*[indices.size],
|
|
2536
|
+
wait_for=wait_for_this)
|
|
2606
2537
|
for o in out[chunk_slice]:
|
|
2607
2538
|
o.add_event(evt)
|
|
2608
2539
|
|
|
@@ -2673,15 +2604,19 @@ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None,
|
|
|
2673
2604
|
cl.kernel_work_group_info.WORK_GROUP_SIZE,
|
|
2674
2605
|
queue.device))
|
|
2675
2606
|
|
|
2676
|
-
wait_for_this = (
|
|
2677
|
-
|
|
2678
|
-
|
|
2607
|
+
wait_for_this = (
|
|
2608
|
+
*dest_indices.events,
|
|
2609
|
+
*src_indices.events,
|
|
2610
|
+
*[evt for i in arrays[chunk_slice] for evt in i.events],
|
|
2611
|
+
*[evt for o in out[chunk_slice] for evt in o.events])
|
|
2679
2612
|
evt = knl(queue, gs, ls,
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2613
|
+
*out[chunk_slice],
|
|
2614
|
+
dest_indices,
|
|
2615
|
+
src_indices,
|
|
2616
|
+
*arrays[chunk_slice],
|
|
2617
|
+
*src_offsets_list[chunk_slice],
|
|
2618
|
+
src_indices.size,
|
|
2619
|
+
wait_for=wait_for_this)
|
|
2685
2620
|
for o in out[chunk_slice]:
|
|
2686
2621
|
o.add_event(evt)
|
|
2687
2622
|
|
|
@@ -2750,16 +2685,16 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None,
|
|
|
2750
2685
|
cl.kernel_work_group_info.WORK_GROUP_SIZE,
|
|
2751
2686
|
queue.device))
|
|
2752
2687
|
|
|
2753
|
-
wait_for_this = (
|
|
2754
|
-
|
|
2755
|
-
|
|
2688
|
+
wait_for_this = (
|
|
2689
|
+
*wait_for,
|
|
2690
|
+
*[evt for i in arrays[chunk_slice] for evt in i.events],
|
|
2691
|
+
*[evt for o in out[chunk_slice] for evt in o.events])
|
|
2756
2692
|
evt = knl(queue, gs, ls,
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
wait_for=wait_for_this)
|
|
2693
|
+
*out[chunk_slice],
|
|
2694
|
+
dest_indices,
|
|
2695
|
+
*arrays[chunk_slice],
|
|
2696
|
+
use_fill_cla, array_lengths_cla, dest_indices.size,
|
|
2697
|
+
wait_for=wait_for_this)
|
|
2763
2698
|
|
|
2764
2699
|
for o in out[chunk_slice]:
|
|
2765
2700
|
o.add_event(evt)
|
|
@@ -2874,7 +2809,7 @@ def hstack(arrays, queue=None):
|
|
|
2874
2809
|
|
|
2875
2810
|
lead_shape = single_valued(ary.shape[:-1] for ary in arrays)
|
|
2876
2811
|
|
|
2877
|
-
w = builtins.sum(
|
|
2812
|
+
w = builtins.sum(ary.shape[-1] for ary in arrays)
|
|
2878
2813
|
|
|
2879
2814
|
if __debug__:
|
|
2880
2815
|
if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
|
|
@@ -2883,7 +2818,7 @@ def hstack(arrays, queue=None):
|
|
|
2883
2818
|
"an instance of the type of arrays[0]",
|
|
2884
2819
|
stacklevel=2)
|
|
2885
2820
|
|
|
2886
|
-
result = arrays[0].__class__(queue, lead_shape
|
|
2821
|
+
result = arrays[0].__class__(queue, (*lead_shape, w), arrays[0].dtype,
|
|
2887
2822
|
allocator=arrays[0].allocator)
|
|
2888
2823
|
index = 0
|
|
2889
2824
|
for ary in arrays:
|
|
@@ -3150,8 +3085,8 @@ def _logical_op(x1, x2, out, operator, queue=None):
|
|
|
3150
3085
|
else:
|
|
3151
3086
|
out[:] = np.logical_or(x1, x2)
|
|
3152
3087
|
elif np.isscalar(x1) or np.isscalar(x2):
|
|
3153
|
-
scalar_arg, =
|
|
3154
|
-
ary_arg, =
|
|
3088
|
+
scalar_arg, = (x for x in (x1, x2) if np.isscalar(x))
|
|
3089
|
+
ary_arg, = (x for x in (x1, x2) if not np.isscalar(x))
|
|
3155
3090
|
queue = queue or ary_arg.queue
|
|
3156
3091
|
allocator = ary_arg.allocator
|
|
3157
3092
|
|
pyopencl/bitonic_sort.py
CHANGED
|
@@ -35,8 +35,10 @@ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
35
35
|
|
|
36
36
|
from functools import reduce
|
|
37
37
|
from operator import mul
|
|
38
|
+
from typing import ClassVar, Dict
|
|
38
39
|
|
|
39
40
|
from mako.template import Template
|
|
41
|
+
|
|
40
42
|
from pytools import memoize_method
|
|
41
43
|
|
|
42
44
|
import pyopencl as cl
|
|
@@ -62,7 +64,7 @@ class BitonicSort:
|
|
|
62
64
|
.. automethod:: __call__
|
|
63
65
|
"""
|
|
64
66
|
|
|
65
|
-
kernels_srcs = {
|
|
67
|
+
kernels_srcs: ClassVar[Dict[str, str]] = {
|
|
66
68
|
"B2": _tmpl.ParallelBitonic_B2,
|
|
67
69
|
"B4": _tmpl.ParallelBitonic_B4,
|
|
68
70
|
"B8": _tmpl.ParallelBitonic_B8,
|
pyopencl/cache.py
CHANGED
|
@@ -42,12 +42,14 @@ import hashlib
|
|
|
42
42
|
new_hash = hashlib.md5
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def _erase_dir(
|
|
45
|
+
def _erase_dir(directory):
|
|
46
46
|
from os import listdir, rmdir, unlink
|
|
47
47
|
from os.path import join
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
|
|
49
|
+
for name in listdir(directory):
|
|
50
|
+
unlink(join(directory, name))
|
|
51
|
+
|
|
52
|
+
rmdir(directory)
|
|
51
53
|
|
|
52
54
|
|
|
53
55
|
def update_checksum(checksum, obj):
|
|
@@ -213,7 +215,7 @@ def get_dependencies(src, include_path):
|
|
|
213
215
|
|
|
214
216
|
_inner(src)
|
|
215
217
|
|
|
216
|
-
result = [(name,
|
|
218
|
+
result = [(name, *vals) for name, vals in result.items()]
|
|
217
219
|
result.sort()
|
|
218
220
|
|
|
219
221
|
return result
|
|
@@ -266,7 +268,7 @@ def get_cache_key(device, options_bytes, src):
|
|
|
266
268
|
|
|
267
269
|
|
|
268
270
|
def retrieve_from_cache(cache_dir, cache_key):
|
|
269
|
-
class
|
|
271
|
+
class _InvalidInfoFileError(RuntimeError):
|
|
270
272
|
pass
|
|
271
273
|
|
|
272
274
|
from os.path import isdir, join
|
|
@@ -290,18 +292,18 @@ def retrieve_from_cache(cache_dir, cache_key):
|
|
|
290
292
|
|
|
291
293
|
try:
|
|
292
294
|
info_file = open(info_path, "rb")
|
|
293
|
-
except OSError:
|
|
294
|
-
raise
|
|
295
|
+
except OSError as err:
|
|
296
|
+
raise _InvalidInfoFileError() from err
|
|
295
297
|
|
|
296
298
|
try:
|
|
297
299
|
try:
|
|
298
300
|
info = load(info_file)
|
|
299
|
-
except EOFError:
|
|
300
|
-
raise
|
|
301
|
+
except EOFError as err:
|
|
302
|
+
raise _InvalidInfoFileError() from err
|
|
301
303
|
finally:
|
|
302
304
|
info_file.close()
|
|
303
305
|
|
|
304
|
-
except
|
|
306
|
+
except _InvalidInfoFileError:
|
|
305
307
|
mod_cache_dir_m.reset()
|
|
306
308
|
from warnings import warn
|
|
307
309
|
warn(
|
|
@@ -375,13 +377,13 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
|
|
|
375
377
|
cache_result = retrieve_from_cache(cache_dir, cache_key)
|
|
376
378
|
|
|
377
379
|
if cache_result is None:
|
|
378
|
-
logger.debug("build program: binary cache miss (key: %s)"
|
|
380
|
+
logger.debug("build program: binary cache miss (key: %s)", cache_key)
|
|
379
381
|
|
|
380
382
|
to_be_built_indices.append(i)
|
|
381
383
|
binaries.append(None)
|
|
382
384
|
logs.append(None)
|
|
383
385
|
else:
|
|
384
|
-
logger.debug("build program: binary cache hit (key: %s)"
|
|
386
|
+
logger.debug("build program: binary cache hit (key: %s)", cache_key)
|
|
385
387
|
|
|
386
388
|
binary, log = cache_result
|
|
387
389
|
binaries.append(binary)
|
|
@@ -410,8 +412,9 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
|
|
|
410
412
|
src = src + "\n\n__constant int pyopencl_defeat_cache_%s = 0;" % (
|
|
411
413
|
uuid4().hex)
|
|
412
414
|
|
|
413
|
-
logger.debug(
|
|
414
|
-
|
|
415
|
+
logger.debug(
|
|
416
|
+
"build program: start building program from source on %s",
|
|
417
|
+
", ".join(str(devices[i]) for i in to_be_built_indices))
|
|
415
418
|
|
|
416
419
|
prg = _cl._Program(ctx, src)
|
|
417
420
|
prg.build(options_bytes, [devices[i] for i in to_be_built_indices])
|
|
@@ -459,13 +462,11 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
|
|
|
459
462
|
binary_path = mod_cache_dir_m.sub("binary")
|
|
460
463
|
source_path = mod_cache_dir_m.sub("source.cl")
|
|
461
464
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
outf.close()
|
|
465
|
+
with open(source_path, "w") as outf:
|
|
466
|
+
outf.write(src)
|
|
465
467
|
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
outf.close()
|
|
468
|
+
with open(binary_path, "wb") as outf:
|
|
469
|
+
outf.write(binary)
|
|
469
470
|
|
|
470
471
|
from pickle import dump
|
|
471
472
|
info_file = open(info_path, "wb")
|
|
@@ -504,7 +505,7 @@ def create_built_program_from_source_cached(ctx, src, options_bytes, devices=Non
|
|
|
504
505
|
except Exception as e:
|
|
505
506
|
from pyopencl import Error
|
|
506
507
|
build_program_failure = (isinstance(e, Error)
|
|
507
|
-
and e.code == _cl.status_code.BUILD_PROGRAM_FAILURE) #
|
|
508
|
+
and e.code == _cl.status_code.BUILD_PROGRAM_FAILURE) # pylint:disable=no-member
|
|
508
509
|
|
|
509
510
|
# Mac error on intel CPU driver: can't build from cached version.
|
|
510
511
|
# If we get a build_program_failure from the cached version then
|
pyopencl/capture_call.py
CHANGED
|
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
import numpy as np
|
|
25
|
+
|
|
25
26
|
from pytools.py_codegen import Indentation, PythonCodeGenerator
|
|
26
27
|
|
|
27
28
|
import pyopencl as cl
|
|
@@ -30,8 +31,8 @@ import pyopencl as cl
|
|
|
30
31
|
def capture_kernel_call(kernel, output_file, queue, g_size, l_size, *args, **kwargs):
|
|
31
32
|
try:
|
|
32
33
|
source = kernel._source
|
|
33
|
-
except AttributeError:
|
|
34
|
-
raise RuntimeError("cannot capture call, kernel source not available")
|
|
34
|
+
except AttributeError as err:
|
|
35
|
+
raise RuntimeError("cannot capture call, kernel source not available") from err
|
|
35
36
|
|
|
36
37
|
if source is None:
|
|
37
38
|
raise RuntimeError("cannot capture call, kernel source not available")
|
|
@@ -91,9 +92,9 @@ def capture_kernel_call(kernel, output_file, queue, g_size, l_size, *args, **kwa
|
|
|
91
92
|
else:
|
|
92
93
|
try:
|
|
93
94
|
arg_buf = memoryview(arg)
|
|
94
|
-
except Exception:
|
|
95
|
+
except Exception as err:
|
|
95
96
|
raise RuntimeError("cannot capture: "
|
|
96
|
-
"unsupported arg nr %d (0-based)" % i)
|
|
97
|
+
"unsupported arg nr %d (0-based)" % i) from err
|
|
97
98
|
|
|
98
99
|
arg_data.append(("arg%d_data" % i, arg_buf))
|
|
99
100
|
kernel_args.append("decompress(b64decode(arg%d_data))" % i)
|
pyopencl/clrandom.py
CHANGED
pyopencl/cltypes.py
CHANGED
|
@@ -50,7 +50,7 @@ double = np.float64
|
|
|
50
50
|
# {{{ vector types
|
|
51
51
|
|
|
52
52
|
def _create_vector_types():
|
|
53
|
-
|
|
53
|
+
mapping = [(k, globals()[k]) for k in
|
|
54
54
|
["char", "uchar", "short", "ushort", "int",
|
|
55
55
|
"uint", "long", "ulong", "float", "double"]]
|
|
56
56
|
|
|
@@ -64,7 +64,7 @@ def _create_vector_types():
|
|
|
64
64
|
|
|
65
65
|
counts = [2, 3, 4, 8, 16]
|
|
66
66
|
|
|
67
|
-
for base_name, base_type in
|
|
67
|
+
for base_name, base_type in mapping:
|
|
68
68
|
for count in counts:
|
|
69
69
|
name = "%s%d" % (base_name, count)
|
|
70
70
|
|
pyopencl/compyte/dtypes.py
CHANGED
|
@@ -29,7 +29,7 @@ OTHER DEALINGS IN THE SOFTWARE.
|
|
|
29
29
|
import numpy as np
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
class TypeNameNotKnown(RuntimeError):
|
|
32
|
+
class TypeNameNotKnown(RuntimeError): # noqa: N818
|
|
33
33
|
pass
|
|
34
34
|
|
|
35
35
|
|
|
@@ -89,7 +89,7 @@ class DTypeRegistry:
|
|
|
89
89
|
|
|
90
90
|
if not existed:
|
|
91
91
|
self.dtype_to_name[dtype] = c_names[0]
|
|
92
|
-
if
|
|
92
|
+
if str(dtype) not in self.dtype_to_name:
|
|
93
93
|
self.dtype_to_name[str(dtype)] = c_names[0]
|
|
94
94
|
|
|
95
95
|
return dtype
|
|
@@ -103,7 +103,7 @@ class DTypeRegistry:
|
|
|
103
103
|
try:
|
|
104
104
|
return self.dtype_to_name[dtype]
|
|
105
105
|
except KeyError:
|
|
106
|
-
raise ValueError("unable to map dtype '%s'" % dtype)
|
|
106
|
+
raise ValueError("unable to map dtype '%s'" % dtype) from None
|
|
107
107
|
|
|
108
108
|
# }}}
|
|
109
109
|
|
|
@@ -260,7 +260,7 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
|
|
|
260
260
|
try:
|
|
261
261
|
dtype = name_to_dtype(tp)
|
|
262
262
|
except KeyError:
|
|
263
|
-
raise ValueError("unknown type '%s'" % tp)
|
|
263
|
+
raise ValueError("unknown type '%s'" % tp) from None
|
|
264
264
|
|
|
265
265
|
return arg_class(dtype, name)
|
|
266
266
|
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[tool.ruff]
|
|
2
|
+
preview = true
|
|
3
|
+
|
|
4
|
+
[tool.ruff.lint]
|
|
5
|
+
extend-select = [
|
|
6
|
+
"B", # flake8-bugbear
|
|
7
|
+
"C", # flake8-comprehensions
|
|
8
|
+
"E", # pycodestyle
|
|
9
|
+
"F", # pyflakes
|
|
10
|
+
|
|
11
|
+
"I", # flake8-isort
|
|
12
|
+
|
|
13
|
+
"N", # pep8-naming
|
|
14
|
+
"NPY", # numpy
|
|
15
|
+
"Q", # flake8-quotes
|
|
16
|
+
"W", # pycodestyle
|
|
17
|
+
|
|
18
|
+
# TODO
|
|
19
|
+
# "UP", # pyupgrade
|
|
20
|
+
# "RUF", # ruff
|
|
21
|
+
]
|
|
22
|
+
extend-ignore = [
|
|
23
|
+
"C90", # McCabe complexity
|
|
24
|
+
"E221", # multiple spaces before operator
|
|
25
|
+
"E241", # multiple spaces after comma
|
|
26
|
+
"E402", # module level import not at the top of file
|
|
27
|
+
"E226", # missing whitespace around operator
|
|
28
|
+
"N817", # CamelCase `SubstitutionRuleMappingContext` imported as acronym `SRMC`
|
|
29
|
+
|
|
30
|
+
# FIXME
|
|
31
|
+
"NPY002", # numpy rng
|
|
32
|
+
"C408", # unnecessary dict() -> literal
|
|
33
|
+
"E265", # block comment should start with '# '
|
|
34
|
+
"F841", # local variable unused
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[tool.ruff.lint.per-file-ignores]
|
|
38
|
+
"ndarray/**/*.py" = ["Q", "B", "E", "F", "N", "C4"]
|
|
39
|
+
|
|
40
|
+
[tool.ruff.lint.flake8-quotes]
|
|
41
|
+
docstring-quotes = "double"
|
|
42
|
+
inline-quotes = "double"
|
|
43
|
+
multiline-quotes = "double"
|
|
44
|
+
|
|
45
|
+
[tool.ruff.lint.isort]
|
|
46
|
+
combine-as-imports = true
|
|
47
|
+
known-first-party = [
|
|
48
|
+
"pytools",
|
|
49
|
+
"pymbolic",
|
|
50
|
+
]
|
|
51
|
+
known-local-folder = [
|
|
52
|
+
"modepy",
|
|
53
|
+
]
|
|
54
|
+
lines-after-imports = 2
|
pyopencl/elementwise.py
CHANGED
|
@@ -31,12 +31,19 @@ import enum
|
|
|
31
31
|
from typing import Any, List, Optional, Tuple, Union
|
|
32
32
|
|
|
33
33
|
import numpy as np
|
|
34
|
+
|
|
34
35
|
from pytools import memoize_method
|
|
35
36
|
|
|
36
37
|
import pyopencl as cl
|
|
37
38
|
from pyopencl.tools import (
|
|
38
|
-
DtypedArgument,
|
|
39
|
-
|
|
39
|
+
DtypedArgument,
|
|
40
|
+
KernelTemplateBase,
|
|
41
|
+
ScalarArg,
|
|
42
|
+
VectorArg,
|
|
43
|
+
context_dependent_memoize,
|
|
44
|
+
dtype_to_c_struct,
|
|
45
|
+
dtype_to_ctype,
|
|
46
|
+
)
|
|
40
47
|
|
|
41
48
|
|
|
42
49
|
# {{{ elementwise kernel code generator
|