pyopencl 2025.1__cp310-cp310-macosx_11_0_arm64.whl → 2025.2.2__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +582 -997
- pyopencl/_cl.cpython-310-darwin.so +0 -0
- pyopencl/_cl.pyi +2006 -0
- pyopencl/_cluda.py +3 -0
- pyopencl/_monkeypatch.py +1063 -0
- pyopencl/_mymako.py +3 -0
- pyopencl/algorithm.py +29 -24
- pyopencl/array.py +300 -255
- pyopencl/bitonic_sort.py +5 -2
- pyopencl/bitonic_sort_templates.py +3 -0
- pyopencl/cache.py +5 -5
- pyopencl/capture_call.py +31 -8
- pyopencl/characterize/__init__.py +26 -19
- pyopencl/characterize/performance.py +3 -0
- pyopencl/clmath.py +2 -0
- pyopencl/clrandom.py +3 -0
- pyopencl/cltypes.py +67 -2
- pyopencl/compyte/.basedpyright/baseline.json +1272 -0
- pyopencl/compyte/array.py +36 -9
- pyopencl/compyte/dtypes.py +61 -29
- pyopencl/compyte/pyproject.toml +17 -22
- pyopencl/elementwise.py +13 -10
- pyopencl/invoker.py +13 -17
- pyopencl/ipython_ext.py +2 -0
- pyopencl/py.typed +0 -0
- pyopencl/reduction.py +72 -43
- pyopencl/scan.py +31 -30
- pyopencl/tools.py +128 -90
- pyopencl/typing.py +57 -0
- pyopencl/version.py +2 -0
- {pyopencl-2025.1.dist-info → pyopencl-2025.2.2.dist-info}/METADATA +11 -10
- pyopencl-2025.2.2.dist-info/RECORD +47 -0
- {pyopencl-2025.1.dist-info → pyopencl-2025.2.2.dist-info}/WHEEL +2 -1
- pyopencl-2025.1.dist-info/RECORD +0 -42
- {pyopencl-2025.1.dist-info → pyopencl-2025.2.2.dist-info}/licenses/LICENSE +0 -0
pyopencl/compyte/array.py
CHANGED
|
@@ -20,10 +20,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
20
20
|
THE SOFTWARE.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
+
from typing import Any, Protocol
|
|
24
|
+
|
|
23
25
|
import numpy as np
|
|
26
|
+
from typing_extensions import override
|
|
24
27
|
|
|
25
28
|
|
|
26
|
-
def f_contiguous_strides(itemsize, shape):
|
|
29
|
+
def f_contiguous_strides(itemsize: int, shape: tuple[int, ...]) -> tuple[int, ...]:
|
|
27
30
|
if shape:
|
|
28
31
|
strides = [itemsize]
|
|
29
32
|
for s in shape[:-1]:
|
|
@@ -36,7 +39,7 @@ def f_contiguous_strides(itemsize, shape):
|
|
|
36
39
|
return ()
|
|
37
40
|
|
|
38
41
|
|
|
39
|
-
def c_contiguous_strides(itemsize, shape):
|
|
42
|
+
def c_contiguous_strides(itemsize: int, shape: tuple[int, ...]) -> tuple[int, ...]:
|
|
40
43
|
if shape:
|
|
41
44
|
strides = [itemsize]
|
|
42
45
|
for s in shape[:0:-1]:
|
|
@@ -49,7 +52,11 @@ def c_contiguous_strides(itemsize, shape):
|
|
|
49
52
|
return ()
|
|
50
53
|
|
|
51
54
|
|
|
52
|
-
def equal_strides(
|
|
55
|
+
def equal_strides(
|
|
56
|
+
strides1: tuple[int, ...],
|
|
57
|
+
strides2: tuple[int, ...],
|
|
58
|
+
shape: tuple[int, ...]
|
|
59
|
+
) -> bool:
|
|
53
60
|
if strides1 == strides2:
|
|
54
61
|
return True
|
|
55
62
|
|
|
@@ -63,33 +70,53 @@ def equal_strides(strides1, strides2, shape):
|
|
|
63
70
|
return True
|
|
64
71
|
|
|
65
72
|
|
|
66
|
-
def is_f_contiguous_strides(
|
|
73
|
+
def is_f_contiguous_strides(
|
|
74
|
+
strides: tuple[int, ...],
|
|
75
|
+
itemsize: int,
|
|
76
|
+
shape: tuple[int, ...]
|
|
77
|
+
) -> bool:
|
|
67
78
|
from pytools import product
|
|
68
79
|
return (
|
|
69
80
|
equal_strides(strides, f_contiguous_strides(itemsize, shape), shape)
|
|
70
|
-
or product(shape) == 0)
|
|
81
|
+
or product(shape) == 0)
|
|
71
82
|
|
|
72
83
|
|
|
73
|
-
def is_c_contiguous_strides(
|
|
84
|
+
def is_c_contiguous_strides(
|
|
85
|
+
strides: tuple[int, ...],
|
|
86
|
+
itemsize: int,
|
|
87
|
+
shape: tuple[int, ...]
|
|
88
|
+
) -> bool:
|
|
74
89
|
from pytools import product
|
|
75
90
|
return (equal_strides(strides, c_contiguous_strides(itemsize, shape), shape)
|
|
76
|
-
or product(shape) == 0)
|
|
91
|
+
or product(shape) == 0)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ArrayIsh(Protocol):
|
|
95
|
+
shape: tuple[int, ...]
|
|
96
|
+
strides: tuple[int, ...]
|
|
97
|
+
dtype: np.dtype[Any]
|
|
77
98
|
|
|
78
99
|
|
|
79
100
|
class ArrayFlags:
|
|
80
|
-
|
|
101
|
+
f_contiguous: bool
|
|
102
|
+
c_contiguous: bool
|
|
103
|
+
forc: bool
|
|
104
|
+
|
|
105
|
+
def __init__(self, ary: ArrayIsh):
|
|
81
106
|
self.f_contiguous = is_f_contiguous_strides(
|
|
82
107
|
ary.strides, ary.dtype.itemsize, ary.shape)
|
|
83
108
|
self.c_contiguous = is_c_contiguous_strides(
|
|
84
109
|
ary.strides, ary.dtype.itemsize, ary.shape)
|
|
85
110
|
self.forc = self.f_contiguous or self.c_contiguous
|
|
86
111
|
|
|
112
|
+
@override
|
|
87
113
|
def __repr__(self):
|
|
88
114
|
return (
|
|
89
115
|
f" C_CONTIGUOUS : {self.c_contiguous}\n"
|
|
90
116
|
f" F_CONTIGUOUS : {self.f_contiguous}"
|
|
91
117
|
)
|
|
92
118
|
|
|
119
|
+
@override
|
|
93
120
|
def __str__(self):
|
|
94
121
|
return repr(self)
|
|
95
122
|
|
|
@@ -177,7 +204,7 @@ except Exception:
|
|
|
177
204
|
# currently (2014/May/17) on pypy.
|
|
178
205
|
|
|
179
206
|
if ((shape is None or x.shape == shape)
|
|
180
|
-
and (strides is None or x.strides == strides)):
|
|
207
|
+
and (strides is None or x.strides == strides)):
|
|
181
208
|
return x
|
|
182
209
|
if not x.dtype.isbuiltin:
|
|
183
210
|
if shape is None:
|
pyopencl/compyte/dtypes.py
CHANGED
|
@@ -26,7 +26,11 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
26
26
|
OTHER DEALINGS IN THE SOFTWARE.
|
|
27
27
|
"""
|
|
28
28
|
|
|
29
|
+
from collections.abc import Sequence
|
|
30
|
+
from typing import Any, Callable, TypeVar
|
|
31
|
+
|
|
29
32
|
import numpy as np
|
|
33
|
+
from numpy.typing import DTypeLike
|
|
30
34
|
|
|
31
35
|
|
|
32
36
|
class TypeNameNotKnown(RuntimeError): # noqa: N818
|
|
@@ -36,11 +40,16 @@ class TypeNameNotKnown(RuntimeError): # noqa: N818
|
|
|
36
40
|
# {{{ registry
|
|
37
41
|
|
|
38
42
|
class DTypeRegistry:
|
|
43
|
+
dtype_to_name: dict[np.dtype[Any] | str, str]
|
|
44
|
+
name_to_dtype: dict[str, np.dtype[Any]]
|
|
45
|
+
|
|
39
46
|
def __init__(self):
|
|
40
47
|
self.dtype_to_name = {}
|
|
41
48
|
self.name_to_dtype = {}
|
|
42
49
|
|
|
43
|
-
def get_or_register_dtype(self,
|
|
50
|
+
def get_or_register_dtype(self,
|
|
51
|
+
c_names: str | Sequence[str],
|
|
52
|
+
dtype: DTypeLike | None = None):
|
|
44
53
|
"""Get or register a :class:`numpy.dtype` associated with the C type names
|
|
45
54
|
in the string list *c_names*. If *dtype* is `None`, no registration is
|
|
46
55
|
performed, and the :class:`numpy.dtype` must already have been registered.
|
|
@@ -84,8 +93,8 @@ class DTypeRegistry:
|
|
|
84
93
|
self.name_to_dtype[nm] = dtype
|
|
85
94
|
else:
|
|
86
95
|
if name_dtype != dtype:
|
|
87
|
-
raise RuntimeError(
|
|
88
|
-
|
|
96
|
+
raise RuntimeError(
|
|
97
|
+
f"name '{nm}' already registered to different dtype")
|
|
89
98
|
|
|
90
99
|
if not existed:
|
|
91
100
|
self.dtype_to_name[dtype] = c_names[0]
|
|
@@ -94,8 +103,8 @@ class DTypeRegistry:
|
|
|
94
103
|
|
|
95
104
|
return dtype
|
|
96
105
|
|
|
97
|
-
def dtype_to_ctype(self, dtype):
|
|
98
|
-
if dtype is None:
|
|
106
|
+
def dtype_to_ctype(self, dtype: np.dtype[Any]) -> str:
|
|
107
|
+
if dtype is None: # pyright: ignore[reportUnnecessaryComparison]
|
|
99
108
|
raise ValueError("dtype may not be None")
|
|
100
109
|
|
|
101
110
|
dtype = np.dtype(dtype)
|
|
@@ -103,14 +112,18 @@ class DTypeRegistry:
|
|
|
103
112
|
try:
|
|
104
113
|
return self.dtype_to_name[dtype]
|
|
105
114
|
except KeyError:
|
|
106
|
-
raise ValueError("unable to map dtype '
|
|
115
|
+
raise ValueError(f"unable to map dtype '{dtype}'") from None
|
|
107
116
|
|
|
108
117
|
# }}}
|
|
109
118
|
|
|
110
119
|
|
|
111
120
|
# {{{ C types
|
|
112
121
|
|
|
113
|
-
def fill_registry_with_c_types(
|
|
122
|
+
def fill_registry_with_c_types(
|
|
123
|
+
reg: DTypeRegistry,
|
|
124
|
+
respect_windows: bool,
|
|
125
|
+
include_bool: bool = True
|
|
126
|
+
) -> None:
|
|
114
127
|
import struct
|
|
115
128
|
from sys import platform
|
|
116
129
|
|
|
@@ -135,18 +148,21 @@ def fill_registry_with_c_types(reg, respect_windows, include_bool=True):
|
|
|
135
148
|
else:
|
|
136
149
|
i64_name = "long"
|
|
137
150
|
|
|
138
|
-
reg.get_or_register_dtype(
|
|
139
|
-
|
|
140
|
-
|
|
151
|
+
reg.get_or_register_dtype([
|
|
152
|
+
i64_name,
|
|
153
|
+
f"{i64_name} int",
|
|
154
|
+
f"signed {i64_name} int",
|
|
155
|
+
f"{i64_name} signed int"],
|
|
141
156
|
np.int64)
|
|
142
|
-
reg.get_or_register_dtype(
|
|
143
|
-
|
|
144
|
-
|
|
157
|
+
reg.get_or_register_dtype([
|
|
158
|
+
f"unsigned {i64_name}",
|
|
159
|
+
f"unsigned {i64_name} int",
|
|
160
|
+
f"{i64_name} unsigned int"],
|
|
145
161
|
np.uint64)
|
|
146
162
|
|
|
147
|
-
#
|
|
163
|
+
# https://github.com/numpy/numpy/issues/2610
|
|
148
164
|
if is_64_bit:
|
|
149
|
-
reg.get_or_register_dtype(["unsigned
|
|
165
|
+
reg.get_or_register_dtype([f"unsigned {i64_name}"], np.uintp)
|
|
150
166
|
else:
|
|
151
167
|
reg.get_or_register_dtype(["unsigned"], np.uintp)
|
|
152
168
|
|
|
@@ -154,7 +170,7 @@ def fill_registry_with_c_types(reg, respect_windows, include_bool=True):
|
|
|
154
170
|
reg.get_or_register_dtype("double", np.float64)
|
|
155
171
|
|
|
156
172
|
|
|
157
|
-
def fill_registry_with_opencl_c_types(reg):
|
|
173
|
+
def fill_registry_with_opencl_c_types(reg: DTypeRegistry) -> None:
|
|
158
174
|
reg.get_or_register_dtype(["char", "signed char"], np.int8)
|
|
159
175
|
reg.get_or_register_dtype(["uchar", "unsigned char"], np.uint8)
|
|
160
176
|
reg.get_or_register_dtype(["short", "signed short",
|
|
@@ -180,7 +196,7 @@ def fill_registry_with_opencl_c_types(reg):
|
|
|
180
196
|
reg.get_or_register_dtype("double", np.float64)
|
|
181
197
|
|
|
182
198
|
|
|
183
|
-
def fill_registry_with_c99_stdint_types(reg):
|
|
199
|
+
def fill_registry_with_c99_stdint_types(reg: DTypeRegistry) -> None:
|
|
184
200
|
reg.get_or_register_dtype("bool", np.bool_)
|
|
185
201
|
|
|
186
202
|
reg.get_or_register_dtype("int8_t", np.int8)
|
|
@@ -197,7 +213,7 @@ def fill_registry_with_c99_stdint_types(reg):
|
|
|
197
213
|
reg.get_or_register_dtype("double", np.float64)
|
|
198
214
|
|
|
199
215
|
|
|
200
|
-
def fill_registry_with_c99_complex_types(reg):
|
|
216
|
+
def fill_registry_with_c99_complex_types(reg: DTypeRegistry) -> None:
|
|
201
217
|
reg.get_or_register_dtype("float complex", np.complex64)
|
|
202
218
|
reg.get_or_register_dtype("double complex", np.complex128)
|
|
203
219
|
reg.get_or_register_dtype("long double complex", np.clongdouble)
|
|
@@ -226,12 +242,21 @@ def _fill_dtype_registry(respect_windows, include_bool=True):
|
|
|
226
242
|
|
|
227
243
|
# {{{ c declarator parsing
|
|
228
244
|
|
|
229
|
-
|
|
230
|
-
|
|
245
|
+
ArgTypeT = TypeVar("ArgTypeT")
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def parse_c_arg_backend(
|
|
249
|
+
c_arg: str,
|
|
250
|
+
scalar_arg_factory: Callable[[np.dtype[Any], str], ArgTypeT],
|
|
251
|
+
vec_arg_factory: Callable[[np.dtype[Any], str], ArgTypeT],
|
|
252
|
+
name_to_dtype: Callable[[str], np.dtype[Any]] | DTypeRegistry | None = None,
|
|
253
|
+
):
|
|
231
254
|
if isinstance(name_to_dtype, DTypeRegistry):
|
|
232
|
-
|
|
255
|
+
name_to_dtype_clbl = name_to_dtype.name_to_dtype.__getitem__
|
|
233
256
|
elif name_to_dtype is None:
|
|
234
|
-
|
|
257
|
+
name_to_dtype_clbl = NAME_TO_DTYPE.__getitem__
|
|
258
|
+
else:
|
|
259
|
+
name_to_dtype_clbl = name_to_dtype
|
|
235
260
|
|
|
236
261
|
c_arg = (c_arg
|
|
237
262
|
.replace("const", "")
|
|
@@ -245,7 +270,7 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
|
|
|
245
270
|
decl_match = decl_re.search(c_arg)
|
|
246
271
|
|
|
247
272
|
if decl_match is None:
|
|
248
|
-
raise ValueError("couldn't parse C declarator '
|
|
273
|
+
raise ValueError(f"couldn't parse C declarator '{c_arg}'")
|
|
249
274
|
|
|
250
275
|
name = decl_match.group(2)
|
|
251
276
|
|
|
@@ -258,16 +283,20 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
|
|
|
258
283
|
tp = " ".join(tp.split())
|
|
259
284
|
|
|
260
285
|
try:
|
|
261
|
-
dtype =
|
|
286
|
+
dtype = name_to_dtype_clbl(tp)
|
|
262
287
|
except KeyError:
|
|
263
|
-
raise ValueError("unknown type '
|
|
288
|
+
raise ValueError(f"unknown type '{tp}'") from None
|
|
264
289
|
|
|
265
290
|
return arg_class(dtype, name)
|
|
266
291
|
|
|
267
292
|
# }}}
|
|
268
293
|
|
|
269
294
|
|
|
270
|
-
def register_dtype(
|
|
295
|
+
def register_dtype(
|
|
296
|
+
dtype: DTypeLike,
|
|
297
|
+
c_names: Sequence[str] | str,
|
|
298
|
+
alias_ok: bool = False
|
|
299
|
+
) -> None:
|
|
271
300
|
from warnings import warn
|
|
272
301
|
warn("register_dtype is deprecated. Use get_or_register_dtype instead.",
|
|
273
302
|
DeprecationWarning, stacklevel=2)
|
|
@@ -280,9 +309,12 @@ def register_dtype(dtype, c_names, alias_ok=False):
|
|
|
280
309
|
# check if we've seen this dtype before and error out if a) it was seen before
|
|
281
310
|
# and b) alias_ok is False.
|
|
282
311
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
312
|
+
name = TYPE_REGISTRY.dtype_to_name.get(dtype)
|
|
313
|
+
if not alias_ok and name is not None:
|
|
314
|
+
c_names_join = "', '".join(c_names)
|
|
315
|
+
raise RuntimeError(
|
|
316
|
+
f"dtype '{dtype}' already registered "
|
|
317
|
+
f"(as '{name}', new names '{c_names_join}')")
|
|
286
318
|
|
|
287
319
|
TYPE_REGISTRY.get_or_register_dtype(c_names, dtype)
|
|
288
320
|
|
pyopencl/compyte/pyproject.toml
CHANGED
|
@@ -7,36 +7,20 @@ extend-select = [
|
|
|
7
7
|
"C", # flake8-comprehensions
|
|
8
8
|
"E", # pycodestyle
|
|
9
9
|
"F", # pyflakes
|
|
10
|
-
|
|
11
10
|
"I", # flake8-isort
|
|
12
|
-
|
|
13
11
|
"N", # pep8-naming
|
|
14
12
|
"NPY", # numpy
|
|
15
13
|
"Q", # flake8-quotes
|
|
14
|
+
"RUF", # ruff
|
|
15
|
+
"UP", # pyupgrade
|
|
16
16
|
"W", # pycodestyle
|
|
17
|
-
|
|
18
|
-
# TODO
|
|
19
|
-
# "UP", # pyupgrade
|
|
20
|
-
# "RUF", # ruff
|
|
21
17
|
]
|
|
22
18
|
extend-ignore = [
|
|
23
19
|
"C90", # McCabe complexity
|
|
24
|
-
"E221", # multiple spaces before operator
|
|
25
|
-
"E241", # multiple spaces after comma
|
|
26
20
|
"E402", # module level import not at the top of file
|
|
27
21
|
"E226", # missing whitespace around operator
|
|
28
|
-
"N817", # CamelCase `SubstitutionRuleMappingContext` imported as acronym `SRMC`
|
|
29
|
-
|
|
30
|
-
# FIXME
|
|
31
|
-
"NPY002", # numpy rng
|
|
32
|
-
"C408", # unnecssary dict() -> literal
|
|
33
|
-
"E265", # block comment should start with
|
|
34
|
-
"F841", # local variable unused
|
|
35
22
|
]
|
|
36
23
|
|
|
37
|
-
[tool.ruff.lint.per-file-ignores]
|
|
38
|
-
"ndarray/**/*.py" = ["Q", "B", "E", "F", "N", "C4"]
|
|
39
|
-
|
|
40
24
|
[tool.ruff.lint.flake8-quotes]
|
|
41
25
|
docstring-quotes = "double"
|
|
42
26
|
inline-quotes = "double"
|
|
@@ -46,9 +30,20 @@ multiline-quotes = "double"
|
|
|
46
30
|
combine-as-imports = true
|
|
47
31
|
known-first-party = [
|
|
48
32
|
"pytools",
|
|
49
|
-
"pymbolic",
|
|
50
|
-
]
|
|
51
|
-
known-local-folder = [
|
|
52
|
-
"modepy",
|
|
53
33
|
]
|
|
54
34
|
lines-after-imports = 2
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
[tool.basedpyright]
|
|
38
|
+
reportImplicitStringConcatenation = "none"
|
|
39
|
+
reportUnnecessaryIsInstance = "none"
|
|
40
|
+
reportUnusedCallResult = "none"
|
|
41
|
+
reportExplicitAny = "none"
|
|
42
|
+
reportUnreachable = "hint"
|
|
43
|
+
|
|
44
|
+
# This reports even cycles that are qualified by 'if TYPE_CHECKING'. Not what
|
|
45
|
+
# we care about at this moment.
|
|
46
|
+
# https://github.com/microsoft/pyright/issues/746
|
|
47
|
+
reportImportCycles = "none"
|
|
48
|
+
pythonVersion = "3.10"
|
|
49
|
+
pythonPlatform = "All"
|
pyopencl/elementwise.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Elementwise functionality."""
|
|
2
|
+
from __future__ import annotations
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
__copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
|
|
@@ -28,7 +29,7 @@ OTHER DEALINGS IN THE SOFTWARE.
|
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
import enum
|
|
31
|
-
from typing import Any
|
|
32
|
+
from typing import Any
|
|
32
33
|
|
|
33
34
|
import numpy as np
|
|
34
35
|
|
|
@@ -50,7 +51,7 @@ from pyopencl.tools import (
|
|
|
50
51
|
|
|
51
52
|
def get_elwise_program(
|
|
52
53
|
context: cl.Context,
|
|
53
|
-
arguments:
|
|
54
|
+
arguments: list[DtypedArgument],
|
|
54
55
|
operation: str, *,
|
|
55
56
|
name: str = "elwise_kernel",
|
|
56
57
|
options: Any = None,
|
|
@@ -118,13 +119,13 @@ def get_elwise_program(
|
|
|
118
119
|
|
|
119
120
|
def get_elwise_kernel_and_types(
|
|
120
121
|
context: cl.Context,
|
|
121
|
-
arguments:
|
|
122
|
+
arguments: str | list[DtypedArgument],
|
|
122
123
|
operation: str, *,
|
|
123
124
|
name: str = "elwise_kernel",
|
|
124
125
|
options: Any = None,
|
|
125
126
|
preamble: str = "",
|
|
126
127
|
use_range: bool = False,
|
|
127
|
-
**kwargs: Any) ->
|
|
128
|
+
**kwargs: Any) -> tuple[cl.Kernel, list[DtypedArgument]]:
|
|
128
129
|
|
|
129
130
|
from pyopencl.tools import get_arg_offset_adjuster_code, parse_arg_list
|
|
130
131
|
parsed_args = parse_arg_list(arguments, with_offset=True)
|
|
@@ -181,7 +182,7 @@ def get_elwise_kernel_and_types(
|
|
|
181
182
|
|
|
182
183
|
def get_elwise_kernel(
|
|
183
184
|
context: cl.Context,
|
|
184
|
-
arguments:
|
|
185
|
+
arguments: str | list[DtypedArgument],
|
|
185
186
|
operation: str, *,
|
|
186
187
|
name: str = "elwise_kernel",
|
|
187
188
|
options: Any = None, **kwargs: Any) -> cl.Kernel:
|
|
@@ -228,7 +229,7 @@ class ElementwiseKernel:
|
|
|
228
229
|
def __init__(
|
|
229
230
|
self,
|
|
230
231
|
context: cl.Context,
|
|
231
|
-
arguments:
|
|
232
|
+
arguments: str | list[DtypedArgument],
|
|
232
233
|
operation: str,
|
|
233
234
|
name: str = "elwise_kernel",
|
|
234
235
|
options: Any = None, **kwargs: Any) -> None:
|
|
@@ -294,7 +295,9 @@ class ElementwiseKernel:
|
|
|
294
295
|
|
|
295
296
|
repr_vec = None
|
|
296
297
|
invocation_args = []
|
|
297
|
-
|
|
298
|
+
|
|
299
|
+
# non-strict because length arg gets appended below
|
|
300
|
+
for arg, arg_descr in zip(args, arg_descrs, strict=False):
|
|
298
301
|
if isinstance(arg_descr, VectorArg):
|
|
299
302
|
if repr_vec is None:
|
|
300
303
|
repr_vec = arg
|
|
@@ -358,11 +361,11 @@ class ElementwiseKernel:
|
|
|
358
361
|
class ElementwiseTemplate(KernelTemplateBase):
|
|
359
362
|
def __init__(
|
|
360
363
|
self,
|
|
361
|
-
arguments:
|
|
364
|
+
arguments: str | list[DtypedArgument],
|
|
362
365
|
operation: str,
|
|
363
366
|
name: str = "elwise",
|
|
364
367
|
preamble: str = "",
|
|
365
|
-
template_processor:
|
|
368
|
+
template_processor: str | None = None) -> None:
|
|
366
369
|
super().__init__(template_processor=template_processor)
|
|
367
370
|
self.arguments = arguments
|
|
368
371
|
self.operation = operation
|
|
@@ -411,7 +414,7 @@ def get_argument_kind(v: Any) -> ArgumentKind:
|
|
|
411
414
|
return ArgumentKind.SCALAR
|
|
412
415
|
|
|
413
416
|
|
|
414
|
-
def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) ->
|
|
417
|
+
def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) -> tuple[str, str]:
|
|
415
418
|
if kind == ArgumentKind.ARRAY:
|
|
416
419
|
return f"*{name}", f"{name}[i]"
|
|
417
420
|
elif kind == ArgumentKind.SCALAR:
|
pyopencl/invoker.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
__copyright__ = """
|
|
2
5
|
Copyright (C) 2017 Andreas Kloeckner
|
|
3
6
|
"""
|
|
@@ -22,7 +25,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
22
25
|
THE SOFTWARE.
|
|
23
26
|
"""
|
|
24
27
|
|
|
25
|
-
from typing import Any
|
|
28
|
+
from typing import Any
|
|
26
29
|
from warnings import warn
|
|
27
30
|
|
|
28
31
|
import numpy as np
|
|
@@ -306,7 +309,7 @@ def _generate_enqueue_and_set_args_module(function_name,
|
|
|
306
309
|
|
|
307
310
|
return (
|
|
308
311
|
gen.get_picklable_module(
|
|
309
|
-
|
|
312
|
+
name_prefix=f"pyopencl invoker for '{function_name}'"),
|
|
310
313
|
enqueue_name)
|
|
311
314
|
|
|
312
315
|
|
|
@@ -319,7 +322,7 @@ def _get_max_parameter_size(dev):
|
|
|
319
322
|
dev_limit = dev.max_parameter_size
|
|
320
323
|
pocl_version = get_pocl_version(dev.platform, fallback_value=(1, 8))
|
|
321
324
|
if pocl_version is not None and pocl_version < (3, 0):
|
|
322
|
-
#
|
|
325
|
+
# Older PoCL versions (<3.0) have an incorrect parameter
|
|
323
326
|
# size limit of 1024; see e.g. https://github.com/pocl/pocl/pull/1046
|
|
324
327
|
if dev_limit == 1024:
|
|
325
328
|
if dev.type & cl.device_type.CPU:
|
|
@@ -336,17 +339,20 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
|
|
|
336
339
|
"""Check whether argument sizes exceed the OpenCL device limit."""
|
|
337
340
|
|
|
338
341
|
for dev in devs:
|
|
342
|
+
from pyopencl.characterize import nv_compute_capability
|
|
343
|
+
if nv_compute_capability(dev) is None:
|
|
344
|
+
# Only warn on Nvidia GPUs, because actual failures related to
|
|
345
|
+
# the device limit have been observed only on such devices.
|
|
346
|
+
continue
|
|
347
|
+
|
|
339
348
|
dev_ptr_size = int(dev.address_bits / 8)
|
|
340
349
|
dev_limit = _get_max_parameter_size(dev)
|
|
341
350
|
|
|
342
351
|
total_arg_size = 0
|
|
343
352
|
|
|
344
|
-
is_estimate = False
|
|
345
|
-
|
|
346
353
|
if arg_types:
|
|
347
354
|
for arg_type in arg_types:
|
|
348
355
|
if arg_type is None:
|
|
349
|
-
is_estimate = True
|
|
350
356
|
total_arg_size += dev_ptr_size
|
|
351
357
|
elif isinstance(arg_type, VectorArg):
|
|
352
358
|
total_arg_size += dev_ptr_size
|
|
@@ -354,7 +360,6 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
|
|
|
354
360
|
total_arg_size += np.dtype(arg_type).itemsize
|
|
355
361
|
else:
|
|
356
362
|
# Estimate that each argument has the size of a pointer on average
|
|
357
|
-
is_estimate = True
|
|
358
363
|
total_arg_size = dev_ptr_size * num_cl_args
|
|
359
364
|
|
|
360
365
|
if total_arg_size > dev_limit:
|
|
@@ -364,22 +369,13 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
|
|
|
364
369
|
f"the limit of {dev_limit} bytes on {dev}. This might "
|
|
365
370
|
"lead to compilation errors, especially on GPU devices.",
|
|
366
371
|
stacklevel=3)
|
|
367
|
-
elif is_estimate and total_arg_size >= dev_limit * 0.75:
|
|
368
|
-
# Since total_arg_size is just an estimate, also warn in case we are
|
|
369
|
-
# just below the actual limit.
|
|
370
|
-
from warnings import warn
|
|
371
|
-
warn(f"Kernel '{function_name}' has {num_cl_args} arguments with "
|
|
372
|
-
f"a total size of {total_arg_size} bytes, which approaches "
|
|
373
|
-
f"the limit of {dev_limit} bytes on {dev}. This might "
|
|
374
|
-
"lead to compilation errors, especially on GPU devices.",
|
|
375
|
-
stacklevel=3)
|
|
376
372
|
|
|
377
373
|
# }}}
|
|
378
374
|
|
|
379
375
|
|
|
380
376
|
if not cl._PYOPENCL_NO_CACHE:
|
|
381
377
|
from pytools.py_codegen import PicklableModule
|
|
382
|
-
invoker_cache: WriteOncePersistentDict[Any,
|
|
378
|
+
invoker_cache: WriteOncePersistentDict[Any, tuple[PicklableModule, str]] \
|
|
383
379
|
= WriteOncePersistentDict(
|
|
384
380
|
"pyopencl-invoker-cache-v42-nano",
|
|
385
381
|
key_builder=_NumpyTypesKeyBuilder(),
|
pyopencl/ipython_ext.py
CHANGED
pyopencl/py.typed
ADDED
|
File without changes
|