pyopencl 2025.1__cp310-cp310-win_amd64.whl → 2025.2.1__cp310-cp310-win_amd64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +568 -997
- pyopencl/_cl.cp310-win_amd64.pyd +0 -0
- pyopencl/_cl.pyi +2006 -0
- pyopencl/_cluda.py +3 -0
- pyopencl/_monkeypatch.py +1063 -0
- pyopencl/_mymako.py +3 -0
- pyopencl/algorithm.py +29 -24
- pyopencl/array.py +30 -27
- pyopencl/bitonic_sort.py +5 -2
- pyopencl/bitonic_sort_templates.py +3 -0
- pyopencl/cache.py +5 -5
- pyopencl/capture_call.py +31 -8
- pyopencl/characterize/__init__.py +26 -19
- pyopencl/characterize/performance.py +3 -0
- pyopencl/clmath.py +2 -0
- pyopencl/clrandom.py +3 -0
- pyopencl/cltypes.py +67 -2
- pyopencl/compyte/array.py +3 -3
- pyopencl/compyte/dtypes.py +22 -16
- pyopencl/compyte/pyproject.toml +2 -22
- pyopencl/elementwise.py +13 -10
- pyopencl/invoker.py +13 -17
- pyopencl/ipython_ext.py +2 -0
- pyopencl/py.typed +0 -0
- pyopencl/reduction.py +18 -16
- pyopencl/scan.py +31 -30
- pyopencl/tools.py +128 -90
- pyopencl/typing.py +52 -0
- pyopencl/version.py +2 -0
- {pyopencl-2025.1.dist-info → pyopencl-2025.2.1.dist-info}/METADATA +11 -10
- pyopencl-2025.2.1.dist-info/RECORD +46 -0
- {pyopencl-2025.1.dist-info → pyopencl-2025.2.1.dist-info}/WHEEL +1 -1
- pyopencl-2025.1.dist-info/RECORD +0 -42
- {pyopencl-2025.1.dist-info → pyopencl-2025.2.1.dist-info}/licenses/LICENSE +0 -0
pyopencl/cltypes.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
__copyright__ = "Copyright (C) 2016 Jonathan Mackenzie"
|
|
2
5
|
|
|
3
6
|
__license__ = """
|
|
@@ -19,6 +22,7 @@ THE SOFTWARE.
|
|
|
19
22
|
"""
|
|
20
23
|
|
|
21
24
|
import warnings
|
|
25
|
+
from typing import Any
|
|
22
26
|
|
|
23
27
|
import numpy as np
|
|
24
28
|
|
|
@@ -89,10 +93,11 @@ def _create_vector_types():
|
|
|
89
93
|
except NotImplementedError:
|
|
90
94
|
try:
|
|
91
95
|
dtype = np.dtype([((n, title), base_type)
|
|
92
|
-
for (n, title)
|
|
96
|
+
for (n, title)
|
|
97
|
+
in zip(names, titles, strict=True)])
|
|
93
98
|
except TypeError:
|
|
94
99
|
dtype = np.dtype([(n, base_type) for (n, title)
|
|
95
|
-
in zip(names, titles)])
|
|
100
|
+
in zip(names, titles, strict=True)])
|
|
96
101
|
|
|
97
102
|
get_or_register_dtype(name, dtype)
|
|
98
103
|
|
|
@@ -134,4 +139,64 @@ vec_types, vec_type_to_scalar_and_count = _create_vector_types()
|
|
|
134
139
|
|
|
135
140
|
# }}}
|
|
136
141
|
|
|
142
|
+
char2: np.dtype[Any]
|
|
143
|
+
char3: np.dtype[Any]
|
|
144
|
+
char4: np.dtype[Any]
|
|
145
|
+
char8: np.dtype[Any]
|
|
146
|
+
char16: np.dtype[Any]
|
|
147
|
+
|
|
148
|
+
uchar2: np.dtype[Any]
|
|
149
|
+
uchar3: np.dtype[Any]
|
|
150
|
+
uchar4: np.dtype[Any]
|
|
151
|
+
uchar8: np.dtype[Any]
|
|
152
|
+
uchar16: np.dtype[Any]
|
|
153
|
+
|
|
154
|
+
short2: np.dtype[Any]
|
|
155
|
+
short3: np.dtype[Any]
|
|
156
|
+
short4: np.dtype[Any]
|
|
157
|
+
short8: np.dtype[Any]
|
|
158
|
+
short16: np.dtype[Any]
|
|
159
|
+
|
|
160
|
+
ushort2: np.dtype[Any]
|
|
161
|
+
ushort3: np.dtype[Any]
|
|
162
|
+
ushort4: np.dtype[Any]
|
|
163
|
+
ushort8: np.dtype[Any]
|
|
164
|
+
ushort16: np.dtype[Any]
|
|
165
|
+
|
|
166
|
+
int2: np.dtype[Any]
|
|
167
|
+
int3: np.dtype[Any]
|
|
168
|
+
int4: np.dtype[Any]
|
|
169
|
+
int8: np.dtype[Any]
|
|
170
|
+
int16: np.dtype[Any]
|
|
171
|
+
|
|
172
|
+
uint2: np.dtype[Any]
|
|
173
|
+
uint3: np.dtype[Any]
|
|
174
|
+
uint4: np.dtype[Any]
|
|
175
|
+
uint8: np.dtype[Any]
|
|
176
|
+
uint16: np.dtype[Any]
|
|
177
|
+
|
|
178
|
+
long2: np.dtype[Any]
|
|
179
|
+
long3: np.dtype[Any]
|
|
180
|
+
long4: np.dtype[Any]
|
|
181
|
+
long8: np.dtype[Any]
|
|
182
|
+
long16: np.dtype[Any]
|
|
183
|
+
|
|
184
|
+
ulong2: np.dtype[Any]
|
|
185
|
+
ulong3: np.dtype[Any]
|
|
186
|
+
ulong4: np.dtype[Any]
|
|
187
|
+
ulong8: np.dtype[Any]
|
|
188
|
+
ulong16: np.dtype[Any]
|
|
189
|
+
|
|
190
|
+
float2: np.dtype[Any]
|
|
191
|
+
float3: np.dtype[Any]
|
|
192
|
+
float4: np.dtype[Any]
|
|
193
|
+
float8: np.dtype[Any]
|
|
194
|
+
float16: np.dtype[Any]
|
|
195
|
+
|
|
196
|
+
double2: np.dtype[Any]
|
|
197
|
+
double3: np.dtype[Any]
|
|
198
|
+
double4: np.dtype[Any]
|
|
199
|
+
double8: np.dtype[Any]
|
|
200
|
+
double16: np.dtype[Any]
|
|
201
|
+
|
|
137
202
|
# vim: foldmethod=marker
|
pyopencl/compyte/array.py
CHANGED
|
@@ -67,13 +67,13 @@ def is_f_contiguous_strides(strides, itemsize, shape):
|
|
|
67
67
|
from pytools import product
|
|
68
68
|
return (
|
|
69
69
|
equal_strides(strides, f_contiguous_strides(itemsize, shape), shape)
|
|
70
|
-
or product(shape) == 0)
|
|
70
|
+
or product(shape) == 0)
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
def is_c_contiguous_strides(strides, itemsize, shape):
|
|
74
74
|
from pytools import product
|
|
75
75
|
return (equal_strides(strides, c_contiguous_strides(itemsize, shape), shape)
|
|
76
|
-
or product(shape) == 0)
|
|
76
|
+
or product(shape) == 0)
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
class ArrayFlags:
|
|
@@ -177,7 +177,7 @@ except Exception:
|
|
|
177
177
|
# currently (2014/May/17) on pypy.
|
|
178
178
|
|
|
179
179
|
if ((shape is None or x.shape == shape)
|
|
180
|
-
and (strides is None or x.strides == strides)):
|
|
180
|
+
and (strides is None or x.strides == strides)):
|
|
181
181
|
return x
|
|
182
182
|
if not x.dtype.isbuiltin:
|
|
183
183
|
if shape is None:
|
pyopencl/compyte/dtypes.py
CHANGED
|
@@ -84,8 +84,8 @@ class DTypeRegistry:
|
|
|
84
84
|
self.name_to_dtype[nm] = dtype
|
|
85
85
|
else:
|
|
86
86
|
if name_dtype != dtype:
|
|
87
|
-
raise RuntimeError(
|
|
88
|
-
|
|
87
|
+
raise RuntimeError(
|
|
88
|
+
f"name '{nm}' already registered to different dtype")
|
|
89
89
|
|
|
90
90
|
if not existed:
|
|
91
91
|
self.dtype_to_name[dtype] = c_names[0]
|
|
@@ -103,7 +103,7 @@ class DTypeRegistry:
|
|
|
103
103
|
try:
|
|
104
104
|
return self.dtype_to_name[dtype]
|
|
105
105
|
except KeyError:
|
|
106
|
-
raise ValueError("unable to map dtype '
|
|
106
|
+
raise ValueError(f"unable to map dtype '{dtype}'") from None
|
|
107
107
|
|
|
108
108
|
# }}}
|
|
109
109
|
|
|
@@ -135,18 +135,21 @@ def fill_registry_with_c_types(reg, respect_windows, include_bool=True):
|
|
|
135
135
|
else:
|
|
136
136
|
i64_name = "long"
|
|
137
137
|
|
|
138
|
-
reg.get_or_register_dtype(
|
|
139
|
-
|
|
140
|
-
|
|
138
|
+
reg.get_or_register_dtype([
|
|
139
|
+
i64_name,
|
|
140
|
+
f"{i64_name} int",
|
|
141
|
+
f"signed {i64_name} int",
|
|
142
|
+
f"{i64_name} signed int"],
|
|
141
143
|
np.int64)
|
|
142
|
-
reg.get_or_register_dtype(
|
|
143
|
-
|
|
144
|
-
|
|
144
|
+
reg.get_or_register_dtype([
|
|
145
|
+
f"unsigned {i64_name}",
|
|
146
|
+
f"unsigned {i64_name} int",
|
|
147
|
+
f"{i64_name} unsigned int"],
|
|
145
148
|
np.uint64)
|
|
146
149
|
|
|
147
|
-
#
|
|
150
|
+
# https://github.com/numpy/numpy/issues/2610
|
|
148
151
|
if is_64_bit:
|
|
149
|
-
reg.get_or_register_dtype(["unsigned
|
|
152
|
+
reg.get_or_register_dtype([f"unsigned {i64_name}"], np.uintp)
|
|
150
153
|
else:
|
|
151
154
|
reg.get_or_register_dtype(["unsigned"], np.uintp)
|
|
152
155
|
|
|
@@ -245,7 +248,7 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
|
|
|
245
248
|
decl_match = decl_re.search(c_arg)
|
|
246
249
|
|
|
247
250
|
if decl_match is None:
|
|
248
|
-
raise ValueError("couldn't parse C declarator '
|
|
251
|
+
raise ValueError(f"couldn't parse C declarator '{c_arg}'")
|
|
249
252
|
|
|
250
253
|
name = decl_match.group(2)
|
|
251
254
|
|
|
@@ -260,7 +263,7 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
|
|
|
260
263
|
try:
|
|
261
264
|
dtype = name_to_dtype(tp)
|
|
262
265
|
except KeyError:
|
|
263
|
-
raise ValueError("unknown type '
|
|
266
|
+
raise ValueError(f"unknown type '{tp}'") from None
|
|
264
267
|
|
|
265
268
|
return arg_class(dtype, name)
|
|
266
269
|
|
|
@@ -280,9 +283,12 @@ def register_dtype(dtype, c_names, alias_ok=False):
|
|
|
280
283
|
# check if we've seen this dtype before and error out if a) it was seen before
|
|
281
284
|
# and b) alias_ok is False.
|
|
282
285
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
+
name = TYPE_REGISTRY.dtype_to_name.get(dtype)
|
|
287
|
+
if not alias_ok and name is not None:
|
|
288
|
+
c_names_join = "', '".join(c_names)
|
|
289
|
+
raise RuntimeError(
|
|
290
|
+
f"dtype '{dtype}' already registered "
|
|
291
|
+
f"(as '{name}', new names '{c_names_join}')")
|
|
286
292
|
|
|
287
293
|
TYPE_REGISTRY.get_or_register_dtype(c_names, dtype)
|
|
288
294
|
|
pyopencl/compyte/pyproject.toml
CHANGED
|
@@ -7,36 +7,20 @@ extend-select = [
|
|
|
7
7
|
"C", # flake8-comprehensions
|
|
8
8
|
"E", # pycodestyle
|
|
9
9
|
"F", # pyflakes
|
|
10
|
-
|
|
11
10
|
"I", # flake8-isort
|
|
12
|
-
|
|
13
11
|
"N", # pep8-naming
|
|
14
12
|
"NPY", # numpy
|
|
15
13
|
"Q", # flake8-quotes
|
|
14
|
+
"RUF", # ruff
|
|
15
|
+
"UP", # pyupgrade
|
|
16
16
|
"W", # pycodestyle
|
|
17
|
-
|
|
18
|
-
# TODO
|
|
19
|
-
# "UP", # pyupgrade
|
|
20
|
-
# "RUF", # ruff
|
|
21
17
|
]
|
|
22
18
|
extend-ignore = [
|
|
23
19
|
"C90", # McCabe complexity
|
|
24
|
-
"E221", # multiple spaces before operator
|
|
25
|
-
"E241", # multiple spaces after comma
|
|
26
20
|
"E402", # module level import not at the top of file
|
|
27
21
|
"E226", # missing whitespace around operator
|
|
28
|
-
"N817", # CamelCase `SubstitutionRuleMappingContext` imported as acronym `SRMC`
|
|
29
|
-
|
|
30
|
-
# FIXME
|
|
31
|
-
"NPY002", # numpy rng
|
|
32
|
-
"C408", # unnecssary dict() -> literal
|
|
33
|
-
"E265", # block comment should start with
|
|
34
|
-
"F841", # local variable unused
|
|
35
22
|
]
|
|
36
23
|
|
|
37
|
-
[tool.ruff.lint.per-file-ignores]
|
|
38
|
-
"ndarray/**/*.py" = ["Q", "B", "E", "F", "N", "C4"]
|
|
39
|
-
|
|
40
24
|
[tool.ruff.lint.flake8-quotes]
|
|
41
25
|
docstring-quotes = "double"
|
|
42
26
|
inline-quotes = "double"
|
|
@@ -46,9 +30,5 @@ multiline-quotes = "double"
|
|
|
46
30
|
combine-as-imports = true
|
|
47
31
|
known-first-party = [
|
|
48
32
|
"pytools",
|
|
49
|
-
"pymbolic",
|
|
50
|
-
]
|
|
51
|
-
known-local-folder = [
|
|
52
|
-
"modepy",
|
|
53
33
|
]
|
|
54
34
|
lines-after-imports = 2
|
pyopencl/elementwise.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Elementwise functionality."""
|
|
2
|
+
from __future__ import annotations
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
__copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
|
|
@@ -28,7 +29,7 @@ OTHER DEALINGS IN THE SOFTWARE.
|
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
import enum
|
|
31
|
-
from typing import Any
|
|
32
|
+
from typing import Any
|
|
32
33
|
|
|
33
34
|
import numpy as np
|
|
34
35
|
|
|
@@ -50,7 +51,7 @@ from pyopencl.tools import (
|
|
|
50
51
|
|
|
51
52
|
def get_elwise_program(
|
|
52
53
|
context: cl.Context,
|
|
53
|
-
arguments:
|
|
54
|
+
arguments: list[DtypedArgument],
|
|
54
55
|
operation: str, *,
|
|
55
56
|
name: str = "elwise_kernel",
|
|
56
57
|
options: Any = None,
|
|
@@ -118,13 +119,13 @@ def get_elwise_program(
|
|
|
118
119
|
|
|
119
120
|
def get_elwise_kernel_and_types(
|
|
120
121
|
context: cl.Context,
|
|
121
|
-
arguments:
|
|
122
|
+
arguments: str | list[DtypedArgument],
|
|
122
123
|
operation: str, *,
|
|
123
124
|
name: str = "elwise_kernel",
|
|
124
125
|
options: Any = None,
|
|
125
126
|
preamble: str = "",
|
|
126
127
|
use_range: bool = False,
|
|
127
|
-
**kwargs: Any) ->
|
|
128
|
+
**kwargs: Any) -> tuple[cl.Kernel, list[DtypedArgument]]:
|
|
128
129
|
|
|
129
130
|
from pyopencl.tools import get_arg_offset_adjuster_code, parse_arg_list
|
|
130
131
|
parsed_args = parse_arg_list(arguments, with_offset=True)
|
|
@@ -181,7 +182,7 @@ def get_elwise_kernel_and_types(
|
|
|
181
182
|
|
|
182
183
|
def get_elwise_kernel(
|
|
183
184
|
context: cl.Context,
|
|
184
|
-
arguments:
|
|
185
|
+
arguments: str | list[DtypedArgument],
|
|
185
186
|
operation: str, *,
|
|
186
187
|
name: str = "elwise_kernel",
|
|
187
188
|
options: Any = None, **kwargs: Any) -> cl.Kernel:
|
|
@@ -228,7 +229,7 @@ class ElementwiseKernel:
|
|
|
228
229
|
def __init__(
|
|
229
230
|
self,
|
|
230
231
|
context: cl.Context,
|
|
231
|
-
arguments:
|
|
232
|
+
arguments: str | list[DtypedArgument],
|
|
232
233
|
operation: str,
|
|
233
234
|
name: str = "elwise_kernel",
|
|
234
235
|
options: Any = None, **kwargs: Any) -> None:
|
|
@@ -294,7 +295,9 @@ class ElementwiseKernel:
|
|
|
294
295
|
|
|
295
296
|
repr_vec = None
|
|
296
297
|
invocation_args = []
|
|
297
|
-
|
|
298
|
+
|
|
299
|
+
# non-strict because length arg gets appended below
|
|
300
|
+
for arg, arg_descr in zip(args, arg_descrs, strict=False):
|
|
298
301
|
if isinstance(arg_descr, VectorArg):
|
|
299
302
|
if repr_vec is None:
|
|
300
303
|
repr_vec = arg
|
|
@@ -358,11 +361,11 @@ class ElementwiseKernel:
|
|
|
358
361
|
class ElementwiseTemplate(KernelTemplateBase):
|
|
359
362
|
def __init__(
|
|
360
363
|
self,
|
|
361
|
-
arguments:
|
|
364
|
+
arguments: str | list[DtypedArgument],
|
|
362
365
|
operation: str,
|
|
363
366
|
name: str = "elwise",
|
|
364
367
|
preamble: str = "",
|
|
365
|
-
template_processor:
|
|
368
|
+
template_processor: str | None = None) -> None:
|
|
366
369
|
super().__init__(template_processor=template_processor)
|
|
367
370
|
self.arguments = arguments
|
|
368
371
|
self.operation = operation
|
|
@@ -411,7 +414,7 @@ def get_argument_kind(v: Any) -> ArgumentKind:
|
|
|
411
414
|
return ArgumentKind.SCALAR
|
|
412
415
|
|
|
413
416
|
|
|
414
|
-
def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) ->
|
|
417
|
+
def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) -> tuple[str, str]:
|
|
415
418
|
if kind == ArgumentKind.ARRAY:
|
|
416
419
|
return f"*{name}", f"{name}[i]"
|
|
417
420
|
elif kind == ArgumentKind.SCALAR:
|
pyopencl/invoker.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
__copyright__ = """
|
|
2
5
|
Copyright (C) 2017 Andreas Kloeckner
|
|
3
6
|
"""
|
|
@@ -22,7 +25,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
22
25
|
THE SOFTWARE.
|
|
23
26
|
"""
|
|
24
27
|
|
|
25
|
-
from typing import Any
|
|
28
|
+
from typing import Any
|
|
26
29
|
from warnings import warn
|
|
27
30
|
|
|
28
31
|
import numpy as np
|
|
@@ -306,7 +309,7 @@ def _generate_enqueue_and_set_args_module(function_name,
|
|
|
306
309
|
|
|
307
310
|
return (
|
|
308
311
|
gen.get_picklable_module(
|
|
309
|
-
|
|
312
|
+
name_prefix=f"pyopencl invoker for '{function_name}'"),
|
|
310
313
|
enqueue_name)
|
|
311
314
|
|
|
312
315
|
|
|
@@ -319,7 +322,7 @@ def _get_max_parameter_size(dev):
|
|
|
319
322
|
dev_limit = dev.max_parameter_size
|
|
320
323
|
pocl_version = get_pocl_version(dev.platform, fallback_value=(1, 8))
|
|
321
324
|
if pocl_version is not None and pocl_version < (3, 0):
|
|
322
|
-
#
|
|
325
|
+
# Older PoCL versions (<3.0) have an incorrect parameter
|
|
323
326
|
# size limit of 1024; see e.g. https://github.com/pocl/pocl/pull/1046
|
|
324
327
|
if dev_limit == 1024:
|
|
325
328
|
if dev.type & cl.device_type.CPU:
|
|
@@ -336,17 +339,20 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
|
|
|
336
339
|
"""Check whether argument sizes exceed the OpenCL device limit."""
|
|
337
340
|
|
|
338
341
|
for dev in devs:
|
|
342
|
+
from pyopencl.characterize import nv_compute_capability
|
|
343
|
+
if nv_compute_capability(dev) is None:
|
|
344
|
+
# Only warn on Nvidia GPUs, because actual failures related to
|
|
345
|
+
# the device limit have been observed only on such devices.
|
|
346
|
+
continue
|
|
347
|
+
|
|
339
348
|
dev_ptr_size = int(dev.address_bits / 8)
|
|
340
349
|
dev_limit = _get_max_parameter_size(dev)
|
|
341
350
|
|
|
342
351
|
total_arg_size = 0
|
|
343
352
|
|
|
344
|
-
is_estimate = False
|
|
345
|
-
|
|
346
353
|
if arg_types:
|
|
347
354
|
for arg_type in arg_types:
|
|
348
355
|
if arg_type is None:
|
|
349
|
-
is_estimate = True
|
|
350
356
|
total_arg_size += dev_ptr_size
|
|
351
357
|
elif isinstance(arg_type, VectorArg):
|
|
352
358
|
total_arg_size += dev_ptr_size
|
|
@@ -354,7 +360,6 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
|
|
|
354
360
|
total_arg_size += np.dtype(arg_type).itemsize
|
|
355
361
|
else:
|
|
356
362
|
# Estimate that each argument has the size of a pointer on average
|
|
357
|
-
is_estimate = True
|
|
358
363
|
total_arg_size = dev_ptr_size * num_cl_args
|
|
359
364
|
|
|
360
365
|
if total_arg_size > dev_limit:
|
|
@@ -364,22 +369,13 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
|
|
|
364
369
|
f"the limit of {dev_limit} bytes on {dev}. This might "
|
|
365
370
|
"lead to compilation errors, especially on GPU devices.",
|
|
366
371
|
stacklevel=3)
|
|
367
|
-
elif is_estimate and total_arg_size >= dev_limit * 0.75:
|
|
368
|
-
# Since total_arg_size is just an estimate, also warn in case we are
|
|
369
|
-
# just below the actual limit.
|
|
370
|
-
from warnings import warn
|
|
371
|
-
warn(f"Kernel '{function_name}' has {num_cl_args} arguments with "
|
|
372
|
-
f"a total size of {total_arg_size} bytes, which approaches "
|
|
373
|
-
f"the limit of {dev_limit} bytes on {dev}. This might "
|
|
374
|
-
"lead to compilation errors, especially on GPU devices.",
|
|
375
|
-
stacklevel=3)
|
|
376
372
|
|
|
377
373
|
# }}}
|
|
378
374
|
|
|
379
375
|
|
|
380
376
|
if not cl._PYOPENCL_NO_CACHE:
|
|
381
377
|
from pytools.py_codegen import PicklableModule
|
|
382
|
-
invoker_cache: WriteOncePersistentDict[Any,
|
|
378
|
+
invoker_cache: WriteOncePersistentDict[Any, tuple[PicklableModule, str]] \
|
|
383
379
|
= WriteOncePersistentDict(
|
|
384
380
|
"pyopencl-invoker-cache-v42-nano",
|
|
385
381
|
key_builder=_NumpyTypesKeyBuilder(),
|
pyopencl/ipython_ext.py
CHANGED
pyopencl/py.typed
ADDED
|
File without changes
|
pyopencl/reduction.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
"""Computation of reductions on vectors."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
2
4
|
|
|
3
5
|
__copyright__ = "Copyright (C) 2010 Andreas Kloeckner"
|
|
4
6
|
|
|
@@ -29,7 +31,7 @@ None of the original source code remains.
|
|
|
29
31
|
"""
|
|
30
32
|
|
|
31
33
|
from dataclasses import dataclass
|
|
32
|
-
from typing import Any
|
|
34
|
+
from typing import Any
|
|
33
35
|
|
|
34
36
|
import numpy as np
|
|
35
37
|
|
|
@@ -133,7 +135,7 @@ class _ReductionInfo:
|
|
|
133
135
|
|
|
134
136
|
program: cl.Program
|
|
135
137
|
kernel: cl.Kernel
|
|
136
|
-
arg_types:
|
|
138
|
+
arg_types: list[DtypedArgument]
|
|
137
139
|
|
|
138
140
|
|
|
139
141
|
def _get_reduction_source(
|
|
@@ -143,12 +145,12 @@ def _get_reduction_source(
|
|
|
143
145
|
neutral: str,
|
|
144
146
|
reduce_expr: str,
|
|
145
147
|
map_expr: str,
|
|
146
|
-
parsed_args:
|
|
148
|
+
parsed_args: list[DtypedArgument],
|
|
147
149
|
name: str = "reduce_kernel",
|
|
148
150
|
preamble: str = "",
|
|
149
151
|
arg_prep: str = "",
|
|
150
|
-
device:
|
|
151
|
-
max_group_size:
|
|
152
|
+
device: cl.Device | None = None,
|
|
153
|
+
max_group_size: int | None = None) -> tuple[str, int]:
|
|
152
154
|
|
|
153
155
|
if device is not None:
|
|
154
156
|
devices = [device]
|
|
@@ -209,13 +211,13 @@ def get_reduction_kernel(
|
|
|
209
211
|
dtype_out: Any,
|
|
210
212
|
neutral: str,
|
|
211
213
|
reduce_expr: str,
|
|
212
|
-
map_expr:
|
|
213
|
-
arguments:
|
|
214
|
+
map_expr: str | None = None,
|
|
215
|
+
arguments: list[DtypedArgument] | None = None,
|
|
214
216
|
name: str = "reduce_kernel",
|
|
215
217
|
preamble: str = "",
|
|
216
|
-
device:
|
|
218
|
+
device: cl.Device | None = None,
|
|
217
219
|
options: Any = None,
|
|
218
|
-
max_group_size:
|
|
220
|
+
max_group_size: int | None = None) -> _ReductionInfo:
|
|
219
221
|
if stage not in (1, 2):
|
|
220
222
|
raise ValueError(f"unknown stage index: '{stage}'")
|
|
221
223
|
|
|
@@ -308,8 +310,8 @@ class ReductionKernel:
|
|
|
308
310
|
dtype_out: Any,
|
|
309
311
|
neutral: str,
|
|
310
312
|
reduce_expr: str,
|
|
311
|
-
map_expr:
|
|
312
|
-
arguments:
|
|
313
|
+
map_expr: str | None = None,
|
|
314
|
+
arguments: str | list[DtypedArgument] | None = None,
|
|
313
315
|
name: str = "reduce_kernel",
|
|
314
316
|
options: Any = None,
|
|
315
317
|
preamble: str = "") -> None:
|
|
@@ -418,7 +420,7 @@ class ReductionKernel:
|
|
|
418
420
|
array_empty = empty
|
|
419
421
|
|
|
420
422
|
from pyopencl.tools import VectorArg
|
|
421
|
-
for arg, arg_tp in zip(args, stage_inf.arg_types):
|
|
423
|
+
for arg, arg_tp in zip(args, stage_inf.arg_types, strict=True):
|
|
422
424
|
if isinstance(arg_tp, VectorArg):
|
|
423
425
|
array_empty = arg.__class__
|
|
424
426
|
if not arg.flags.forc:
|
|
@@ -544,12 +546,12 @@ class ReductionKernel:
|
|
|
544
546
|
class ReductionTemplate(KernelTemplateBase):
|
|
545
547
|
def __init__(
|
|
546
548
|
self,
|
|
547
|
-
arguments:
|
|
549
|
+
arguments: str | list[DtypedArgument],
|
|
548
550
|
neutral: str,
|
|
549
551
|
reduce_expr: str,
|
|
550
|
-
map_expr:
|
|
551
|
-
is_segment_start_expr:
|
|
552
|
-
input_fetch_exprs:
|
|
552
|
+
map_expr: str | None = None,
|
|
553
|
+
is_segment_start_expr: str | None = None,
|
|
554
|
+
input_fetch_exprs: list[tuple[str, str, int]] | None = None,
|
|
553
555
|
name_prefix: str = "reduce",
|
|
554
556
|
preamble: str = "",
|
|
555
557
|
template_processor: Any = None) -> None:
|