pyopencl 2024.3__cp311-cp311-macosx_11_0_arm64.whl → 2025.2.1__cp311-cp311-macosx_11_0_arm64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

@@ -1,3 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
1
4
  __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
2
5
 
3
6
  __license__ = """
@@ -20,7 +23,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
23
  THE SOFTWARE.
21
24
  """
22
25
 
23
- from typing import Dict, Optional, Tuple
26
+
27
+ from typing import cast
24
28
 
25
29
  from pytools import memoize
26
30
 
@@ -32,14 +36,14 @@ class CLCharacterizationWarning(UserWarning):
32
36
 
33
37
 
34
38
  @memoize
35
- def has_double_support(dev):
39
+ def has_double_support(dev: cl.Device):
36
40
  for ext in dev.extensions.split(" "):
37
41
  if ext == "cl_khr_fp64":
38
42
  return True
39
43
  return False
40
44
 
41
45
 
42
- def has_amd_double_support(dev):
46
+ def has_amd_double_support(dev: cl.Device):
43
47
  """"Fix to allow incomplete amd double support in low end boards"""
44
48
 
45
49
  for ext in dev.extensions.split(" "):
@@ -48,7 +52,10 @@ def has_amd_double_support(dev):
48
52
  return False
49
53
 
50
54
 
51
- def reasonable_work_group_size_multiple(dev, ctx=None):
55
+ def reasonable_work_group_size_multiple(
56
+ dev: cl.Device,
57
+ ctx: cl.Context | None = None
58
+ ):
52
59
  try:
53
60
  return dev.warp_size_nv
54
61
  except Exception:
@@ -63,12 +70,12 @@ def reasonable_work_group_size_multiple(dev, ctx=None):
63
70
  }
64
71
  """)
65
72
  prg.build()
66
- return prg.knl.get_work_group_info(
73
+ return cast("int", prg.knl.get_work_group_info(
67
74
  cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
68
- dev)
75
+ dev))
69
76
 
70
77
 
71
- def nv_compute_capability(dev):
78
+ def nv_compute_capability(dev: cl.Device):
72
79
  """If *dev* is an Nvidia GPU :class:`pyopencl.Device`, return a tuple
73
80
  *(major, minor)* indicating the device's compute capability.
74
81
  """
@@ -80,7 +87,7 @@ def nv_compute_capability(dev):
80
87
  return None
81
88
 
82
89
 
83
- def usable_local_mem_size(dev, nargs=None):
90
+ def usable_local_mem_size(dev: cl.Device, nargs: int | None = None):
84
91
  """Return an estimate of the usable local memory size.
85
92
  :arg nargs: Number of 32-bit arguments passed.
86
93
  """
@@ -101,7 +108,7 @@ def usable_local_mem_size(dev, nargs=None):
101
108
  return usable_local_mem_size
102
109
 
103
110
 
104
- def simultaneous_work_items_on_local_access(dev):
111
+ def simultaneous_work_items_on_local_access(dev: cl.Device):
105
112
  """Return the number of work items that access local
106
113
  memory simultaneously and thereby may conflict with
107
114
  each other.
@@ -136,12 +143,12 @@ def simultaneous_work_items_on_local_access(dev):
136
143
  return 16
137
144
 
138
145
 
139
- def local_memory_access_granularity(dev):
146
+ def local_memory_access_granularity(dev: cl.Device):
140
147
  """Return the number of bytes per bank in local memory."""
141
148
  return 4
142
149
 
143
150
 
144
- def local_memory_bank_count(dev):
151
+ def local_memory_bank_count(dev: cl.Device):
145
152
  """Return the number of banks present in local memory.
146
153
  """
147
154
  nv_compute_cap = nv_compute_capability(dev)
@@ -219,7 +226,7 @@ def why_not_local_access_conflict_free(dev, itemsize,
219
226
  idx = []
220
227
  left_over_idx = work_item_id
221
228
  for axis, (ax_size, ax_stor_size) in enumerate(
222
- zip(array_shape, array_stored_shape)):
229
+ zip(array_shape, array_stored_shape, strict=True)):
223
230
 
224
231
  if axis >= work_item_axis:
225
232
  left_over_idx, ax_idx = divmod(left_over_idx, ax_size)
@@ -258,7 +265,7 @@ def why_not_local_access_conflict_free(dev, itemsize,
258
265
  return 1, None
259
266
 
260
267
 
261
- def get_fast_inaccurate_build_options(dev):
268
+ def get_fast_inaccurate_build_options(dev: cl.Device):
262
269
  """Return a list of flags valid on device *dev* that enable fast, but
263
270
  potentially inaccurate floating point math.
264
271
  """
@@ -269,7 +276,7 @@ def get_fast_inaccurate_build_options(dev):
269
276
  return result
270
277
 
271
278
 
272
- def get_simd_group_size(dev, type_size):
279
+ def get_simd_group_size(dev: cl.Device, type_size: int):
273
280
  """Return an estimate of how many work items will be executed across SIMD
274
281
  lanes. This returns the size of what Nvidia calls a warp and what AMD calls
275
282
  a wavefront.
@@ -323,8 +330,8 @@ def get_simd_group_size(dev, type_size):
323
330
 
324
331
  def get_pocl_version(
325
332
  platform: cl.Platform,
326
- fallback_value: Optional[Tuple[int, int]] = None
327
- ) -> Optional[Tuple[int, int]]:
333
+ fallback_value: tuple[int, int] | None = None
334
+ ) -> tuple[int, int] | None:
328
335
  if platform.name != "Portable Computing Language":
329
336
  return None
330
337
 
@@ -342,12 +349,12 @@ def get_pocl_version(
342
349
  return (int(ver_match.group(1)), int(ver_match.group(2)))
343
350
 
344
351
 
345
- _CHECK_FOR_POCL_ARG_COUNT_BUG_CACHE: Dict[cl.Device, bool] = {}
352
+ _CHECK_FOR_POCL_ARG_COUNT_BUG_CACHE: dict[cl.Device, bool] = {}
346
353
 
347
354
 
348
355
  def _check_for_pocl_arg_count_bug(
349
356
  dev: cl.Device,
350
- ctx: Optional[cl.Context] = None) -> bool:
357
+ ctx: cl.Context | None = None) -> bool:
351
358
  try:
352
359
  return _CHECK_FOR_POCL_ARG_COUNT_BUG_CACHE[dev]
353
360
  except KeyError:
@@ -437,7 +444,7 @@ def has_fine_grain_system_svm_atomics(dev):
437
444
  # }}}
438
445
 
439
446
 
440
- def has_src_build_cache(dev: cl.Device) -> Optional[bool]:
447
+ def has_src_build_cache(dev: cl.Device) -> bool | None:
441
448
  """
442
449
  Return *True* if *dev* has internal support for caching builds from source,
443
450
  *False* if it doesn't, and *None* if unknown.
@@ -1,3 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
1
4
  __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
2
5
 
3
6
  __license__ = """
pyopencl/clmath.py CHANGED
@@ -1,4 +1,6 @@
1
1
  # pylint:disable=unexpected-keyword-arg # for @elwise_kernel_runner
2
+ from __future__ import annotations
3
+
2
4
 
3
5
  __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
4
6
 
pyopencl/clrandom.py CHANGED
@@ -1,3 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
1
4
  __copyright__ = "Copyright (C) 2009-16 Andreas Kloeckner"
2
5
 
3
6
  __license__ = """
pyopencl/cltypes.py CHANGED
@@ -1,3 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
1
4
  __copyright__ = "Copyright (C) 2016 Jonathan Mackenzie"
2
5
 
3
6
  __license__ = """
@@ -19,6 +22,7 @@ THE SOFTWARE.
19
22
  """
20
23
 
21
24
  import warnings
25
+ from typing import Any
22
26
 
23
27
  import numpy as np
24
28
 
@@ -50,7 +54,7 @@ double = np.float64
50
54
  # {{{ vector types
51
55
 
52
56
  def _create_vector_types():
53
- _mapping = [(k, globals()[k]) for k in
57
+ mapping = [(k, globals()[k]) for k in
54
58
  ["char", "uchar", "short", "ushort", "int",
55
59
  "uint", "long", "ulong", "float", "double"]]
56
60
 
@@ -64,7 +68,7 @@ def _create_vector_types():
64
68
 
65
69
  counts = [2, 3, 4, 8, 16]
66
70
 
67
- for base_name, base_type in _mapping:
71
+ for base_name, base_type in mapping:
68
72
  for count in counts:
69
73
  name = "%s%d" % (base_name, count)
70
74
 
@@ -89,10 +93,11 @@ def _create_vector_types():
89
93
  except NotImplementedError:
90
94
  try:
91
95
  dtype = np.dtype([((n, title), base_type)
92
- for (n, title) in zip(names, titles)])
96
+ for (n, title)
97
+ in zip(names, titles, strict=True)])
93
98
  except TypeError:
94
99
  dtype = np.dtype([(n, base_type) for (n, title)
95
- in zip(names, titles)])
100
+ in zip(names, titles, strict=True)])
96
101
 
97
102
  get_or_register_dtype(name, dtype)
98
103
 
@@ -134,4 +139,64 @@ vec_types, vec_type_to_scalar_and_count = _create_vector_types()
134
139
 
135
140
  # }}}
136
141
 
142
+ char2: np.dtype[Any]
143
+ char3: np.dtype[Any]
144
+ char4: np.dtype[Any]
145
+ char8: np.dtype[Any]
146
+ char16: np.dtype[Any]
147
+
148
+ uchar2: np.dtype[Any]
149
+ uchar3: np.dtype[Any]
150
+ uchar4: np.dtype[Any]
151
+ uchar8: np.dtype[Any]
152
+ uchar16: np.dtype[Any]
153
+
154
+ short2: np.dtype[Any]
155
+ short3: np.dtype[Any]
156
+ short4: np.dtype[Any]
157
+ short8: np.dtype[Any]
158
+ short16: np.dtype[Any]
159
+
160
+ ushort2: np.dtype[Any]
161
+ ushort3: np.dtype[Any]
162
+ ushort4: np.dtype[Any]
163
+ ushort8: np.dtype[Any]
164
+ ushort16: np.dtype[Any]
165
+
166
+ int2: np.dtype[Any]
167
+ int3: np.dtype[Any]
168
+ int4: np.dtype[Any]
169
+ int8: np.dtype[Any]
170
+ int16: np.dtype[Any]
171
+
172
+ uint2: np.dtype[Any]
173
+ uint3: np.dtype[Any]
174
+ uint4: np.dtype[Any]
175
+ uint8: np.dtype[Any]
176
+ uint16: np.dtype[Any]
177
+
178
+ long2: np.dtype[Any]
179
+ long3: np.dtype[Any]
180
+ long4: np.dtype[Any]
181
+ long8: np.dtype[Any]
182
+ long16: np.dtype[Any]
183
+
184
+ ulong2: np.dtype[Any]
185
+ ulong3: np.dtype[Any]
186
+ ulong4: np.dtype[Any]
187
+ ulong8: np.dtype[Any]
188
+ ulong16: np.dtype[Any]
189
+
190
+ float2: np.dtype[Any]
191
+ float3: np.dtype[Any]
192
+ float4: np.dtype[Any]
193
+ float8: np.dtype[Any]
194
+ float16: np.dtype[Any]
195
+
196
+ double2: np.dtype[Any]
197
+ double3: np.dtype[Any]
198
+ double4: np.dtype[Any]
199
+ double8: np.dtype[Any]
200
+ double16: np.dtype[Any]
201
+
137
202
  # vim: foldmethod=marker
pyopencl/compyte/array.py CHANGED
@@ -67,13 +67,13 @@ def is_f_contiguous_strides(strides, itemsize, shape):
67
67
  from pytools import product
68
68
  return (
69
69
  equal_strides(strides, f_contiguous_strides(itemsize, shape), shape)
70
- or product(shape) == 0) # noqa: W503
70
+ or product(shape) == 0)
71
71
 
72
72
 
73
73
  def is_c_contiguous_strides(strides, itemsize, shape):
74
74
  from pytools import product
75
75
  return (equal_strides(strides, c_contiguous_strides(itemsize, shape), shape)
76
- or product(shape) == 0) # noqa: W503
76
+ or product(shape) == 0)
77
77
 
78
78
 
79
79
  class ArrayFlags:
@@ -177,7 +177,7 @@ except Exception:
177
177
  # currently (2014/May/17) on pypy.
178
178
 
179
179
  if ((shape is None or x.shape == shape)
180
- and (strides is None or x.strides == strides)): # noqa: W503
180
+ and (strides is None or x.strides == strides)):
181
181
  return x
182
182
  if not x.dtype.isbuiltin:
183
183
  if shape is None:
@@ -84,8 +84,8 @@ class DTypeRegistry:
84
84
  self.name_to_dtype[nm] = dtype
85
85
  else:
86
86
  if name_dtype != dtype:
87
- raise RuntimeError("name '%s' already registered to "
88
- "different dtype" % nm)
87
+ raise RuntimeError(
88
+ f"name '{nm}' already registered to different dtype")
89
89
 
90
90
  if not existed:
91
91
  self.dtype_to_name[dtype] = c_names[0]
@@ -103,7 +103,7 @@ class DTypeRegistry:
103
103
  try:
104
104
  return self.dtype_to_name[dtype]
105
105
  except KeyError:
106
- raise ValueError("unable to map dtype '%s'" % dtype) from None
106
+ raise ValueError(f"unable to map dtype '{dtype}'") from None
107
107
 
108
108
  # }}}
109
109
 
@@ -135,18 +135,21 @@ def fill_registry_with_c_types(reg, respect_windows, include_bool=True):
135
135
  else:
136
136
  i64_name = "long"
137
137
 
138
- reg.get_or_register_dtype(
139
- [i64_name, "%s int" % i64_name, "signed %s int" % i64_name,
140
- "%s signed int" % i64_name],
138
+ reg.get_or_register_dtype([
139
+ i64_name,
140
+ f"{i64_name} int",
141
+ f"signed {i64_name} int",
142
+ f"{i64_name} signed int"],
141
143
  np.int64)
142
- reg.get_or_register_dtype(
143
- ["unsigned %s" % i64_name, "unsigned %s int" % i64_name,
144
- "%s unsigned int" % i64_name],
144
+ reg.get_or_register_dtype([
145
+ f"unsigned {i64_name}",
146
+ f"unsigned {i64_name} int",
147
+ f"{i64_name} unsigned int"],
145
148
  np.uint64)
146
149
 
147
- # http://projects.scipy.org/numpy/ticket/2017
150
+ # https://github.com/numpy/numpy/issues/2610
148
151
  if is_64_bit:
149
- reg.get_or_register_dtype(["unsigned %s" % i64_name], np.uintp)
152
+ reg.get_or_register_dtype([f"unsigned {i64_name}"], np.uintp)
150
153
  else:
151
154
  reg.get_or_register_dtype(["unsigned"], np.uintp)
152
155
 
@@ -245,7 +248,7 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
245
248
  decl_match = decl_re.search(c_arg)
246
249
 
247
250
  if decl_match is None:
248
- raise ValueError("couldn't parse C declarator '%s'" % c_arg)
251
+ raise ValueError(f"couldn't parse C declarator '{c_arg}'")
249
252
 
250
253
  name = decl_match.group(2)
251
254
 
@@ -260,7 +263,7 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
260
263
  try:
261
264
  dtype = name_to_dtype(tp)
262
265
  except KeyError:
263
- raise ValueError("unknown type '%s'" % tp) from None
266
+ raise ValueError(f"unknown type '{tp}'") from None
264
267
 
265
268
  return arg_class(dtype, name)
266
269
 
@@ -280,9 +283,12 @@ def register_dtype(dtype, c_names, alias_ok=False):
280
283
  # check if we've seen this dtype before and error out if a) it was seen before
281
284
  # and b) alias_ok is False.
282
285
 
283
- if not alias_ok and dtype in TYPE_REGISTRY.dtype_to_name:
284
- raise RuntimeError("dtype '%s' already registered (as '%s', new names '%s')"
285
- % (dtype, TYPE_REGISTRY.dtype_to_name[dtype], ", ".join(c_names)))
286
+ name = TYPE_REGISTRY.dtype_to_name.get(dtype)
287
+ if not alias_ok and name is not None:
288
+ c_names_join = "', '".join(c_names)
289
+ raise RuntimeError(
290
+ f"dtype '{dtype}' already registered "
291
+ f"(as '{name}', new names '{c_names_join}')")
286
292
 
287
293
  TYPE_REGISTRY.get_or_register_dtype(c_names, dtype)
288
294
 
@@ -7,36 +7,20 @@ extend-select = [
7
7
  "C", # flake8-comprehensions
8
8
  "E", # pycodestyle
9
9
  "F", # pyflakes
10
-
11
10
  "I", # flake8-isort
12
-
13
11
  "N", # pep8-naming
14
12
  "NPY", # numpy
15
13
  "Q", # flake8-quotes
14
+ "RUF", # ruff
15
+ "UP", # pyupgrade
16
16
  "W", # pycodestyle
17
-
18
- # TODO
19
- # "UP", # pyupgrade
20
- # "RUF", # ruff
21
17
  ]
22
18
  extend-ignore = [
23
19
  "C90", # McCabe complexity
24
- "E221", # multiple spaces before operator
25
- "E241", # multiple spaces after comma
26
20
  "E402", # module level import not at the top of file
27
21
  "E226", # missing whitespace around operator
28
- "N817", # CamelCase `SubstitutionRuleMappingContext` imported as acronym `SRMC`
29
-
30
- # FIXME
31
- "NPY002", # numpy rng
32
- "C408", # unnecssary dict() -> literal
33
- "E265", # block comment should start with
34
- "F841", # local variable unused
35
22
  ]
36
23
 
37
- [tool.ruff.lint.per-file-ignores]
38
- "ndarray/**/*.py" = ["Q", "B", "E", "F", "N", "C4"]
39
-
40
24
  [tool.ruff.lint.flake8-quotes]
41
25
  docstring-quotes = "double"
42
26
  inline-quotes = "double"
@@ -46,9 +30,5 @@ multiline-quotes = "double"
46
30
  combine-as-imports = true
47
31
  known-first-party = [
48
32
  "pytools",
49
- "pymbolic",
50
- ]
51
- known-local-folder = [
52
- "modepy",
53
33
  ]
54
34
  lines-after-imports = 2
pyopencl/elementwise.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Elementwise functionality."""
2
+ from __future__ import annotations
2
3
 
3
4
 
4
5
  __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
@@ -28,7 +29,7 @@ OTHER DEALINGS IN THE SOFTWARE.
28
29
 
29
30
 
30
31
  import enum
31
- from typing import Any, List, Optional, Tuple, Union
32
+ from typing import Any
32
33
 
33
34
  import numpy as np
34
35
 
@@ -50,7 +51,7 @@ from pyopencl.tools import (
50
51
 
51
52
  def get_elwise_program(
52
53
  context: cl.Context,
53
- arguments: List[DtypedArgument],
54
+ arguments: list[DtypedArgument],
54
55
  operation: str, *,
55
56
  name: str = "elwise_kernel",
56
57
  options: Any = None,
@@ -118,13 +119,13 @@ def get_elwise_program(
118
119
 
119
120
  def get_elwise_kernel_and_types(
120
121
  context: cl.Context,
121
- arguments: Union[str, List[DtypedArgument]],
122
+ arguments: str | list[DtypedArgument],
122
123
  operation: str, *,
123
124
  name: str = "elwise_kernel",
124
125
  options: Any = None,
125
126
  preamble: str = "",
126
127
  use_range: bool = False,
127
- **kwargs: Any) -> Tuple[cl.Kernel, List[DtypedArgument]]:
128
+ **kwargs: Any) -> tuple[cl.Kernel, list[DtypedArgument]]:
128
129
 
129
130
  from pyopencl.tools import get_arg_offset_adjuster_code, parse_arg_list
130
131
  parsed_args = parse_arg_list(arguments, with_offset=True)
@@ -181,7 +182,7 @@ def get_elwise_kernel_and_types(
181
182
 
182
183
  def get_elwise_kernel(
183
184
  context: cl.Context,
184
- arguments: Union[str, List[DtypedArgument]],
185
+ arguments: str | list[DtypedArgument],
185
186
  operation: str, *,
186
187
  name: str = "elwise_kernel",
187
188
  options: Any = None, **kwargs: Any) -> cl.Kernel:
@@ -228,7 +229,7 @@ class ElementwiseKernel:
228
229
  def __init__(
229
230
  self,
230
231
  context: cl.Context,
231
- arguments: Union[str, List[DtypedArgument]],
232
+ arguments: str | list[DtypedArgument],
232
233
  operation: str,
233
234
  name: str = "elwise_kernel",
234
235
  options: Any = None, **kwargs: Any) -> None:
@@ -294,7 +295,9 @@ class ElementwiseKernel:
294
295
 
295
296
  repr_vec = None
296
297
  invocation_args = []
297
- for arg, arg_descr in zip(args, arg_descrs):
298
+
299
+ # non-strict because length arg gets appended below
300
+ for arg, arg_descr in zip(args, arg_descrs, strict=False):
298
301
  if isinstance(arg_descr, VectorArg):
299
302
  if repr_vec is None:
300
303
  repr_vec = arg
@@ -358,11 +361,11 @@ class ElementwiseKernel:
358
361
  class ElementwiseTemplate(KernelTemplateBase):
359
362
  def __init__(
360
363
  self,
361
- arguments: Union[str, List[DtypedArgument]],
364
+ arguments: str | list[DtypedArgument],
362
365
  operation: str,
363
366
  name: str = "elwise",
364
367
  preamble: str = "",
365
- template_processor: Optional[str] = None) -> None:
368
+ template_processor: str | None = None) -> None:
366
369
  super().__init__(template_processor=template_processor)
367
370
  self.arguments = arguments
368
371
  self.operation = operation
@@ -411,7 +414,7 @@ def get_argument_kind(v: Any) -> ArgumentKind:
411
414
  return ArgumentKind.SCALAR
412
415
 
413
416
 
414
- def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) -> Tuple[str, str]:
417
+ def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) -> tuple[str, str]:
415
418
  if kind == ArgumentKind.ARRAY:
416
419
  return f"*{name}", f"{name}[i]"
417
420
  elif kind == ArgumentKind.SCALAR:
pyopencl/invoker.py CHANGED
@@ -1,3 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
1
4
  __copyright__ = """
2
5
  Copyright (C) 2017 Andreas Kloeckner
3
6
  """
@@ -22,7 +25,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
25
  THE SOFTWARE.
23
26
  """
24
27
 
25
- from typing import Any, Tuple
28
+ from typing import Any
26
29
  from warnings import warn
27
30
 
28
31
  import numpy as np
@@ -306,7 +309,7 @@ def _generate_enqueue_and_set_args_module(function_name,
306
309
 
307
310
  return (
308
311
  gen.get_picklable_module(
309
- name=f"<pyopencl invoker for '{function_name}'>"),
312
+ name_prefix=f"pyopencl invoker for '{function_name}'"),
310
313
  enqueue_name)
311
314
 
312
315
 
@@ -319,7 +322,7 @@ def _get_max_parameter_size(dev):
319
322
  dev_limit = dev.max_parameter_size
320
323
  pocl_version = get_pocl_version(dev.platform, fallback_value=(1, 8))
321
324
  if pocl_version is not None and pocl_version < (3, 0):
322
- # Current PoCL versions (as of 04/2022) have an incorrect parameter
325
+ # Older PoCL versions (<3.0) have an incorrect parameter
323
326
  # size limit of 1024; see e.g. https://github.com/pocl/pocl/pull/1046
324
327
  if dev_limit == 1024:
325
328
  if dev.type & cl.device_type.CPU:
@@ -336,17 +339,20 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
336
339
  """Check whether argument sizes exceed the OpenCL device limit."""
337
340
 
338
341
  for dev in devs:
342
+ from pyopencl.characterize import nv_compute_capability
343
+ if nv_compute_capability(dev) is None:
344
+ # Only warn on Nvidia GPUs, because actual failures related to
345
+ # the device limit have been observed only on such devices.
346
+ continue
347
+
339
348
  dev_ptr_size = int(dev.address_bits / 8)
340
349
  dev_limit = _get_max_parameter_size(dev)
341
350
 
342
351
  total_arg_size = 0
343
352
 
344
- is_estimate = False
345
-
346
353
  if arg_types:
347
354
  for arg_type in arg_types:
348
355
  if arg_type is None:
349
- is_estimate = True
350
356
  total_arg_size += dev_ptr_size
351
357
  elif isinstance(arg_type, VectorArg):
352
358
  total_arg_size += dev_ptr_size
@@ -354,7 +360,6 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
354
360
  total_arg_size += np.dtype(arg_type).itemsize
355
361
  else:
356
362
  # Estimate that each argument has the size of a pointer on average
357
- is_estimate = True
358
363
  total_arg_size = dev_ptr_size * num_cl_args
359
364
 
360
365
  if total_arg_size > dev_limit:
@@ -364,22 +369,13 @@ def _check_arg_size(function_name, num_cl_args, arg_types, devs):
364
369
  f"the limit of {dev_limit} bytes on {dev}. This might "
365
370
  "lead to compilation errors, especially on GPU devices.",
366
371
  stacklevel=3)
367
- elif is_estimate and total_arg_size >= dev_limit * 0.75:
368
- # Since total_arg_size is just an estimate, also warn in case we are
369
- # just below the actual limit.
370
- from warnings import warn
371
- warn(f"Kernel '{function_name}' has {num_cl_args} arguments with "
372
- f"a total size of {total_arg_size} bytes, which approaches "
373
- f"the limit of {dev_limit} bytes on {dev}. This might "
374
- "lead to compilation errors, especially on GPU devices.",
375
- stacklevel=3)
376
372
 
377
373
  # }}}
378
374
 
379
375
 
380
376
  if not cl._PYOPENCL_NO_CACHE:
381
377
  from pytools.py_codegen import PicklableModule
382
- invoker_cache: WriteOncePersistentDict[Any, Tuple[PicklableModule, str]] \
378
+ invoker_cache: WriteOncePersistentDict[Any, tuple[PicklableModule, str]] \
383
379
  = WriteOncePersistentDict(
384
380
  "pyopencl-invoker-cache-v42-nano",
385
381
  key_builder=_NumpyTypesKeyBuilder(),
pyopencl/ipython_ext.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from IPython.core.magic import Magics, cell_magic, line_magic, magics_class
2
4
 
3
5
  import pyopencl as cl
pyopencl/py.typed ADDED
File without changes