pyopencl 2025.2.5__cp311-cp311-win_amd64.whl → 2025.2.7__cp311-cp311-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

pyopencl/scan.py CHANGED
@@ -26,7 +26,7 @@ Derived from code within the Thrust project, https://github.com/NVIDIA/thrust
26
26
  import logging
27
27
  from abc import ABC, abstractmethod
28
28
  from dataclasses import dataclass
29
- from typing import Any
29
+ from typing import TYPE_CHECKING, Any, cast
30
30
 
31
31
  import numpy as np
32
32
 
@@ -49,6 +49,10 @@ from pyopencl.tools import (
49
49
  )
50
50
 
51
51
 
52
+ if TYPE_CHECKING:
53
+ from collections.abc import Sequence
54
+
55
+
52
56
  logger = logging.getLogger(__name__)
53
57
 
54
58
 
@@ -868,7 +872,7 @@ def _make_template(s: str):
868
872
  warn("Leftover words in identifier prefixing: " + " ".join(leftovers),
869
873
  stacklevel=3)
870
874
 
871
- return mako.template.Template(s, strict_undefined=True) # type: ignore
875
+ return mako.template.Template(s, strict_undefined=True)
872
876
 
873
877
 
874
878
  @dataclass(frozen=True)
@@ -900,7 +904,7 @@ class _BuiltScanKernelInfo:
900
904
  class _GeneratedFinalUpdateKernelInfo:
901
905
  source: str
902
906
  kernel_name: str
903
- scalar_arg_dtypes: list[np.dtype | None]
907
+ scalar_arg_dtypes: Sequence[np.dtype | None]
904
908
  update_wg_size: int
905
909
 
906
910
  def build(self,
@@ -942,7 +946,7 @@ class GenericScanKernelBase(ABC):
942
946
  name_prefix: str = "scan",
943
947
  options: Any = None,
944
948
  preamble: str = "",
945
- devices: cl.Device | None = None) -> None:
949
+ devices: Sequence[cl.Device] | None = None) -> None:
946
950
  """
947
951
  :arg ctx: a :class:`pyopencl.Context` within which the code
948
952
  for this scan kernel will be generated.
@@ -1031,7 +1035,8 @@ class GenericScanKernelBase(ABC):
1031
1035
  if input_fetch_exprs is None:
1032
1036
  input_fetch_exprs = []
1033
1037
 
1034
- self.context = ctx
1038
+ self.context: cl.Context = ctx
1039
+ self.dtype: np.dtype[Any]
1035
1040
  dtype = self.dtype = np.dtype(dtype)
1036
1041
 
1037
1042
  if neutral is None:
@@ -1044,35 +1049,35 @@ class GenericScanKernelBase(ABC):
1044
1049
  if dtype.itemsize % 4 != 0:
1045
1050
  raise TypeError("scan value type must have size divisible by 4 bytes")
1046
1051
 
1047
- self.index_dtype = np.dtype(index_dtype)
1052
+ self.index_dtype: np.dtype[np.integer] = np.dtype(index_dtype)
1048
1053
  if np.iinfo(self.index_dtype).min >= 0:
1049
1054
  raise TypeError("index_dtype must be signed")
1050
1055
 
1051
1056
  if devices is None:
1052
1057
  devices = ctx.devices
1053
- self.devices = devices
1058
+ self.devices: Sequence[cl.Device] = devices
1054
1059
  self.options = options
1055
1060
 
1056
1061
  from pyopencl.tools import parse_arg_list
1057
- self.parsed_args = parse_arg_list(arguments)
1062
+ self.parsed_args: Sequence[DtypedArgument] = parse_arg_list(arguments)
1058
1063
  from pyopencl.tools import VectorArg
1059
- self.first_array_idx = next(
1064
+ self.first_array_idx: int = next(
1060
1065
  i for i, arg in enumerate(self.parsed_args)
1061
1066
  if isinstance(arg, VectorArg))
1062
1067
 
1063
- self.input_expr = input_expr
1068
+ self.input_expr: str = input_expr
1064
1069
 
1065
- self.is_segment_start_expr = is_segment_start_expr
1066
- self.is_segmented = is_segment_start_expr is not None
1067
- if self.is_segmented:
1070
+ self.is_segment_start_expr: str | None = is_segment_start_expr
1071
+ self.is_segmented: bool = is_segment_start_expr is not None
1072
+ if is_segment_start_expr is not None:
1068
1073
  is_segment_start_expr = _process_code_for_macro(is_segment_start_expr)
1069
1074
 
1070
- self.output_statement = output_statement
1075
+ self.output_statement: str = output_statement
1071
1076
 
1072
1077
  for _name, _arg_name, ife_offset in input_fetch_exprs:
1073
1078
  if ife_offset not in [0, -1]:
1074
1079
  raise RuntimeError("input_fetch_expr offsets must either be 0 or -1")
1075
- self.input_fetch_exprs = input_fetch_exprs
1080
+ self.input_fetch_exprs: Sequence[tuple[str, str, int]] = input_fetch_exprs
1076
1081
 
1077
1082
  arg_dtypes = {}
1078
1083
  arg_ctypes = {}
@@ -1080,7 +1085,7 @@ class GenericScanKernelBase(ABC):
1080
1085
  arg_dtypes[arg.name] = arg.dtype
1081
1086
  arg_ctypes[arg.name] = dtype_to_ctype(arg.dtype)
1082
1087
 
1083
- self.name_prefix = name_prefix
1088
+ self.name_prefix: str = name_prefix
1084
1089
 
1085
1090
  # {{{ set up shared code dict
1086
1091
 
@@ -1128,8 +1133,8 @@ class GenericScanKernelBase(ABC):
1128
1133
 
1129
1134
  # }}}
1130
1135
 
1131
- self.use_lookbehind_update = "prev_item" in self.output_statement
1132
- self.store_segment_start_flags = (
1136
+ self.use_lookbehind_update: bool = "prev_item" in self.output_statement
1137
+ self.store_segment_start_flags: bool = (
1133
1138
  self.is_segmented and self.use_lookbehind_update)
1134
1139
 
1135
1140
  self.finish_setup()
@@ -1233,8 +1238,8 @@ class GenericScanKernel(GenericScanKernelBase):
1233
1238
  # not sure where these go, but roughly this much seems unavailable.
1234
1239
  avail_local_mem -= 0x400
1235
1240
 
1236
- is_cpu = self.devices[0].type & cl.device_type.CPU
1237
- is_gpu = self.devices[0].type & cl.device_type.GPU
1241
+ is_cpu = bool(self.devices[0].type & cl.device_type.CPU)
1242
+ is_gpu = bool(self.devices[0].type & cl.device_type.GPU)
1238
1243
 
1239
1244
  if is_cpu:
1240
1245
  # (about the widest vector a CPU can support, also taking
@@ -1260,7 +1265,7 @@ class GenericScanKernel(GenericScanKernelBase):
1260
1265
  # k_group_size should be a power of two because of in-kernel
1261
1266
  # division by that number.
1262
1267
 
1263
- solutions = []
1268
+ solutions: list[tuple[int, int, int]] = []
1264
1269
  for k_exp in range(0, 9):
1265
1270
  for wg_size in range(wg_size_multiples, max_scan_wg_size+1,
1266
1271
  wg_size_multiples):
@@ -1402,7 +1407,7 @@ class GenericScanKernel(GenericScanKernelBase):
1402
1407
  for arg in self.parsed_args:
1403
1408
  arg_dtypes[arg.name] = arg.dtype
1404
1409
 
1405
- fetch_expr_offsets: dict[str, set] = {}
1410
+ fetch_expr_offsets: dict[str, set[int]] = {}
1406
1411
  for _name, arg_name, ife_offset in self.input_fetch_exprs:
1407
1412
  fetch_expr_offsets.setdefault(arg_name, set()).add(ife_offset)
1408
1413
 
@@ -1428,10 +1433,10 @@ class GenericScanKernel(GenericScanKernelBase):
1428
1433
  def generate_scan_kernel(
1429
1434
  self,
1430
1435
  max_wg_size: int,
1431
- arguments: list[DtypedArgument],
1436
+ arguments: Sequence[DtypedArgument],
1432
1437
  input_expr: str,
1433
1438
  is_segment_start_expr: str | None,
1434
- input_fetch_exprs: list[tuple[str, str, int]],
1439
+ input_fetch_exprs: Sequence[tuple[str, str, int]],
1435
1440
  is_first_level: bool,
1436
1441
  store_segment_start_flags: bool,
1437
1442
  k_group_size: int,
@@ -1442,7 +1447,7 @@ class GenericScanKernel(GenericScanKernelBase):
1442
1447
  wg_size = _round_down_to_power_of_2(
1443
1448
  min(max_wg_size, 256))
1444
1449
 
1445
- kernel_name = self.code_variables["name_prefix"]
1450
+ kernel_name = cast("str", self.code_variables["name_prefix"])
1446
1451
  if is_first_level:
1447
1452
  kernel_name += "_lev1"
1448
1453
  else: