pyopencl 2025.2.5__cp312-cp312-macosx_11_0_arm64.whl → 2025.2.7__cp312-cp312-macosx_11_0_arm64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/_cl.cpython-312-darwin.so +0 -0
- pyopencl/_cl.pyi +10 -7
- pyopencl/_monkeypatch.py +40 -5
- pyopencl/algorithm.py +1 -1
- pyopencl/array.py +214 -125
- pyopencl/cache.py +1 -1
- pyopencl/characterize/__init__.py +2 -4
- pyopencl/clmath.py +0 -1
- pyopencl/cltypes.py +42 -27
- pyopencl/compyte/array.py +9 -39
- pyopencl/compyte/dtypes.py +9 -11
- pyopencl/compyte/pyproject.toml +0 -3
- pyopencl/elementwise.py +223 -113
- pyopencl/scan.py +30 -25
- pyopencl/tools.py +327 -212
- {pyopencl-2025.2.5.dist-info → pyopencl-2025.2.7.dist-info}/METADATA +3 -4
- {pyopencl-2025.2.5.dist-info → pyopencl-2025.2.7.dist-info}/RECORD +19 -19
- {pyopencl-2025.2.5.dist-info → pyopencl-2025.2.7.dist-info}/WHEEL +1 -1
- {pyopencl-2025.2.5.dist-info → pyopencl-2025.2.7.dist-info}/licenses/LICENSE +0 -0
pyopencl/scan.py
CHANGED
|
@@ -26,7 +26,7 @@ Derived from code within the Thrust project, https://github.com/NVIDIA/thrust
|
|
|
26
26
|
import logging
|
|
27
27
|
from abc import ABC, abstractmethod
|
|
28
28
|
from dataclasses import dataclass
|
|
29
|
-
from typing import Any
|
|
29
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
30
30
|
|
|
31
31
|
import numpy as np
|
|
32
32
|
|
|
@@ -49,6 +49,10 @@ from pyopencl.tools import (
|
|
|
49
49
|
)
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
if TYPE_CHECKING:
|
|
53
|
+
from collections.abc import Sequence
|
|
54
|
+
|
|
55
|
+
|
|
52
56
|
logger = logging.getLogger(__name__)
|
|
53
57
|
|
|
54
58
|
|
|
@@ -868,7 +872,7 @@ def _make_template(s: str):
|
|
|
868
872
|
warn("Leftover words in identifier prefixing: " + " ".join(leftovers),
|
|
869
873
|
stacklevel=3)
|
|
870
874
|
|
|
871
|
-
return mako.template.Template(s, strict_undefined=True)
|
|
875
|
+
return mako.template.Template(s, strict_undefined=True)
|
|
872
876
|
|
|
873
877
|
|
|
874
878
|
@dataclass(frozen=True)
|
|
@@ -900,7 +904,7 @@ class _BuiltScanKernelInfo:
|
|
|
900
904
|
class _GeneratedFinalUpdateKernelInfo:
|
|
901
905
|
source: str
|
|
902
906
|
kernel_name: str
|
|
903
|
-
scalar_arg_dtypes:
|
|
907
|
+
scalar_arg_dtypes: Sequence[np.dtype | None]
|
|
904
908
|
update_wg_size: int
|
|
905
909
|
|
|
906
910
|
def build(self,
|
|
@@ -942,7 +946,7 @@ class GenericScanKernelBase(ABC):
|
|
|
942
946
|
name_prefix: str = "scan",
|
|
943
947
|
options: Any = None,
|
|
944
948
|
preamble: str = "",
|
|
945
|
-
devices: cl.Device | None = None) -> None:
|
|
949
|
+
devices: Sequence[cl.Device] | None = None) -> None:
|
|
946
950
|
"""
|
|
947
951
|
:arg ctx: a :class:`pyopencl.Context` within which the code
|
|
948
952
|
for this scan kernel will be generated.
|
|
@@ -1031,7 +1035,8 @@ class GenericScanKernelBase(ABC):
|
|
|
1031
1035
|
if input_fetch_exprs is None:
|
|
1032
1036
|
input_fetch_exprs = []
|
|
1033
1037
|
|
|
1034
|
-
self.context = ctx
|
|
1038
|
+
self.context: cl.Context = ctx
|
|
1039
|
+
self.dtype: np.dtype[Any]
|
|
1035
1040
|
dtype = self.dtype = np.dtype(dtype)
|
|
1036
1041
|
|
|
1037
1042
|
if neutral is None:
|
|
@@ -1044,35 +1049,35 @@ class GenericScanKernelBase(ABC):
|
|
|
1044
1049
|
if dtype.itemsize % 4 != 0:
|
|
1045
1050
|
raise TypeError("scan value type must have size divisible by 4 bytes")
|
|
1046
1051
|
|
|
1047
|
-
self.index_dtype = np.dtype(index_dtype)
|
|
1052
|
+
self.index_dtype: np.dtype[np.integer] = np.dtype(index_dtype)
|
|
1048
1053
|
if np.iinfo(self.index_dtype).min >= 0:
|
|
1049
1054
|
raise TypeError("index_dtype must be signed")
|
|
1050
1055
|
|
|
1051
1056
|
if devices is None:
|
|
1052
1057
|
devices = ctx.devices
|
|
1053
|
-
self.devices = devices
|
|
1058
|
+
self.devices: Sequence[cl.Device] = devices
|
|
1054
1059
|
self.options = options
|
|
1055
1060
|
|
|
1056
1061
|
from pyopencl.tools import parse_arg_list
|
|
1057
|
-
self.parsed_args = parse_arg_list(arguments)
|
|
1062
|
+
self.parsed_args: Sequence[DtypedArgument] = parse_arg_list(arguments)
|
|
1058
1063
|
from pyopencl.tools import VectorArg
|
|
1059
|
-
self.first_array_idx = next(
|
|
1064
|
+
self.first_array_idx: int = next(
|
|
1060
1065
|
i for i, arg in enumerate(self.parsed_args)
|
|
1061
1066
|
if isinstance(arg, VectorArg))
|
|
1062
1067
|
|
|
1063
|
-
self.input_expr = input_expr
|
|
1068
|
+
self.input_expr: str = input_expr
|
|
1064
1069
|
|
|
1065
|
-
self.is_segment_start_expr = is_segment_start_expr
|
|
1066
|
-
self.is_segmented = is_segment_start_expr is not None
|
|
1067
|
-
if self.is_segmented:
|
|
1070
|
+
self.is_segment_start_expr: str | None = is_segment_start_expr
|
|
1071
|
+
self.is_segmented: bool = is_segment_start_expr is not None
|
|
1072
|
+
if is_segment_start_expr is not None:
|
|
1068
1073
|
is_segment_start_expr = _process_code_for_macro(is_segment_start_expr)
|
|
1069
1074
|
|
|
1070
|
-
self.output_statement = output_statement
|
|
1075
|
+
self.output_statement: str = output_statement
|
|
1071
1076
|
|
|
1072
1077
|
for _name, _arg_name, ife_offset in input_fetch_exprs:
|
|
1073
1078
|
if ife_offset not in [0, -1]:
|
|
1074
1079
|
raise RuntimeError("input_fetch_expr offsets must either be 0 or -1")
|
|
1075
|
-
self.input_fetch_exprs = input_fetch_exprs
|
|
1080
|
+
self.input_fetch_exprs: Sequence[tuple[str, str, int]] = input_fetch_exprs
|
|
1076
1081
|
|
|
1077
1082
|
arg_dtypes = {}
|
|
1078
1083
|
arg_ctypes = {}
|
|
@@ -1080,7 +1085,7 @@ class GenericScanKernelBase(ABC):
|
|
|
1080
1085
|
arg_dtypes[arg.name] = arg.dtype
|
|
1081
1086
|
arg_ctypes[arg.name] = dtype_to_ctype(arg.dtype)
|
|
1082
1087
|
|
|
1083
|
-
self.name_prefix = name_prefix
|
|
1088
|
+
self.name_prefix: str = name_prefix
|
|
1084
1089
|
|
|
1085
1090
|
# {{{ set up shared code dict
|
|
1086
1091
|
|
|
@@ -1128,8 +1133,8 @@ class GenericScanKernelBase(ABC):
|
|
|
1128
1133
|
|
|
1129
1134
|
# }}}
|
|
1130
1135
|
|
|
1131
|
-
self.use_lookbehind_update = "prev_item" in self.output_statement
|
|
1132
|
-
self.store_segment_start_flags = (
|
|
1136
|
+
self.use_lookbehind_update: bool = "prev_item" in self.output_statement
|
|
1137
|
+
self.store_segment_start_flags: bool = (
|
|
1133
1138
|
self.is_segmented and self.use_lookbehind_update)
|
|
1134
1139
|
|
|
1135
1140
|
self.finish_setup()
|
|
@@ -1233,8 +1238,8 @@ class GenericScanKernel(GenericScanKernelBase):
|
|
|
1233
1238
|
# not sure where these go, but roughly this much seems unavailable.
|
|
1234
1239
|
avail_local_mem -= 0x400
|
|
1235
1240
|
|
|
1236
|
-
is_cpu = self.devices[0].type & cl.device_type.CPU
|
|
1237
|
-
is_gpu = self.devices[0].type & cl.device_type.GPU
|
|
1241
|
+
is_cpu = bool(self.devices[0].type & cl.device_type.CPU)
|
|
1242
|
+
is_gpu = bool(self.devices[0].type & cl.device_type.GPU)
|
|
1238
1243
|
|
|
1239
1244
|
if is_cpu:
|
|
1240
1245
|
# (about the widest vector a CPU can support, also taking
|
|
@@ -1260,7 +1265,7 @@ class GenericScanKernel(GenericScanKernelBase):
|
|
|
1260
1265
|
# k_group_size should be a power of two because of in-kernel
|
|
1261
1266
|
# division by that number.
|
|
1262
1267
|
|
|
1263
|
-
solutions = []
|
|
1268
|
+
solutions: list[tuple[int, int, int]] = []
|
|
1264
1269
|
for k_exp in range(0, 9):
|
|
1265
1270
|
for wg_size in range(wg_size_multiples, max_scan_wg_size+1,
|
|
1266
1271
|
wg_size_multiples):
|
|
@@ -1402,7 +1407,7 @@ class GenericScanKernel(GenericScanKernelBase):
|
|
|
1402
1407
|
for arg in self.parsed_args:
|
|
1403
1408
|
arg_dtypes[arg.name] = arg.dtype
|
|
1404
1409
|
|
|
1405
|
-
fetch_expr_offsets: dict[str, set] = {}
|
|
1410
|
+
fetch_expr_offsets: dict[str, set[int]] = {}
|
|
1406
1411
|
for _name, arg_name, ife_offset in self.input_fetch_exprs:
|
|
1407
1412
|
fetch_expr_offsets.setdefault(arg_name, set()).add(ife_offset)
|
|
1408
1413
|
|
|
@@ -1428,10 +1433,10 @@ class GenericScanKernel(GenericScanKernelBase):
|
|
|
1428
1433
|
def generate_scan_kernel(
|
|
1429
1434
|
self,
|
|
1430
1435
|
max_wg_size: int,
|
|
1431
|
-
arguments:
|
|
1436
|
+
arguments: Sequence[DtypedArgument],
|
|
1432
1437
|
input_expr: str,
|
|
1433
1438
|
is_segment_start_expr: str | None,
|
|
1434
|
-
input_fetch_exprs:
|
|
1439
|
+
input_fetch_exprs: Sequence[tuple[str, str, int]],
|
|
1435
1440
|
is_first_level: bool,
|
|
1436
1441
|
store_segment_start_flags: bool,
|
|
1437
1442
|
k_group_size: int,
|
|
@@ -1442,7 +1447,7 @@ class GenericScanKernel(GenericScanKernelBase):
|
|
|
1442
1447
|
wg_size = _round_down_to_power_of_2(
|
|
1443
1448
|
min(max_wg_size, 256))
|
|
1444
1449
|
|
|
1445
|
-
kernel_name = self.code_variables["name_prefix"]
|
|
1450
|
+
kernel_name = cast("str", self.code_variables["name_prefix"])
|
|
1446
1451
|
if is_first_level:
|
|
1447
1452
|
kernel_name += "_lev1"
|
|
1448
1453
|
else:
|