PyPI - pyopencl - Versions diffs - 2025.1__cp310-cp310-win_amd64.whl → 2025.2.2__cp310-cp310-win_amd64.whl - Mend

pyopencl 2025.1__cp310-cp310-win_amd64.whl → 2025.2.2__cp310-cp310-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyopencl might be problematic. Click here for more details.

Files changed (35) [hide] [show]

pyopencl/__init__.py +582 -997
pyopencl/_cl.cp310-win_amd64.pyd +0 -0
pyopencl/_cl.pyi +2006 -0
pyopencl/_cluda.py +3 -0
pyopencl/_monkeypatch.py +1063 -0
pyopencl/_mymako.py +3 -0
pyopencl/algorithm.py +29 -24
pyopencl/array.py +300 -255
pyopencl/bitonic_sort.py +5 -2
pyopencl/bitonic_sort_templates.py +3 -0
pyopencl/cache.py +5 -5
pyopencl/capture_call.py +31 -8
pyopencl/characterize/__init__.py +26 -19
pyopencl/characterize/performance.py +3 -0
pyopencl/clmath.py +2 -0
pyopencl/clrandom.py +3 -0
pyopencl/cltypes.py +67 -2
pyopencl/compyte/.basedpyright/baseline.json +1272 -0
pyopencl/compyte/array.py +36 -9
pyopencl/compyte/dtypes.py +61 -29
pyopencl/compyte/pyproject.toml +17 -22
pyopencl/elementwise.py +13 -10
pyopencl/invoker.py +13 -17
pyopencl/ipython_ext.py +2 -0
pyopencl/py.typed +0 -0
pyopencl/reduction.py +72 -43
pyopencl/scan.py +31 -30
pyopencl/tools.py +128 -90
pyopencl/typing.py +57 -0
pyopencl/version.py +2 -0
{pyopencl-2025.1.dist-info → pyopencl-2025.2.2.dist-info}/METADATA +11 -10
pyopencl-2025.2.2.dist-info/RECORD +47 -0
{pyopencl-2025.1.dist-info → pyopencl-2025.2.2.dist-info}/WHEEL +1 -1
pyopencl-2025.1.dist-info/RECORD +0 -42
{pyopencl-2025.1.dist-info → pyopencl-2025.2.2.dist-info}/licenses/LICENSE +0 -0

pyopencl/reduction.py CHANGED Viewed

@@ -1,4 +1,6 @@
 """Computation of reductions on vectors."""
+from __future__ import annotations
 __copyright__ = "Copyright (C) 2010 Andreas Kloeckner"
@@ -28,12 +30,14 @@ Based on code/ideas by Mark Harris <mharris@nvidia.com>.
 None of the original source code remains.
 """
+import builtins
 from dataclasses import dataclass
-from typing import Any, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Literal, cast, overload
 import numpy as np
 import pyopencl as cl
+import pyopencl.array as cl_array
 from pyopencl.tools import (
     DtypedArgument,
     KernelTemplateBase,
@@ -43,6 +47,10 @@ from pyopencl.tools import (
 )
+if TYPE_CHECKING:
+    from pyopencl.typing import Allocator
 # {{{ kernel source
 KERNEL = r"""//CL//
@@ -133,7 +141,7 @@ class _ReductionInfo:
     program: cl.Program
     kernel: cl.Kernel
-    arg_types: List[DtypedArgument]
+    arg_types: list[DtypedArgument]
 def _get_reduction_source(
@@ -143,12 +151,12 @@ def _get_reduction_source(
         neutral: str,
         reduce_expr: str,
         map_expr: str,
-        parsed_args: List[DtypedArgument],
+        parsed_args: list[DtypedArgument],
         name: str = "reduce_kernel",
         preamble: str = "",
         arg_prep: str = "",
-        device: Optional[cl.Device] = None,
-        max_group_size: Optional[int] = None) -> Tuple[str, int]:
+        device: cl.Device | None = None,
+        max_group_size: int | None = None) -> tuple[str, int]:
     if device is not None:
         devices = [device]
@@ -209,13 +217,13 @@ def get_reduction_kernel(
         dtype_out: Any,
         neutral: str,
         reduce_expr: str,
-        map_expr: Optional[str] = None,
-        arguments: Optional[List[DtypedArgument]] = None,
+        map_expr: str | None = None,
+        arguments: list[DtypedArgument] | None = None,
         name: str = "reduce_kernel",
         preamble: str = "",
-        device: Optional[cl.Device] = None,
+        device: cl.Device | None = None,
         options: Any = None,
-        max_group_size: Optional[int] = None) -> _ReductionInfo:
+        max_group_size: int | None = None) -> _ReductionInfo:
     if stage not in (1, 2):
         raise ValueError(f"unknown stage index: '{stage}'")
@@ -308,8 +316,8 @@ class ReductionKernel:
             dtype_out: Any,
             neutral: str,
             reduce_expr: str,
-            map_expr: Optional[str] = None,
-            arguments: Optional[Union[str, List[DtypedArgument]]] = None,
+            map_expr: str | None = None,
+            arguments: str | list[DtypedArgument] | None = None,
             name: str = "reduce_kernel",
             options: Any = None,
             preamble: str = "") -> None:
@@ -349,7 +357,40 @@ class ReductionKernel:
                 name=f"{name}_stage2", options=options, preamble=preamble,
                 max_group_size=max_group_size)
-    def __call__(self, *args: Any, **kwargs: Any) -> cl.Event:
+    @overload
+    def __call__(self,
+                *args: object,
+                return_event: Literal[True],
+                queue: cl.CommandQueue | None = None,
+                allocator: Allocator | None = None,
+                wait_for: cl.WaitList = None,
+                out: cl_array.Array | None = None,
+                range: slice | None = None,
+                slice: slice | None = None
+            ) ->  tuple[cl_array.Array, cl.Event]: ...
+    @overload
+    def __call__(self,
+                *args: object,
+                return_event: Literal[False],
+                queue: cl.CommandQueue | None = None,
+                allocator: Allocator | None = None,
+                wait_for: cl.WaitList = None,
+                out: cl_array.Array | None = None,
+                range: slice | None = None,
+                slice: slice | None = None
+            ) -> cl_array.Array: ...
+    def __call__(self,
+                *args: object,
+                return_event: bool = False,
+                queue: cl.CommandQueue | None = None,
+                allocator: Allocator | None = None,
+                wait_for: cl.WaitList = None,
+                out: cl_array.Array | None = None,
+                range: slice | None = None,
+                slice: slice | None = None
+            ) -> cl_array.Array | tuple[cl_array.Array, cl.Event]:
         """Invoke the generated kernel.
         |explain-waitfor|
@@ -388,18 +429,6 @@ class ReductionKernel:
             ``(scalar_array, event)``.
         """
-        queue = kwargs.pop("queue", None)
-        allocator = kwargs.pop("allocator", None)
-        wait_for = kwargs.pop("wait_for", None)
-        return_event = kwargs.pop("return_event", False)
-        out = kwargs.pop("out", None)
-        range_ = kwargs.pop("range", None)
-        slice_ = kwargs.pop("slice", None)
-        if kwargs:
-            raise TypeError("invalid keyword argument to reduction kernel")
         if wait_for is None:
             wait_for = []
         else:
@@ -413,13 +442,14 @@ class ReductionKernel:
         while True:
             invocation_args = []
-            vectors = []
+            vectors: list[cl_array.Array] = []
             array_empty = empty
             from pyopencl.tools import VectorArg
-            for arg, arg_tp in zip(args, stage_inf.arg_types):
+            for arg, arg_tp in zip(args, stage_inf.arg_types, strict=True):
                 if isinstance(arg_tp, VectorArg):
+                    assert isinstance(arg, cl_array.Array)
                     array_empty = arg.__class__
                     if not arg.flags.forc:
                         raise RuntimeError(
@@ -441,31 +471,30 @@ class ReductionKernel:
             # {{{ range/slice processing
-            if range_ is not None:
-                if slice_ is not None:
+            if range is not None:
+                if slice is not None:
                     raise TypeError("may not specify both range and slice "
                             "keyword arguments")
             else:
-                if slice_ is None:
-                    slice_ = slice(None)
+                if slice is None:
+                    slice = builtins.slice(None)
                 if repr_vec is None:
                     raise TypeError(
                             "must have vector argument when range is not specified")
-                range_ = slice(*slice_.indices(repr_vec.size))
+                range = builtins.slice(*slice.indices(repr_vec.size))
-            assert range_ is not None
+            assert range is not None
-            start = range_.start
+            start = cast("int | None", range.start)
             if start is None:
                 start = 0
-            if range_.step is None:
+            step = cast("int | None", range.step)
+            if step is None:
                 step = 1
-            else:
-                step = range_.step
-            sz = abs(range_.stop - start)//step
+            sz = abs(cast("int", range.stop) - start) //step
             # }}}
@@ -502,7 +531,7 @@ class ReductionKernel:
                 macrogroup_size = group_count*stage_inf.group_size
                 seq_count = (sz + macrogroup_size - 1) // macrogroup_size
-            size_args = [start, step, range_.stop, seq_count, sz]
+            size_args = [start, step, range.stop, seq_count, sz]
             if group_count == 1 and out is not None:
                 result = out
@@ -534,7 +563,7 @@ class ReductionKernel:
                 stage_inf = self.stage_2_inf
                 args = (result, *stage1_args)
-                range_ = slice_ = None
+                range = slice = None
 # }}}
@@ -544,12 +573,12 @@ class ReductionKernel:
 class ReductionTemplate(KernelTemplateBase):
     def __init__(
             self,
-            arguments: Union[str, List[DtypedArgument]],
+            arguments: str | list[DtypedArgument],
             neutral: str,
             reduce_expr: str,
-            map_expr: Optional[str] = None,
-            is_segment_start_expr: Optional[str] = None,
-            input_fetch_exprs: Optional[List[Tuple[str, str, int]]] = None,
+            map_expr: str | None = None,
+            is_segment_start_expr: str | None = None,
+            input_fetch_exprs: list[tuple[str, str, int]] | None = None,
             name_prefix: str = "reduce",
             preamble: str = "",
             template_processor: Any = None) -> None:

pyopencl/scan.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Scan primitive."""
+from __future__ import annotations
 __copyright__ = """
@@ -25,7 +26,7 @@ Derived from code within the Thrust project, https://github.com/NVIDIA/thrust
 import logging
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any
 import numpy as np
@@ -33,7 +34,7 @@ from pytools.persistent_dict import WriteOncePersistentDict
 import pyopencl as cl
 import pyopencl._mymako as mako
-import pyopencl.array
+import pyopencl.array as cl_array
 from pyopencl._cluda import CLUDA_PREAMBLE
 from pyopencl.tools import (
     DtypedArgument,
@@ -848,7 +849,7 @@ def _make_template(s: str):
     import re
     leftovers = set()
-    def replace_id(match: "re.Match") -> str:
+    def replace_id(match: re.Match) -> str:
         # avoid name clashes with user code by adding 'psc_' prefix to
         # identifiers.
@@ -874,11 +875,11 @@ def _make_template(s: str):
 class _GeneratedScanKernelInfo:
     scan_src: str
     kernel_name: str
-    scalar_arg_dtypes: List[Optional[np.dtype]]
+    scalar_arg_dtypes: list[np.dtype | None]
     wg_size: int
     k_group_size: int
-    def build(self, context: cl.Context, options: Any) -> "_BuiltScanKernelInfo":
+    def build(self, context: cl.Context, options: Any) -> _BuiltScanKernelInfo:
         program = cl.Program(context, self.scan_src).build(options)
         kernel = getattr(program, self.kernel_name)
         kernel.set_scalar_arg_dtypes(self.scalar_arg_dtypes)
@@ -899,12 +900,12 @@ class _BuiltScanKernelInfo:
 class _GeneratedFinalUpdateKernelInfo:
     source: str
     kernel_name: str
-    scalar_arg_dtypes: List[Optional[np.dtype]]
+    scalar_arg_dtypes: list[np.dtype | None]
     update_wg_size: int
     def build(self,
               context: cl.Context,
-              options: Any) -> "_BuiltFinalUpdateKernelInfo":
+              options: Any) -> _BuiltFinalUpdateKernelInfo:
         program = cl.Program(context, self.source).build(options)
         kernel = getattr(program, self.kernel_name)
         kernel.set_scalar_arg_dtypes(self.scalar_arg_dtypes)
@@ -930,18 +931,18 @@ class GenericScanKernelBase(ABC):
             self,
             ctx: cl.Context,
             dtype: Any,
-            arguments: Union[str, List[DtypedArgument]],
+            arguments: str | list[DtypedArgument],
             input_expr: str,
             scan_expr: str,
-            neutral: Optional[str],
+            neutral: str | None,
             output_statement: str,
-            is_segment_start_expr: Optional[str] = None,
-            input_fetch_exprs: Optional[List[Tuple[str, str, int]]] = None,
+            is_segment_start_expr: str | None = None,
+            input_fetch_exprs: list[tuple[str, str, int]] | None = None,
             index_dtype: Any = None,
             name_prefix: str = "scan",
             options: Any = None,
             preamble: str = "",
-            devices: Optional[cl.Device] = None) -> None:
+            devices: cl.Device | None = None) -> None:
         """
         :arg ctx: a :class:`pyopencl.Context` within which the code
             for this scan kernel will be generated.
@@ -1142,7 +1143,7 @@ class GenericScanKernelBase(ABC):
 if not cl._PYOPENCL_NO_CACHE:
     generic_scan_kernel_cache: WriteOncePersistentDict[Any,
-                    Tuple[_GeneratedScanKernelInfo, _GeneratedScanKernelInfo,
+                    tuple[_GeneratedScanKernelInfo, _GeneratedScanKernelInfo,
                     _GeneratedFinalUpdateKernelInfo]] = \
         WriteOncePersistentDict(
             "pyopencl-generated-scan-kernel-cache-v1",
@@ -1329,7 +1330,7 @@ class GenericScanKernel(GenericScanKernelBase):
             VectorArg(self.dtype, "interval_sums"),
             ]
-        second_level_build_kwargs: Dict[str, Optional[str]] = {}
+        second_level_build_kwargs: dict[str, str | None] = {}
         if self.is_segmented:
             second_level_arguments.append(
                     VectorArg(self.index_dtype,
@@ -1401,7 +1402,7 @@ class GenericScanKernel(GenericScanKernelBase):
         for arg in self.parsed_args:
             arg_dtypes[arg.name] = arg.dtype
-        fetch_expr_offsets: Dict[str, Set] = {}
+        fetch_expr_offsets: dict[str, set] = {}
         for _name, arg_name, ife_offset in self.input_fetch_exprs:
             fetch_expr_offsets.setdefault(arg_name, set()).add(ife_offset)
@@ -1427,10 +1428,10 @@ class GenericScanKernel(GenericScanKernelBase):
     def generate_scan_kernel(
             self,
             max_wg_size: int,
-            arguments: List[DtypedArgument],
+            arguments: list[DtypedArgument],
             input_expr: str,
-            is_segment_start_expr: Optional[str],
-            input_fetch_exprs: List[Tuple[str, str, int]],
+            is_segment_start_expr: str | None,
+            input_fetch_exprs: list[tuple[str, str, int]],
             is_first_level: bool,
             store_segment_start_flags: bool,
             k_group_size: int,
@@ -1527,7 +1528,7 @@ class GenericScanKernel(GenericScanKernelBase):
             return cl.enqueue_marker(queue, wait_for=wait_for)
         data_args = []
-        for arg_descr, arg_val in zip(self.parsed_args, args):
+        for arg_descr, arg_val in zip(self.parsed_args, args, strict=True):
             from pyopencl.tools import VectorArg
             if isinstance(arg_descr, VectorArg):
                 data_args.append(arg_val.base_data)
@@ -1552,16 +1553,16 @@ class GenericScanKernel(GenericScanKernelBase):
         # {{{ allocate some buffers
-        interval_results = cl.array.empty(queue,
+        interval_results = cl_array.empty(queue,
                 num_intervals, dtype=self.dtype,
                 allocator=allocator)
-        partial_scan_buffer = cl.array.empty(
+        partial_scan_buffer = cl_array.empty(
                 queue, n, dtype=self.dtype,
                 allocator=allocator)
         if self.store_segment_start_flags:
-            segment_start_flags = cl.array.empty(
+            segment_start_flags = cl_array.empty(
                     queue, n, dtype=np.bool_,
                     allocator=allocator)
@@ -1575,7 +1576,7 @@ class GenericScanKernel(GenericScanKernelBase):
                 ]
         if self.is_segmented:
-            first_segment_start_in_interval = cl.array.empty(queue,
+            first_segment_start_in_interval = cl_array.empty(queue,
                     num_intervals, dtype=self.index_dtype,
                     allocator=allocator)
             scan1_args.append(first_segment_start_in_interval.data)
@@ -1755,13 +1756,13 @@ class GenericDebugScanKernel(GenericScanKernelBase):
         if n is None:
             n, = first_array.shape
-        scan_tmp = cl.array.empty(queue,
+        scan_tmp = cl_array.empty(queue,
                 n, dtype=self.dtype,
                 allocator=allocator)
         data_args = [scan_tmp.data]
         from pyopencl.tools import VectorArg
-        for arg_descr, arg_val in zip(self.parsed_args, args):
+        for arg_descr, arg_val in zip(self.parsed_args, args, strict=True):
             if isinstance(arg_descr, VectorArg):
                 data_args.append(arg_val.base_data)
                 if arg_descr.with_offset:
@@ -1806,7 +1807,7 @@ class _LegacyScanKernelBase(GenericScanKernel):
             output_ary = input_ary
         if isinstance(output_ary, (str, str)) and output_ary == "new":
-            output_ary = cl.array.empty_like(input_ary, allocator=allocator)
+            output_ary = cl_array.empty_like(input_ary, allocator=allocator)
         if input_ary.shape != output_ary.shape:
             raise ValueError("input and output must have the same shape")
@@ -1841,13 +1842,13 @@ class ExclusiveScanKernel(_LegacyScanKernelBase):
 class ScanTemplate(KernelTemplateBase):
     def __init__(
             self,
-            arguments: Union[str, List[DtypedArgument]],
+            arguments: str | list[DtypedArgument],
             input_expr: str,
             scan_expr: str,
-            neutral: Optional[str],
+            neutral: str | None,
             output_statement: str,
-            is_segment_start_expr: Optional[str] = None,
-            input_fetch_exprs: Optional[List[Tuple[str, str, int]]] = None,
+            is_segment_start_expr: str | None = None,
+            input_fetch_exprs: list[tuple[str, str, int]] | None = None,
             name_prefix: str = "scan",
             preamble: str = "",
             template_processor: Any = None) -> None: