PyPI - pyopencl - Versions diffs - 2024.2.6__cp310-cp310-win_amd64.whl → 2024.3__cp310-cp310-win_amd64.whl - Mend

pyopencl 2024.2.6__cp310-cp310-win_amd64.whl → 2024.3__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyopencl might be problematic. Click here for more details.

Files changed (38)

pyopencl/__init__.py +127 -122
pyopencl/_cl.cp310-win_amd64.pyd +0 -0
pyopencl/_mymako.py +3 -3
pyopencl/algorithm.py +10 -7
pyopencl/array.py +50 -40
pyopencl/bitonic_sort.py +3 -1
pyopencl/bitonic_sort_templates.py +1 -1
pyopencl/cache.py +23 -22
pyopencl/capture_call.py +5 -4
pyopencl/clrandom.py +1 -0
pyopencl/compyte/dtypes.py +4 -4
pyopencl/compyte/pyproject.toml +54 -0
pyopencl/elementwise.py +9 -2
pyopencl/invoker.py +11 -9
pyopencl/ipython_ext.py +1 -1
pyopencl/reduction.py +16 -10
pyopencl/scan.py +38 -22
pyopencl/tools.py +23 -13
{pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/METADATA +11 -8
pyopencl-2024.3.dist-info/RECORD +42 -0
{pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/WHEEL +1 -1
pyopencl/compyte/.git +0 -1
pyopencl/compyte/ndarray/Makefile +0 -31
pyopencl/compyte/ndarray/__init__.py +0 -0
pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
pyopencl/compyte/ndarray/pygpu_language.h +0 -207
pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
pyopencl/compyte/ndarray/setup_opencl.py +0 -101
pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
pyopencl-2024.2.6.dist-info/RECORD +0 -56
{pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/licenses/LICENSE +0 -0

pyopencl/array.py CHANGED Viewed

@@ -42,9 +42,12 @@ import pyopencl.elementwise as elementwise
 from pyopencl import cltypes
 from pyopencl.characterize import has_double_support
 from pyopencl.compyte.array import (
-    ArrayFlags as _ArrayFlags, as_strided as _as_strided,
-    c_contiguous_strides as _c_contiguous_strides, equal_strides as _equal_strides,
-    f_contiguous_strides as _f_contiguous_strides)
+    ArrayFlags as _ArrayFlags,
+    as_strided as _as_strided,
+    c_contiguous_strides as _c_contiguous_strides,
+    equal_strides as _equal_strides,
+    f_contiguous_strides as _f_contiguous_strides,
+)
 SCALAR_CLASSES = (Number, np.bool_, bool)
@@ -259,7 +262,7 @@ def _splay(device, n, kernel_specific_max_wg_size=None):
         group_count = max_groups
         work_items_per_group = max_work_items
-    #print "n:%d gc:%d wipg:%d" % (n, group_count, work_items_per_group)
+    # print("n:%d gc:%d wipg:%d" % (n, group_count, work_items_per_group))
     return (group_count*work_items_per_group,), (work_items_per_group,)
@@ -292,7 +295,7 @@ def elwise_kernel_runner(kernel_getter):
             queue.device)
         gs, ls = out._get_sizes(queue, work_group_info)
-        args = (out,) + args + (out.size,)
+        args = (out, *args, out.size)
         if ARRAY_KERNEL_EXEC_HOOK is not None:
             return ARRAY_KERNEL_EXEC_HOOK(  # pylint: disable=not-callable
                     knl, queue, gs, ls, *args, wait_for=wait_for)
@@ -587,7 +590,7 @@ class Array:
                 # FIXME It would be nice to check this. But it would require
                 # changing the allocator interface. Trust the user for now.
-                #assert allocator.context == context
+                # assert allocator.context == context
                 pass
             # Queue-less arrays do have a purpose in life.
@@ -608,11 +611,11 @@ class Array:
             try:
                 shape = tuple(shape)        # type: ignore[arg-type]
-            except TypeError:
+            except TypeError as err:
                 if not isinstance(shape, (int, np.integer)):
                     raise TypeError(
                         "shape must either be iterable or castable to an integer: "
-                        f"got a '{type(shape).__name__}'")
+                        f"got a '{type(shape).__name__}'") from err
                 shape = (shape,)
@@ -654,7 +657,7 @@ class Array:
             # }}}
             assert dtype != object, \
-                    "object arrays on the compute device are not allowed"
+                    "object arrays on the compute device are not allowed"  # noqa: E721
             assert isinstance(shape, tuple)
             assert isinstance(strides, tuple)
@@ -922,7 +925,7 @@ class Array:
                     "device-to-host transfers",
                     DeprecationWarning, stacklevel=2)
-        ary, event1 = self._get(queue=queue, ary=ary, async_=async_, **kwargs)
+        ary, _event1 = self._get(queue=queue, ary=ary, async_=async_, **kwargs)
         return ary
@@ -2047,9 +2050,10 @@ class Array:
         .. versionadded:: 2015.2
         """
-        new_shape = tuple([dim for dim in self.shape if dim > 1])
-        new_strides = tuple([self.strides[i]
-            for i, dim in enumerate(self.shape) if dim > 1])
+        new_shape = tuple(dim for dim in self.shape if dim > 1)
+        new_strides = tuple(
+            self.strides[i] for i, dim in enumerate(self.shape)
+            if dim > 1)
         return self._new_with_changes(
                 self.base_data, self.offset,
@@ -2595,14 +2599,16 @@ def multi_take(arrays, indices, out=None, queue=None):
                     cl.kernel_work_group_info.WORK_GROUP_SIZE,
                     queue.device))
-        wait_for_this = (indices.events
-            + builtins.sum((i.events for i in arrays[chunk_slice]), [])
-            + builtins.sum((o.events for o in out[chunk_slice]), []))
+        wait_for_this = (
+            *indices.events,
+            *[evt for i in arrays[chunk_slice] for evt in i.events],
+            *[evt for o in out[chunk_slice] for evt in o.events])
         evt = knl(queue, gs, ls,
                 indices.data,
-                *([o.data for o in out[chunk_slice]]
-                    + [i.data for i in arrays[chunk_slice]]
-                    + [indices.size]), wait_for=wait_for_this)
+                *[o.data for o in out[chunk_slice]],
+                *[i.data for i in arrays[chunk_slice]],
+                *[indices.size],
+                wait_for=wait_for_this)
         for o in out[chunk_slice]:
             o.add_event(evt)
@@ -2673,15 +2679,19 @@ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None,
                     cl.kernel_work_group_info.WORK_GROUP_SIZE,
                     queue.device))
-        wait_for_this = (dest_indices.events + src_indices.events
-            + builtins.sum((i.events for i in arrays[chunk_slice]), [])
-            + builtins.sum((o.events for o in out[chunk_slice]), []))
+        wait_for_this = (
+            *dest_indices.events,
+            *src_indices.events,
+            *[evt for i in arrays[chunk_slice] for evt in i.events],
+            *[evt for o in out[chunk_slice] for evt in o.events])
         evt = knl(queue, gs, ls,
-                *(list(out[chunk_slice])
-                    + [dest_indices, src_indices]
-                    + list(arrays[chunk_slice])
-                    + src_offsets_list[chunk_slice]
-                    + [src_indices.size]), wait_for=wait_for_this)
+                  *out[chunk_slice],
+                  dest_indices,
+                  src_indices,
+                  *arrays[chunk_slice],
+                  *src_offsets_list[chunk_slice],
+                  src_indices.size,
+                  wait_for=wait_for_this)
         for o in out[chunk_slice]:
             o.add_event(evt)
@@ -2750,16 +2760,16 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None,
                     cl.kernel_work_group_info.WORK_GROUP_SIZE,
                     queue.device))
-        wait_for_this = (wait_for
-            + builtins.sum([i.events for i in arrays[chunk_slice]], [])
-            + builtins.sum([o.events for o in out[chunk_slice]], []))
+        wait_for_this = (
+            *wait_for,
+            *[evt for i in arrays[chunk_slice] for evt in i.events],
+            *[evt for o in out[chunk_slice] for evt in o.events])
         evt = knl(queue, gs, ls,
-                *(
-                    list(out[chunk_slice])
-                    + [dest_indices]
-                    + list(arrays[chunk_slice])
-                    + [use_fill_cla, array_lengths_cla, dest_indices.size]),
-                wait_for=wait_for_this)
+                  *out[chunk_slice],
+                  dest_indices,
+                  *arrays[chunk_slice],
+                  use_fill_cla, array_lengths_cla, dest_indices.size,
+                  wait_for=wait_for_this)
         for o in out[chunk_slice]:
             o.add_event(evt)
@@ -2874,7 +2884,7 @@ def hstack(arrays, queue=None):
     lead_shape = single_valued(ary.shape[:-1] for ary in arrays)
-    w = builtins.sum([ary.shape[-1] for ary in arrays])
+    w = builtins.sum(ary.shape[-1] for ary in arrays)
     if __debug__:
         if builtins.any(type(ary) != type(arrays[0])  # noqa: E721
@@ -2883,7 +2893,7 @@ def hstack(arrays, queue=None):
                  "an instance of the type of arrays[0]",
                  stacklevel=2)
-    result = arrays[0].__class__(queue, lead_shape+(w,), arrays[0].dtype,
+    result = arrays[0].__class__(queue, (*lead_shape, w), arrays[0].dtype,
                                  allocator=arrays[0].allocator)
     index = 0
     for ary in arrays:
@@ -3150,8 +3160,8 @@ def _logical_op(x1, x2, out, operator, queue=None):
         else:
             out[:] = np.logical_or(x1, x2)
     elif np.isscalar(x1) or np.isscalar(x2):
-        scalar_arg, = [x for x in (x1, x2) if np.isscalar(x)]
-        ary_arg, = [x for x in (x1, x2) if not np.isscalar(x)]
+        scalar_arg, = (x for x in (x1, x2) if np.isscalar(x))
+        ary_arg, = (x for x in (x1, x2) if not np.isscalar(x))
         queue = queue or ary_arg.queue
         allocator = ary_arg.allocator

pyopencl/bitonic_sort.py CHANGED Viewed

@@ -35,8 +35,10 @@ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 from functools import reduce
 from operator import mul
+from typing import ClassVar, Dict
 from mako.template import Template
 from pytools import memoize_method
 import pyopencl as cl
@@ -62,7 +64,7 @@ class BitonicSort:
     .. automethod:: __call__
     """
-    kernels_srcs = {
+    kernels_srcs: ClassVar[Dict[str, str]] = {
             "B2": _tmpl.ParallelBitonic_B2,
             "B4": _tmpl.ParallelBitonic_B4,
             "B8": _tmpl.ParallelBitonic_B8,

pyopencl/bitonic_sort_templates.py CHANGED Viewed

@@ -488,7 +488,7 @@ __kernel void run(__global const data_t * in,__global data_t * out,__local data_
   // Write output
   out[i] = aux[i];
 }
-"""         # noqa: E501
+"""
 # }}}

pyopencl/cache.py CHANGED Viewed

@@ -42,12 +42,14 @@ import hashlib
 new_hash = hashlib.md5
-def _erase_dir(dir):
+def _erase_dir(directory):
     from os import listdir, rmdir, unlink
     from os.path import join
-    for name in listdir(dir):
-        unlink(join(dir, name))
-    rmdir(dir)
+    for name in listdir(directory):
+        unlink(join(directory, name))
+    rmdir(directory)
 def update_checksum(checksum, obj):
@@ -213,7 +215,7 @@ def get_dependencies(src, include_path):
     _inner(src)
-    result = [(name,) + vals for name, vals in result.items()]
+    result = [(name, *vals) for name, vals in result.items()]
     result.sort()
     return result
@@ -266,7 +268,7 @@ def get_cache_key(device, options_bytes, src):
 def retrieve_from_cache(cache_dir, cache_key):
-    class _InvalidInfoFile(RuntimeError):
+    class _InvalidInfoFileError(RuntimeError):
         pass
     from os.path import isdir, join
@@ -290,18 +292,18 @@ def retrieve_from_cache(cache_dir, cache_key):
                 try:
                     info_file = open(info_path, "rb")
-                except OSError:
-                    raise _InvalidInfoFile()
+                except OSError as err:
+                    raise _InvalidInfoFileError() from err
                 try:
                     try:
                         info = load(info_file)
-                    except EOFError:
-                        raise _InvalidInfoFile()
+                    except EOFError as err:
+                        raise _InvalidInfoFileError() from err
                 finally:
                     info_file.close()
-            except _InvalidInfoFile:
+            except _InvalidInfoFileError:
                 mod_cache_dir_m.reset()
                 from warnings import warn
                 warn(
@@ -375,13 +377,13 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
         cache_result = retrieve_from_cache(cache_dir, cache_key)
         if cache_result is None:
-            logger.debug("build program: binary cache miss (key: %s)" % cache_key)
+            logger.debug("build program: binary cache miss (key: %s)", cache_key)
             to_be_built_indices.append(i)
             binaries.append(None)
             logs.append(None)
         else:
-            logger.debug("build program: binary cache hit (key: %s)" % cache_key)
+            logger.debug("build program: binary cache hit (key: %s)", cache_key)
             binary, log = cache_result
             binaries.append(binary)
@@ -410,8 +412,9 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
         src = src + "\n\n__constant int pyopencl_defeat_cache_%s = 0;" % (
                 uuid4().hex)
-        logger.debug("build program: start building program from source on %s"
-                % ", ".join(str(devices[i]) for i in to_be_built_indices))
+        logger.debug(
+                "build program: start building program from source on %s",
+                ", ".join(str(devices[i]) for i in to_be_built_indices))
         prg = _cl._Program(ctx, src)
         prg.build(options_bytes, [devices[i] for i in to_be_built_indices])
@@ -459,13 +462,11 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes,
                     binary_path = mod_cache_dir_m.sub("binary")
                     source_path = mod_cache_dir_m.sub("source.cl")
-                    outf = open(source_path, "wt")
-                    outf.write(src)
-                    outf.close()
+                    with open(source_path, "w") as outf:
+                        outf.write(src)
-                    outf = open(binary_path, "wb")
-                    outf.write(binary)
-                    outf.close()
+                    with open(binary_path, "wb") as outf:
+                        outf.write(binary)
                     from pickle import dump
                     info_file = open(info_path, "wb")
@@ -504,7 +505,7 @@ def create_built_program_from_source_cached(ctx, src, options_bytes, devices=Non
     except Exception as e:
         from pyopencl import Error
         build_program_failure = (isinstance(e, Error)
-                and e.code == _cl.status_code.BUILD_PROGRAM_FAILURE)  # noqa pylint:disable=no-member
+                and e.code == _cl.status_code.BUILD_PROGRAM_FAILURE)  # pylint:disable=no-member
         # Mac error on intel CPU driver: can't build from cached version.
         # If we get a build_program_failure from the cached version then

pyopencl/capture_call.py CHANGED Viewed

@@ -22,6 +22,7 @@ THE SOFTWARE.
 import numpy as np
 from pytools.py_codegen import Indentation, PythonCodeGenerator
 import pyopencl as cl
@@ -30,8 +31,8 @@ import pyopencl as cl
 def capture_kernel_call(kernel, output_file, queue, g_size, l_size, *args, **kwargs):
     try:
         source = kernel._source
-    except AttributeError:
-        raise RuntimeError("cannot capture call, kernel source not available")
+    except AttributeError as err:
+        raise RuntimeError("cannot capture call, kernel source not available") from err
     if source is None:
         raise RuntimeError("cannot capture call, kernel source not available")
@@ -91,9 +92,9 @@ def capture_kernel_call(kernel, output_file, queue, g_size, l_size, *args, **kwa
             else:
                 try:
                     arg_buf = memoryview(arg)
-                except Exception:
+                except Exception as err:
                     raise RuntimeError("cannot capture: "
-                            "unsupported arg nr %d (0-based)" % i)
+                            "unsupported arg nr %d (0-based)" % i) from err
                 arg_data.append(("arg%d_data" % i, arg_buf))
                 kernel_args.append("decompress(b64decode(arg%d_data))" % i)

pyopencl/clrandom.py CHANGED Viewed

@@ -50,6 +50,7 @@ for some documentation if you're planning on using Random123 directly.
 # }}}
 import numpy as np
 from pytools import memoize_method
 import pyopencl as cl

pyopencl/compyte/dtypes.py CHANGED Viewed

@@ -29,7 +29,7 @@ OTHER DEALINGS IN THE SOFTWARE.
 import numpy as np
-class TypeNameNotKnown(RuntimeError):
+class TypeNameNotKnown(RuntimeError):  # noqa: N818
     pass
@@ -89,7 +89,7 @@ class DTypeRegistry:
         if not existed:
             self.dtype_to_name[dtype] = c_names[0]
-        if not str(dtype) in self.dtype_to_name:
+        if str(dtype) not in self.dtype_to_name:
             self.dtype_to_name[str(dtype)] = c_names[0]
         return dtype
@@ -103,7 +103,7 @@ class DTypeRegistry:
         try:
             return self.dtype_to_name[dtype]
         except KeyError:
-            raise ValueError("unable to map dtype '%s'" % dtype)
+            raise ValueError("unable to map dtype '%s'" % dtype) from None
 # }}}
@@ -260,7 +260,7 @@ def parse_c_arg_backend(c_arg, scalar_arg_factory, vec_arg_factory,
     try:
         dtype = name_to_dtype(tp)
     except KeyError:
-        raise ValueError("unknown type '%s'" % tp)
+        raise ValueError("unknown type '%s'" % tp) from None
     return arg_class(dtype, name)

pyopencl/compyte/pyproject.toml ADDED Viewed

@@ -0,0 +1,54 @@
+[tool.ruff]
+preview = true
+[tool.ruff.lint]
+extend-select = [
+    "B",   # flake8-bugbear
+    "C",   # flake8-comprehensions
+    "E",   # pycodestyle
+    "F",   # pyflakes
+    "I",   # flake8-isort
+    "N",   # pep8-naming
+    "NPY", # numpy
+    "Q",   # flake8-quotes
+    "W",   # pycodestyle
+    # TODO
+    # "UP",  # pyupgrade
+    # "RUF", # ruff
+]
+extend-ignore = [
+    "C90",  # McCabe complexity
+    "E221", # multiple spaces before operator
+    "E241", # multiple spaces after comma
+    "E402", # module level import not at the top of file
+    "E226", # missing whitespace around operator
+    "N817", # CamelCase `SubstitutionRuleMappingContext` imported as acronym `SRMC`
+    # FIXME
+    "NPY002", # numpy rng
+    "C408", # unnecessary dict() -> literal
+    "E265", # block comment should start with
+    "F841", # local variable unused
+]
+[tool.ruff.lint.per-file-ignores]
+"ndarray/**/*.py" = ["Q", "B", "E", "F", "N", "C4"]
+[tool.ruff.lint.flake8-quotes]
+docstring-quotes = "double"
+inline-quotes = "double"
+multiline-quotes = "double"
+[tool.ruff.lint.isort]
+combine-as-imports = true
+known-first-party = [
+    "pytools",
+    "pymbolic",
+]
+known-local-folder = [
+    "modepy",
+]
+lines-after-imports = 2

pyopencl/elementwise.py CHANGED Viewed

@@ -31,12 +31,19 @@ import enum
 from typing import Any, List, Optional, Tuple, Union
 import numpy as np
 from pytools import memoize_method
 import pyopencl as cl
 from pyopencl.tools import (
-    DtypedArgument, KernelTemplateBase, ScalarArg, VectorArg,
-    context_dependent_memoize, dtype_to_c_struct, dtype_to_ctype)
+    DtypedArgument,
+    KernelTemplateBase,
+    ScalarArg,
+    VectorArg,
+    context_dependent_memoize,
+    dtype_to_c_struct,
+    dtype_to_ctype,
+)
 # {{{ elementwise kernel code generator

pyopencl/invoker.py CHANGED Viewed

@@ -26,6 +26,7 @@ from typing import Any, Tuple
 from warnings import warn
 import numpy as np
 from pytools.persistent_dict import WriteOncePersistentDict
 from pytools.py_codegen import Indentation, PythonCodeGenerator
@@ -258,16 +259,17 @@ def _generate_enqueue_and_set_args_module(function_name,
     # {{{ generate _enqueue
-    enqueue_name = "enqueue_knl_%s" % function_name
+    from pytools import to_identifier
+    enqueue_name = f"enqueue_knl_{to_identifier(function_name)}"
     gen("def %s(%s):"
             % (enqueue_name,
-                ", ".join(
-                    ["self", "queue", "global_size", "local_size"]
-                    + arg_names
-                    + ["global_offset=None",
-                        "g_times_l=False",
-                        "allow_empty_ndrange=False",
-                        "wait_for=None"])))
+                ", ".join([
+                    "self", "queue", "global_size", "local_size",
+                    *arg_names,
+                    "global_offset=None",
+                    "g_times_l=False",
+                    "allow_empty_ndrange=False",
+                    "wait_for=None"])))
     with Indentation(gen):
         subgen, wait_for_parts = gen_arg_setting(in_enqueue=True)
@@ -295,7 +297,7 @@ def _generate_enqueue_and_set_args_module(function_name,
     gen("")
     gen("def set_args(%s):"
-            % (", ".join(["self"] + arg_names)))
+            % (", ".join(["self", *arg_names])))
     with Indentation(gen):
         gen.extend(gen_arg_setting(in_enqueue=False))

pyopencl/ipython_ext.py CHANGED Viewed

@@ -33,7 +33,7 @@ class PyOpenCLMagics(Magics):
     def cl_kernel(self, line, cell):
         kernel = cell
-        opts, args = self.parse_options(line, "o:")
+        opts, _args = self.parse_options(line, "o:")
         build_options = opts.get("o", "")
         self._run_kernel(kernel, build_options)

pyopencl/reduction.py CHANGED Viewed

@@ -35,8 +35,12 @@ import numpy as np
 import pyopencl as cl
 from pyopencl.tools import (
-    DtypedArgument, KernelTemplateBase, _process_code_for_macro,
-    context_dependent_memoize, dtype_to_ctype)
+    DtypedArgument,
+    KernelTemplateBase,
+    _process_code_for_macro,
+    context_dependent_memoize,
+    dtype_to_ctype,
+)
 # {{{ kernel source
@@ -219,8 +223,11 @@ def get_reduction_kernel(
         map_expr = "pyopencl_reduction_inp[i]" if stage == 2 else "in[i]"
     from pyopencl.tools import (
-        VectorArg, get_arg_list_scalar_arg_dtypes, get_arg_offset_adjuster_code,
-        parse_arg_list)
+        VectorArg,
+        get_arg_list_scalar_arg_dtypes,
+        get_arg_offset_adjuster_code,
+        parse_arg_list,
+    )
     if arguments is None:
         raise ValueError("arguments must not be None")
@@ -229,9 +236,9 @@ def get_reduction_kernel(
     arg_prep = get_arg_offset_adjuster_code(arguments)
     if stage == 2 and arguments is not None:
-        arguments = (
-                [VectorArg(dtype_out, "pyopencl_reduction_inp")]
-                + arguments)
+        arguments = [
+                VectorArg(dtype_out, "pyopencl_reduction_inp"),
+                *arguments]
     source, group_size = _get_reduction_source(
             ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
@@ -512,8 +519,7 @@ class ReductionKernel:
                     use_queue,
                     (group_count*stage_inf.group_size,),
                     (stage_inf.group_size,),
-                    *([result.base_data, result.offset]
-                        + invocation_args + size_args),
+                    *([result.base_data, result.offset, *invocation_args, *size_args]),
                     wait_for=wait_for)
             wait_for = [last_evt]
@@ -526,7 +532,7 @@ class ReductionKernel:
                     return result
             else:
                 stage_inf = self.stage_2_inf
-                args = (result,) + stage1_args
+                args = (result, *stage1_args)
                 range_ = slice_ = None