PyPI - pyopencl - Versions diffs - 2024.2__cp311-cp311-macosx_10_14_x86_64.whl - Mend

pyopencl 2024.2__cp311-cp311-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyopencl might be problematic. Click here for more details.

Files changed (122) hide show

pyopencl/__init__.py +2393 -0
pyopencl/_cl.cpython-311-darwin.so +0 -0
pyopencl/_cluda.py +54 -0
pyopencl/_mymako.py +14 -0
pyopencl/algorithm.py +1444 -0
pyopencl/array.py +3427 -0
pyopencl/bitonic_sort.py +238 -0
pyopencl/bitonic_sort_templates.py +594 -0
pyopencl/cache.py +534 -0
pyopencl/capture_call.py +176 -0
pyopencl/characterize/__init__.py +433 -0
pyopencl/characterize/performance.py +237 -0
pyopencl/cl/pyopencl-airy.cl +324 -0
pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
pyopencl/cl/pyopencl-bessel-y.cl +435 -0
pyopencl/cl/pyopencl-complex.h +303 -0
pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
pyopencl/cl/pyopencl-random123/array.h +325 -0
pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
pyopencl/cl/pyopencl-random123/philox.cl +486 -0
pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
pyopencl/clmath.py +280 -0
pyopencl/clrandom.py +408 -0
pyopencl/cltypes.py +137 -0
pyopencl/compyte/__init__.py +0 -0
pyopencl/compyte/array.py +214 -0
pyopencl/compyte/dtypes.py +290 -0
pyopencl/compyte/ndarray/__init__.py +0 -0
pyopencl/compyte/ndarray/gen_elemwise.py +1907 -0
pyopencl/compyte/ndarray/gen_reduction.py +1511 -0
pyopencl/compyte/ndarray/setup_opencl.py +101 -0
pyopencl/compyte/ndarray/test_gpu_elemwise.py +411 -0
pyopencl/compyte/ndarray/test_gpu_ndarray.py +487 -0
pyopencl/elementwise.py +1164 -0
pyopencl/invoker.py +418 -0
pyopencl/ipython_ext.py +68 -0
pyopencl/reduction.py +780 -0
pyopencl/scan.py +1898 -0
pyopencl/tools.py +1513 -0
pyopencl/version.py +3 -0
pyopencl-2024.2.data/data/CITATION.cff +74 -0
pyopencl-2024.2.data/data/LICENSE +282 -0
pyopencl-2024.2.data/data/Makefile.in +21 -0
pyopencl-2024.2.data/data/README.rst +70 -0
pyopencl-2024.2.data/data/README_SETUP.txt +34 -0
pyopencl-2024.2.data/data/aksetup_helper.py +1013 -0
pyopencl-2024.2.data/data/configure.py +6 -0
pyopencl-2024.2.data/data/contrib/cldis.py +91 -0
pyopencl-2024.2.data/data/contrib/fortran-to-opencl/README +29 -0
pyopencl-2024.2.data/data/contrib/fortran-to-opencl/translate.py +1441 -0
pyopencl-2024.2.data/data/contrib/pyopencl.vim +84 -0
pyopencl-2024.2.data/data/doc/Makefile +23 -0
pyopencl-2024.2.data/data/doc/algorithm.rst +214 -0
pyopencl-2024.2.data/data/doc/array.rst +305 -0
pyopencl-2024.2.data/data/doc/conf.py +26 -0
pyopencl-2024.2.data/data/doc/howto.rst +105 -0
pyopencl-2024.2.data/data/doc/index.rst +137 -0
pyopencl-2024.2.data/data/doc/make_constants.py +561 -0
pyopencl-2024.2.data/data/doc/misc.rst +885 -0
pyopencl-2024.2.data/data/doc/runtime.rst +51 -0
pyopencl-2024.2.data/data/doc/runtime_const.rst +30 -0
pyopencl-2024.2.data/data/doc/runtime_gl.rst +78 -0
pyopencl-2024.2.data/data/doc/runtime_memory.rst +527 -0
pyopencl-2024.2.data/data/doc/runtime_platform.rst +184 -0
pyopencl-2024.2.data/data/doc/runtime_program.rst +364 -0
pyopencl-2024.2.data/data/doc/runtime_queue.rst +182 -0
pyopencl-2024.2.data/data/doc/subst.rst +36 -0
pyopencl-2024.2.data/data/doc/tools.rst +4 -0
pyopencl-2024.2.data/data/doc/types.rst +42 -0
pyopencl-2024.2.data/data/examples/black-hole-accretion.py +2227 -0
pyopencl-2024.2.data/data/examples/demo-struct-reduce.py +75 -0
pyopencl-2024.2.data/data/examples/demo.py +39 -0
pyopencl-2024.2.data/data/examples/demo_array.py +32 -0
pyopencl-2024.2.data/data/examples/demo_array_svm.py +37 -0
pyopencl-2024.2.data/data/examples/demo_elementwise.py +34 -0
pyopencl-2024.2.data/data/examples/demo_elementwise_complex.py +53 -0
pyopencl-2024.2.data/data/examples/demo_mandelbrot.py +183 -0
pyopencl-2024.2.data/data/examples/demo_meta_codepy.py +56 -0
pyopencl-2024.2.data/data/examples/demo_meta_template.py +55 -0
pyopencl-2024.2.data/data/examples/dump-performance.py +38 -0
pyopencl-2024.2.data/data/examples/dump-properties.py +86 -0
pyopencl-2024.2.data/data/examples/gl_interop_demo.py +84 -0
pyopencl-2024.2.data/data/examples/gl_particle_animation.py +218 -0
pyopencl-2024.2.data/data/examples/ipython-demo.ipynb +203 -0
pyopencl-2024.2.data/data/examples/median-filter.py +99 -0
pyopencl-2024.2.data/data/examples/n-body.py +1070 -0
pyopencl-2024.2.data/data/examples/narray.py +37 -0
pyopencl-2024.2.data/data/examples/noisyImage.jpg +0 -0
pyopencl-2024.2.data/data/examples/pi-monte-carlo.py +1166 -0
pyopencl-2024.2.data/data/examples/svm.py +82 -0
pyopencl-2024.2.data/data/examples/transpose.py +229 -0
pyopencl-2024.2.data/data/pytest.ini +3 -0
pyopencl-2024.2.data/data/src/bitlog.cpp +51 -0
pyopencl-2024.2.data/data/src/bitlog.hpp +83 -0
pyopencl-2024.2.data/data/src/clinfo_ext.h +134 -0
pyopencl-2024.2.data/data/src/mempool.hpp +444 -0
pyopencl-2024.2.data/data/src/pyopencl_ext.h +77 -0
pyopencl-2024.2.data/data/src/tools.hpp +90 -0
pyopencl-2024.2.data/data/src/wrap_cl.cpp +61 -0
pyopencl-2024.2.data/data/src/wrap_cl.hpp +5853 -0
pyopencl-2024.2.data/data/src/wrap_cl_part_1.cpp +369 -0
pyopencl-2024.2.data/data/src/wrap_cl_part_2.cpp +702 -0
pyopencl-2024.2.data/data/src/wrap_constants.cpp +1274 -0
pyopencl-2024.2.data/data/src/wrap_helpers.hpp +213 -0
pyopencl-2024.2.data/data/src/wrap_mempool.cpp +731 -0
pyopencl-2024.2.data/data/test/add-vectors-32.spv +0 -0
pyopencl-2024.2.data/data/test/add-vectors-64.spv +0 -0
pyopencl-2024.2.data/data/test/empty-header.h +1 -0
pyopencl-2024.2.data/data/test/test_algorithm.py +1180 -0
pyopencl-2024.2.data/data/test/test_array.py +2392 -0
pyopencl-2024.2.data/data/test/test_arrays_in_structs.py +100 -0
pyopencl-2024.2.data/data/test/test_clmath.py +529 -0
pyopencl-2024.2.data/data/test/test_clrandom.py +75 -0
pyopencl-2024.2.data/data/test/test_enqueue_copy.py +271 -0
pyopencl-2024.2.data/data/test/test_wrapper.py +1554 -0
pyopencl-2024.2.dist-info/LICENSE +282 -0
pyopencl-2024.2.dist-info/METADATA +105 -0
pyopencl-2024.2.dist-info/RECORD +122 -0
pyopencl-2024.2.dist-info/WHEEL +5 -0
pyopencl-2024.2.dist-info/top_level.txt +1 -0

pyopencl/clmath.py ADDED Viewed

@@ -0,0 +1,280 @@
+# pylint:disable=unexpected-keyword-arg  # for @elwise_kernel_runner
+__copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+import numpy as np
+import pyopencl.array as cl_array
+import pyopencl.elementwise as elementwise
+from pyopencl.array import _get_common_dtype
+def _make_unary_array_func(name):
+    """Build an array-level wrapper for the unary function called *name*.
+    The returned callable has the signature ``f(array, queue=None)``: it
+    allocates a result with ``array._new_like_me``, runs the elementwise
+    kernel on it and returns the result array.
+    """
+    @cl_array.elwise_kernel_runner
+    def knl_runner(result, arg):
+        # Complex dtypes use a function name prefixed with the complex type
+        # name; all other dtypes use *name* unchanged.
+        if arg.dtype.kind != "c":
+            kernel_func_name = name
+        else:
+            from pyopencl.elementwise import complex_dtype_to_name
+            kernel_func_name = f"{complex_dtype_to_name(arg.dtype)}_{name}"
+        return elementwise.get_unary_func_kernel(
+                result.context, kernel_func_name, arg.dtype)
+    def f(array, queue=None):
+        out = array._new_like_me(queue=queue)
+        # Record the kernel's event on the output array.
+        out.add_event(knl_runner(out, array, queue=queue))
+        return out
+    return f
+# Module-level wrappers generated by _make_unary_array_func, one per
+# function name. See table 6.8 in the CL 1.1 spec
+acos = _make_unary_array_func("acos")
+acosh = _make_unary_array_func("acosh")
+acospi = _make_unary_array_func("acospi")
+asin = _make_unary_array_func("asin")
+asinh = _make_unary_array_func("asinh")
+asinpi = _make_unary_array_func("asinpi")
+@cl_array.elwise_kernel_runner
+def _atan2(result, arg1, arg2):
+    # Kernel getter for "atan2", keyed on both operand dtypes and the
+    # result dtype.
+    dtypes = (arg1.dtype, arg2.dtype, result.dtype)
+    return elementwise.get_float_binary_func_kernel(
+        result.context, "atan2", *dtypes)
+@cl_array.elwise_kernel_runner
+def _atan2pi(result, arg1, arg2):
+    # Kernel getter for "atan2pi", keyed on both operand dtypes and the
+    # result dtype.
+    dtypes = (arg1.dtype, arg2.dtype, result.dtype)
+    return elementwise.get_float_binary_func_kernel(
+        result.context, "atan2pi", *dtypes)
+atan = _make_unary_array_func("atan")
+def atan2(y, x, queue=None):
+    """
+    .. versionadded:: 2013.1
+    """
+    # Fall back to y's queue when the caller passed none.
+    if not queue:
+        queue = y.queue
+    # The result is allocated like *y*, with the common dtype of y and x.
+    out = y._new_like_me(_get_common_dtype(y, x, queue))
+    evt = _atan2(out, y, x, queue=queue)
+    out.add_event(evt)
+    return out
+atanh = _make_unary_array_func("atanh")
+atanpi = _make_unary_array_func("atanpi")
+def atan2pi(y, x, queue=None):
+    """
+    .. versionadded:: 2013.1
+    """
+    # Fall back to y's queue when the caller passed none.
+    if not queue:
+        queue = y.queue
+    # The result is allocated like *y*, with the common dtype of y and x.
+    out = y._new_like_me(_get_common_dtype(y, x, queue))
+    evt = _atan2pi(out, y, x, queue=queue)
+    out.add_event(evt)
+    return out
+# More generated unary wrappers. The TODO entries name functions that have
+# no wrapper defined at this point in the module.
+cbrt = _make_unary_array_func("cbrt")
+ceil = _make_unary_array_func("ceil")
+# TODO: copysign
+cos = _make_unary_array_func("cos")
+cosh = _make_unary_array_func("cosh")
+cospi = _make_unary_array_func("cospi")
+erfc = _make_unary_array_func("erfc")
+erf = _make_unary_array_func("erf")
+exp = _make_unary_array_func("exp")
+exp2 = _make_unary_array_func("exp2")
+exp10 = _make_unary_array_func("exp10")
+expm1 = _make_unary_array_func("expm1")
+fabs = _make_unary_array_func("fabs")
+# TODO: fdim
+floor = _make_unary_array_func("floor")
+# TODO: fma
+# TODO: fmax
+# TODO: fmin
+@cl_array.elwise_kernel_runner
+def _fmod(result, arg, mod):
+    # Kernel getter for fmod, keyed on the (result, arg, mod) dtypes.
+    out_dtype, arg_dtype, mod_dtype = result.dtype, arg.dtype, mod.dtype
+    return elementwise.get_fmod_kernel(
+            result.context, out_dtype, arg_dtype, mod_dtype)
+def fmod(arg, mod, queue=None):
+    """Compute, element by element, the floating point remainder of
+    ``arg / mod`` and return it as a new array."""
+    # Queue precedence: explicit argument, then arg's queue, then mod's.
+    queue = queue or arg.queue or mod.queue
+    out = arg._new_like_me(_get_common_dtype(arg, mod, queue))
+    evt = _fmod(out, arg, mod, queue=queue)
+    out.add_event(evt)
+    return out
+# TODO: fract
+@cl_array.elwise_kernel_runner
+def _frexp(sig, expt, arg):
+    # Kernel getter for frexp; the context is taken from the significand
+    # output array.
+    ctx = sig.context
+    return elementwise.get_frexp_kernel(
+            ctx, sig.dtype, expt.dtype, arg.dtype)
+def frexp(arg, queue=None):
+    """Decompose *arg* and return the pair ``(significands, exponents)``
+    of new arrays, where ``arg == significand * 2**exponent``.
+    The exponent array has dtype ``int32``.
+    """
+    significands = arg._new_like_me(queue=queue)
+    exponents = arg._new_like_me(queue=queue, dtype=np.int32)
+    evt = _frexp(significands, exponents, arg, queue=queue)
+    # Both outputs are written by the same kernel launch.
+    for out in (significands, exponents):
+        out.add_event(evt)
+    return significands, exponents
+# TODO: hypot
+ilogb = _make_unary_array_func("ilogb")
+@cl_array.elwise_kernel_runner
+def _ldexp(result, sig, exp):
+    # Kernel getter for ldexp, keyed on the (result, sig, exp) dtypes.
+    out_dtype, sig_dtype, exp_dtype = result.dtype, sig.dtype, exp.dtype
+    return elementwise.get_ldexp_kernel(
+            result.context, out_dtype, sig_dtype, exp_dtype)
+def ldexp(significand, exponent, queue=None):
+    """Return a new array of floating point values composed from the
+    entries of ``significand`` and ``exponent``, paired together as
+    ``result = significand * 2**exponent``.
+    The result is allocated with ``significand._new_like_me``; *queue*,
+    if given, is used both for that allocation and for the kernel launch.
+    """
+    result = significand._new_like_me(queue=queue)
+    # Forward *queue* to the kernel runner like every other wrapper in this
+    # module does, so an explicitly given queue is not dropped here.
+    result.add_event(_ldexp(result, significand, exponent, queue=queue))
+    return result
+# More generated unary wrappers. The TODO entries name functions that have
+# no wrapper defined at this point in the module.
+lgamma = _make_unary_array_func("lgamma")
+# TODO: lgamma_r
+log = _make_unary_array_func("log")
+log2 = _make_unary_array_func("log2")
+log10 = _make_unary_array_func("log10")
+log1p = _make_unary_array_func("log1p")
+logb = _make_unary_array_func("logb")
+# TODO: mad
+# TODO: maxmag
+# TODO: minmag
+@cl_array.elwise_kernel_runner
+def _modf(intpart, fracpart, arg):
+    # Kernel getter for modf; the context is taken from the integer-part
+    # output array.
+    ctx = intpart.context
+    return elementwise.get_modf_kernel(
+            ctx, intpart.dtype, fracpart.dtype, arg.dtype)
+def modf(arg, queue=None):
+    """Split *arg* into its fractional and integer parts and return them
+    as the tuple ``(fracpart, intpart)`` of new arrays (fractional part
+    first).
+    """
+    intpart = arg._new_like_me(queue=queue)
+    fracpart = arg._new_like_me(queue=queue)
+    evt = _modf(intpart, fracpart, arg, queue=queue)
+    # Both outputs are written by the same kernel launch.
+    for part in (fracpart, intpart):
+        part.add_event(evt)
+    return fracpart, intpart
+# More generated unary wrappers. The TODO entries name functions that have
+# no wrapper defined at this point in the module.
+nan = _make_unary_array_func("nan")
+# TODO: nextafter
+# TODO: remainder
+# TODO: remquo
+rint = _make_unary_array_func("rint")
+# TODO: rootn
+# NOTE: this rebinds the name ``round`` at module level, hiding the builtin
+# of the same name within this module.
+round = _make_unary_array_func("round")
+sin = _make_unary_array_func("sin")
+# TODO: sincos
+sinh = _make_unary_array_func("sinh")
+sinpi = _make_unary_array_func("sinpi")
+sqrt = _make_unary_array_func("sqrt")
+tan = _make_unary_array_func("tan")
+tanh = _make_unary_array_func("tanh")
+tanpi = _make_unary_array_func("tanpi")
+tgamma = _make_unary_array_func("tgamma")
+trunc = _make_unary_array_func("trunc")
+# no point wrapping half_ or native_
+# TODO: table 6.10, integer functions
+# TODO: table 6.12, clamp et al
+@cl_array.elwise_kernel_runner
+def _bessel_jn(result, n, x):
+    # The dtype for the order *n* is derived from its Python type rather
+    # than from a ``dtype`` attribute.
+    order_dtype = np.dtype(type(n))
+    return elementwise.get_bessel_kernel(
+            result.context, "j", result.dtype, order_dtype, x.dtype)
+@cl_array.elwise_kernel_runner
+def _bessel_yn(result, n, x):
+    # The dtype for the order *n* is derived from its Python type rather
+    # than from a ``dtype`` attribute.
+    order_dtype = np.dtype(type(n))
+    return elementwise.get_bessel_kernel(
+            result.context, "y", result.dtype, order_dtype, x.dtype)
+@cl_array.elwise_kernel_runner
+def _hankel_01(h0, h1, x):
+    # Both outputs must share one dtype; the kernel is keyed on it and on
+    # the dtype of the input.
+    out_dtype = h0.dtype
+    if out_dtype != h1.dtype:
+        raise TypeError("types of h0 and h1 must match")
+    return elementwise.get_hankel_01_kernel(h0.context, out_dtype, x.dtype)
+def bessel_jn(n, x, queue=None):
+    """Run the Bessel ``"j"`` kernel with order *n* on the array *x* and
+    return the result as a new array created by ``x._new_like_me``."""
+    out = x._new_like_me(queue=queue)
+    evt = _bessel_jn(out, n, x, queue=queue)
+    out.add_event(evt)
+    return out
+def bessel_yn(n, x, queue=None):
+    """Run the Bessel ``"y"`` kernel with order *n* on the array *x* and
+    return the result as a new array created by ``x._new_like_me``."""
+    out = x._new_like_me(queue=queue)
+    evt = _bessel_yn(out, n, x, queue=queue)
+    out.add_event(evt)
+    return out
+def hankel_01(x, queue=None):
+    """Run the ``hankel_01`` kernel on *x* and return the pair
+    ``(h0, h1)`` of new arrays, each created by ``x._new_like_me``."""
+    h0 = x._new_like_me(queue=queue)
+    h1 = x._new_like_me(queue=queue)
+    evt = _hankel_01(h0, h1, x, queue=queue)
+    # Both outputs are written by the same kernel launch.
+    for out in (h0, h1):
+        out.add_event(evt)
+    return h0, h1

pyopencl/clrandom.py ADDED Viewed

@@ -0,0 +1,408 @@
+__copyright__ = "Copyright (C) 2009-16 Andreas Kloeckner"
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+# {{{ documentation
+__doc__ = """
+PyOpenCL includes and uses some of the `Random123 random number generators
+<https://www.deshawresearch.com/resources.html>`__ by D.E. Shaw
+Research. In addition to being usable through the convenience functions above,
+they are available in any piece of code compiled through PyOpenCL by::
+    #include <pyopencl-random123/philox.cl>
+    #include <pyopencl-random123/threefry.cl>
+See the `Philox source
+<https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/philox.cl>`__
+and the `Threefry source
+<https://github.com/inducer/pyopencl/blob/main/pyopencl/cl/pyopencl-random123/threefry.cl>`__
+for some documentation if you're planning on using Random123 directly.
+.. autoclass:: PhiloxGenerator
+.. autoclass:: ThreefryGenerator
+.. autofunction:: rand
+.. autofunction:: fill_rand
+"""
+# }}}
+import numpy as np
+from pytools import memoize_method
+import pyopencl as cl
+import pyopencl.array as cl_array
+import pyopencl.cltypes as cltypes
+from pyopencl.tools import first_arg_dependent_memoize
+# {{{ Random123 generators
+class Random123GeneratorBase:
+    """
+    .. versionadded:: 2016.2
+    .. automethod:: fill_uniform
+    .. automethod:: uniform
+    .. automethod:: fill_normal
+    .. automethod:: normal
+    """
+    # The three properties below are placeholders that raise
+    # NotImplementedError; the concrete generator classes replace them with
+    # plain class attributes.
+    @property
+    def header_name(self):
+        raise NotImplementedError
+    @property
+    def generator_name(self):
+        raise NotImplementedError
+    @property
+    def key_length(self):
+        raise NotImplementedError
+    def __init__(self, context, key=None, counter=None, seed=None):
+        """
+        :arg context: stored as ``self.context``; kernels are built for it.
+        :arg key: a list of ``key_length - 1`` integers, chosen at random
+            from the int32 range if *None*.
+        :arg counter: a list of 4 integers, chosen at random from the int32
+            range if *None*.
+        :arg seed: seed for the :class:`random.Random` instance that makes
+            the random choices above.
+        :raises TypeError: if *key*, *counter* and *seed* are all given.
+        """
+        int32_info = np.iinfo(np.int32)
+        from random import Random
+        rng = Random(seed)
+        if key is not None and counter is not None and seed is not None:
+            raise TypeError("seed is unused and may not be specified "
+                    "if both counter and key are given")
+        if key is None:
+            # One key entry fewer than key_length: the kernel puts
+            # get_global_id(0) in the first key slot.
+            key = [
+                    rng.randrange(
+                        int(int32_info.min), int(int32_info.max)+1)
+                    for i in range(self.key_length-1)]
+        if counter is None:
+            counter = [
+                    rng.randrange(
+                        int(int32_info.min), int(int32_info.max)+1)
+                    for i in range(4)]
+        self.context = context
+        self.key = key
+        self.counter = counter
+        # Modulus used by _fill when carrying between counter entries.
+        self.counter_max = int32_info.max
+    @memoize_method
+    def get_gen_kernel(self, dtype, distribution):
+        """Build the generator kernel for *dtype* and *distribution*.
+        :return: a tuple ``(knl, counter_multiplier, size_multiplier)``.
+            ``counter_multiplier`` scales the counter advance in
+            :meth:`_fill`; ``size_multiplier`` scales the output length
+            passed to the kernel.
+        :raises TypeError: for an unsupported distribution/dtype combination.
+        """
+        size_multiplier = 1
+        arg_dtype = dtype
+        rng_key = (distribution, dtype)
+        if rng_key in [("uniform", np.float64), ("normal", np.float64)]:
+            c_type = "double"
+            scale1_const = "((double) %r)" % (1/2**32)
+            scale2_const = "((double) %r)" % (1/2**64)
+            if distribution == "normal":
+                transform = "box_muller"
+            else:
+                transform = ""
+            # The expression refers to ``gen`` twice, at scales 2**-32 and
+            # 2**-64.
+            rng_expr = (
+                    "shift + scale * "
+                    "%s( %s * convert_double4(gen)"
+                    "+ %s * convert_double4(gen))"
+                    % (transform, scale1_const, scale2_const))
+            counter_multiplier = 2
+        elif rng_key in [(dist, cmp_dtype)
+                for dist in ["normal", "uniform"]
+                for cmp_dtype in [
+                    np.float32,
+                    cltypes.float2,
+                    cltypes.float3,
+                    cltypes.float4,
+                    ]]:
+            c_type = "float"
+            scale_const = "((float) %r)" % (1/2**32)
+            if distribution == "normal":
+                transform = "box_muller"
+            else:
+                transform = ""
+            rng_expr = (
+                    "shift + scale * %s(%s * convert_float4(gen))"
+                    % (transform, scale_const))
+            counter_multiplier = 1
+            # Scalar kernel arguments are float32 even for vector dtypes.
+            arg_dtype = np.float32
+            # For vector dtypes, look up the component count; plain float32
+            # has no entry in the table and keeps size_multiplier == 1.
+            try:
+                _, size_multiplier = cltypes.vec_type_to_scalar_and_count[dtype]
+            except KeyError:
+                pass
+        elif rng_key == ("uniform", np.int32):
+            c_type = "int"
+            rng_expr = (
+                    "shift + convert_int4((convert_long4(gen) * scale) / %s)"
+                    % (str(2**32)+"l")
+                    )
+            counter_multiplier = 1
+        elif rng_key == ("uniform", np.int64):
+            c_type = "long"
+            # The adjacent string literals are joined first, so .replace()
+            # substitutes "two32" in the whole expression.
+            rng_expr = (
+                    "shift"
+                    "+ convert_long4(gen) * (scale/two32) "
+                    "+ ((convert_long4(gen) * scale) / two32)"
+                    .replace("two32", (str(2**32)+"l")))
+            counter_multiplier = 2
+        else:
+            raise TypeError(
+                    "unsupported RNG distribution/data type combination '%s/%s'"
+                    % rng_key)
+        kernel_name = f"rng_gen_{self.generator_name}_{distribution}"
+        # OpenCL source template, filled in by str.format below; doubled
+        # braces are literal braces in the generated code.
+        src = """//CL//
+            #include <{header_name}>
+            #ifndef M_PI
+            #ifdef M_PI_F
+            #define M_PI M_PI_F
+            #else
+            #define M_PI 3.14159265359f
+            #endif
+            #endif
+            typedef {output_t} output_t;
+            typedef {output_t}4 output_vec_t;
+            typedef {gen_name}_ctr_t ctr_t;
+            typedef {gen_name}_key_t key_t;
+            uint4 gen_bits(key_t *key, ctr_t *ctr)
+            {{
+                union {{
+                    ctr_t ctr_el;
+                    uint4 vec_el;
+                }} u;
+                u.ctr_el = {gen_name}(*ctr, *key);
+                if (++ctr->v[0] == 0)
+                    if (++ctr->v[1] == 0)
+                        ++ctr->v[2];
+                return u.vec_el;
+            }}
+            #if {include_box_muller}
+            output_vec_t box_muller(output_vec_t x)
+            {{
+                #define BOX_MULLER(I, COMPA, COMPB) \
+                    output_t r##I = sqrt(-2*log(x.COMPA)); \
+                    output_t c##I; \
+                    output_t s##I = sincos((output_t) (2*M_PI) * x.COMPB, &c##I);
+                BOX_MULLER(0, x, y);
+                BOX_MULLER(1, z, w);
+                return (output_vec_t) (r0*c0, r0*s0, r1*c1, r1*s1);
+            }}
+            #endif
+            #define GET_RANDOM_NUM(gen) {rng_expr}
+            kernel void {kernel_name}(
+                int k1,
+                #if {key_length} > 2
+                int k2, int k3,
+                #endif
+                int c0, int c1, int c2, int c3,
+                global output_t *output,
+                long out_size,
+                output_t scale,
+                output_t shift)
+            {{
+                #if {key_length} == 2
+                key_t k = {{{{get_global_id(0), k1}}}};
+                #else
+                key_t k = {{{{get_global_id(0), k1, k2, k3}}}};
+                #endif
+                ctr_t c = {{{{c0, c1, c2, c3}}}};
+                // output bulk
+                unsigned long idx = get_global_id(0)*4;
+                while (idx + 4 < out_size)
+                {{
+                    output_vec_t ran = GET_RANDOM_NUM(gen_bits(&k, &c));
+                    vstore4(ran, 0, &output[idx]);
+                    idx += 4*get_global_size(0);
+                }}
+                // output tail
+                output_vec_t tail_ran = GET_RANDOM_NUM(gen_bits(&k, &c));
+                if (idx < out_size)
+                  output[idx] = tail_ran.x;
+                if (idx+1 < out_size)
+                  output[idx+1] = tail_ran.y;
+                if (idx+2 < out_size)
+                  output[idx+2] = tail_ran.z;
+                if (idx+3 < out_size)
+                  output[idx+3] = tail_ran.w;
+            }}
+            """.format(
+                kernel_name=kernel_name,
+                gen_name=self.generator_name,
+                header_name=self.header_name,
+                output_t=c_type,
+                key_length=self.key_length,
+                include_box_muller=int(distribution == "normal"),
+                rng_expr=rng_expr
+                )
+        prg = cl.Program(self.context, src).build()
+        knl = getattr(prg, kernel_name)
+        # Argument layout: (key_length - 1) key ints and 4 counter ints, then
+        # the output buffer (None), out_size, scale and shift.
+        knl.set_scalar_arg_dtypes(
+                [np.int32] * (self.key_length - 1 + 4)
+                + [None, np.int64, arg_dtype, arg_dtype])
+        return knl, counter_multiplier, size_multiplier
+    def _fill(self, distribution, ary, scale, shift, queue=None):
+        """Fill *ary* with random numbers of the given *distribution*
+        (``"uniform"`` or ``"normal"``), using the kernel from
+        :meth:`get_gen_kernel` with the given *scale* and *shift*.
+        If *queue* is *None*, ``ary.queue`` is used. ``self.counter`` is
+        advanced after the kernel has been enqueued.
+        :return: a :class:`pyopencl.Event`
+        """
+        if queue is None:
+            queue = ary.queue
+        knl, counter_multiplier, size_multiplier = \
+                self.get_gen_kernel(ary.dtype, distribution)
+        # Kernel arguments: key entries, counter entries, output buffer,
+        # output length in scalar elements, scale, shift.
+        args = self.key + self.counter + [
+                ary.data, ary.size*size_multiplier,
+                scale, shift]
+        n = ary.size
+        from pyopencl.array import _splay
+        gsize, lsize = _splay(queue.device, ary.size)
+        evt = knl(queue, gsize, lsize, *args)
+        ary.add_event(evt)
+        # Advance the counter, carrying from entry 0 into entry 1 and from
+        # entry 1 into entry 2, modulo counter_max.
+        self.counter[0] += n * counter_multiplier
+        c1_incr, self.counter[0] = divmod(self.counter[0], self.counter_max)
+        if c1_incr:
+            self.counter[1] += c1_incr
+            c2_incr, self.counter[1] = divmod(self.counter[1], self.counter_max)
+            self.counter[2] += c2_incr
+        return evt
+    def fill_uniform(self, ary, a=0, b=1, queue=None):
+        """Fill *ary* with uniformly distributed numbers, passing
+        ``scale=(b-a)`` and ``shift=a`` to the generator kernel.
+        :return: a :class:`pyopencl.Event`
+        """
+        return self._fill("uniform", ary,
+                scale=(b-a), shift=a, queue=queue)
+    def uniform(self, *args, **kwargs):
+        """Make a new empty array, apply :meth:`fill_uniform` to it.
+        The keyword arguments *a* and *b* are passed to :meth:`fill_uniform`;
+        everything else goes to :func:`pyopencl.array.empty`.
+        """
+        a = kwargs.pop("a", 0)
+        b = kwargs.pop("b", 1)
+        result = cl_array.empty(*args, **kwargs)
+        self.fill_uniform(result, queue=result.queue, a=a, b=b)
+        return result
+    def fill_normal(self, ary, mu=0, sigma=1, queue=None):
+        """Fill *ary* with normally distributed numbers with mean *mu* and
+        standard deviation *sigma*.
+        :return: a :class:`pyopencl.Event`
+        """
+        return self._fill("normal", ary, scale=sigma, shift=mu, queue=queue)
+    def normal(self, *args, **kwargs):
+        """Make a new empty array, apply :meth:`fill_normal` to it.
+        The keyword arguments *mu* and *sigma* are passed to
+        :meth:`fill_normal`; everything else goes to
+        :func:`pyopencl.array.empty`.
+        """
+        mu = kwargs.pop("mu", 0)
+        sigma = kwargs.pop("sigma", 1)
+        result = cl_array.empty(*args, **kwargs)
+        self.fill_normal(result, queue=result.queue, mu=mu, sigma=sigma)
+        return result
+class PhiloxGenerator(Random123GeneratorBase):
+    # Reuse the base class documentation verbatim.
+    __doc__ = Random123GeneratorBase.__doc__
+    # Header named in the generated kernel's #include line.
+    header_name = "pyopencl-random123/philox.cl"
+    # Prefix of the generator function and of its _ctr_t/_key_t types.
+    generator_name = "philox4x32"
+    # Number of key entries, including the one the kernel fills in itself.
+    key_length = 2
+class ThreefryGenerator(Random123GeneratorBase):
+    # Reuse the base class documentation verbatim.
+    __doc__ = Random123GeneratorBase.__doc__
+    # Header named in the generated kernel's #include line.
+    header_name = "pyopencl-random123/threefry.cl"
+    # Prefix of the generator function and of its _ctr_t/_key_t types.
+    generator_name = "threefry4x32"
+    # Number of key entries, including the one the kernel fills in itself.
+    key_length = 4
+# }}}
+@first_arg_dependent_memoize
+def _get_generator(context):
+    # Philox when the context's first device is a CPU, Threefry otherwise.
+    on_cpu = context.devices[0].type & cl.device_type.CPU
+    generator_cls = PhiloxGenerator if on_cpu else ThreefryGenerator
+    return generator_cls(context)
+def fill_rand(result, queue=None, a=0, b=1):
+    """Fill *result* with random values in the range :math:`[a, b)`.
+    If *queue* is *None*, ``result.queue`` is used. The generator is the
+    per-context one returned by ``_get_generator``.
+    """
+    if queue is None:
+        queue = result.queue
+    gen = _get_generator(queue.context)
+    # Hand the resolved queue on: without it the fill falls back to
+    # ``result.queue`` and an explicitly supplied *queue* is ignored.
+    gen.fill_uniform(result, a=a, b=b, queue=queue)
+def rand(queue, shape, dtype, luxury=None, a=0, b=1):
+    """Allocate an array of the given *shape* and *dtype* on *queue*, fill
+    it with random values in the range :math:`[a, b)` and return it.
+    Passing *luxury* only triggers a deprecation warning.
+    """
+    if luxury is not None:
+        import warnings
+        warnings.warn(
+                "Specifying the 'luxury' argument is deprecated and will stop being "
+                "supported in PyOpenCL 2018.x", stacklevel=2)
+    generator = _get_generator(queue.context)
+    out = cl_array.Array(queue, shape, dtype)
+    generator.fill_uniform(out, a=a, b=b)
+    return out
+# vim: filetype=pyopencl:foldmethod=marker