pyopencl 2024.2__cp311-cp311-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (122) hide show
  1. pyopencl/__init__.py +2393 -0
  2. pyopencl/_cl.cpython-311-darwin.so +0 -0
  3. pyopencl/_cluda.py +54 -0
  4. pyopencl/_mymako.py +14 -0
  5. pyopencl/algorithm.py +1444 -0
  6. pyopencl/array.py +3427 -0
  7. pyopencl/bitonic_sort.py +238 -0
  8. pyopencl/bitonic_sort_templates.py +594 -0
  9. pyopencl/cache.py +534 -0
  10. pyopencl/capture_call.py +176 -0
  11. pyopencl/characterize/__init__.py +433 -0
  12. pyopencl/characterize/performance.py +237 -0
  13. pyopencl/cl/pyopencl-airy.cl +324 -0
  14. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  15. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  16. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  17. pyopencl/cl/pyopencl-complex.h +303 -0
  18. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  19. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  20. pyopencl/cl/pyopencl-random123/array.h +325 -0
  21. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  22. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  23. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  24. pyopencl/clmath.py +280 -0
  25. pyopencl/clrandom.py +408 -0
  26. pyopencl/cltypes.py +137 -0
  27. pyopencl/compyte/__init__.py +0 -0
  28. pyopencl/compyte/array.py +214 -0
  29. pyopencl/compyte/dtypes.py +290 -0
  30. pyopencl/compyte/ndarray/__init__.py +0 -0
  31. pyopencl/compyte/ndarray/gen_elemwise.py +1907 -0
  32. pyopencl/compyte/ndarray/gen_reduction.py +1511 -0
  33. pyopencl/compyte/ndarray/setup_opencl.py +101 -0
  34. pyopencl/compyte/ndarray/test_gpu_elemwise.py +411 -0
  35. pyopencl/compyte/ndarray/test_gpu_ndarray.py +487 -0
  36. pyopencl/elementwise.py +1164 -0
  37. pyopencl/invoker.py +418 -0
  38. pyopencl/ipython_ext.py +68 -0
  39. pyopencl/reduction.py +780 -0
  40. pyopencl/scan.py +1898 -0
  41. pyopencl/tools.py +1513 -0
  42. pyopencl/version.py +3 -0
  43. pyopencl-2024.2.data/data/CITATION.cff +74 -0
  44. pyopencl-2024.2.data/data/LICENSE +282 -0
  45. pyopencl-2024.2.data/data/Makefile.in +21 -0
  46. pyopencl-2024.2.data/data/README.rst +70 -0
  47. pyopencl-2024.2.data/data/README_SETUP.txt +34 -0
  48. pyopencl-2024.2.data/data/aksetup_helper.py +1013 -0
  49. pyopencl-2024.2.data/data/configure.py +6 -0
  50. pyopencl-2024.2.data/data/contrib/cldis.py +91 -0
  51. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/README +29 -0
  52. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/translate.py +1441 -0
  53. pyopencl-2024.2.data/data/contrib/pyopencl.vim +84 -0
  54. pyopencl-2024.2.data/data/doc/Makefile +23 -0
  55. pyopencl-2024.2.data/data/doc/algorithm.rst +214 -0
  56. pyopencl-2024.2.data/data/doc/array.rst +305 -0
  57. pyopencl-2024.2.data/data/doc/conf.py +26 -0
  58. pyopencl-2024.2.data/data/doc/howto.rst +105 -0
  59. pyopencl-2024.2.data/data/doc/index.rst +137 -0
  60. pyopencl-2024.2.data/data/doc/make_constants.py +561 -0
  61. pyopencl-2024.2.data/data/doc/misc.rst +885 -0
  62. pyopencl-2024.2.data/data/doc/runtime.rst +51 -0
  63. pyopencl-2024.2.data/data/doc/runtime_const.rst +30 -0
  64. pyopencl-2024.2.data/data/doc/runtime_gl.rst +78 -0
  65. pyopencl-2024.2.data/data/doc/runtime_memory.rst +527 -0
  66. pyopencl-2024.2.data/data/doc/runtime_platform.rst +184 -0
  67. pyopencl-2024.2.data/data/doc/runtime_program.rst +364 -0
  68. pyopencl-2024.2.data/data/doc/runtime_queue.rst +182 -0
  69. pyopencl-2024.2.data/data/doc/subst.rst +36 -0
  70. pyopencl-2024.2.data/data/doc/tools.rst +4 -0
  71. pyopencl-2024.2.data/data/doc/types.rst +42 -0
  72. pyopencl-2024.2.data/data/examples/black-hole-accretion.py +2227 -0
  73. pyopencl-2024.2.data/data/examples/demo-struct-reduce.py +75 -0
  74. pyopencl-2024.2.data/data/examples/demo.py +39 -0
  75. pyopencl-2024.2.data/data/examples/demo_array.py +32 -0
  76. pyopencl-2024.2.data/data/examples/demo_array_svm.py +37 -0
  77. pyopencl-2024.2.data/data/examples/demo_elementwise.py +34 -0
  78. pyopencl-2024.2.data/data/examples/demo_elementwise_complex.py +53 -0
  79. pyopencl-2024.2.data/data/examples/demo_mandelbrot.py +183 -0
  80. pyopencl-2024.2.data/data/examples/demo_meta_codepy.py +56 -0
  81. pyopencl-2024.2.data/data/examples/demo_meta_template.py +55 -0
  82. pyopencl-2024.2.data/data/examples/dump-performance.py +38 -0
  83. pyopencl-2024.2.data/data/examples/dump-properties.py +86 -0
  84. pyopencl-2024.2.data/data/examples/gl_interop_demo.py +84 -0
  85. pyopencl-2024.2.data/data/examples/gl_particle_animation.py +218 -0
  86. pyopencl-2024.2.data/data/examples/ipython-demo.ipynb +203 -0
  87. pyopencl-2024.2.data/data/examples/median-filter.py +99 -0
  88. pyopencl-2024.2.data/data/examples/n-body.py +1070 -0
  89. pyopencl-2024.2.data/data/examples/narray.py +37 -0
  90. pyopencl-2024.2.data/data/examples/noisyImage.jpg +0 -0
  91. pyopencl-2024.2.data/data/examples/pi-monte-carlo.py +1166 -0
  92. pyopencl-2024.2.data/data/examples/svm.py +82 -0
  93. pyopencl-2024.2.data/data/examples/transpose.py +229 -0
  94. pyopencl-2024.2.data/data/pytest.ini +3 -0
  95. pyopencl-2024.2.data/data/src/bitlog.cpp +51 -0
  96. pyopencl-2024.2.data/data/src/bitlog.hpp +83 -0
  97. pyopencl-2024.2.data/data/src/clinfo_ext.h +134 -0
  98. pyopencl-2024.2.data/data/src/mempool.hpp +444 -0
  99. pyopencl-2024.2.data/data/src/pyopencl_ext.h +77 -0
  100. pyopencl-2024.2.data/data/src/tools.hpp +90 -0
  101. pyopencl-2024.2.data/data/src/wrap_cl.cpp +61 -0
  102. pyopencl-2024.2.data/data/src/wrap_cl.hpp +5853 -0
  103. pyopencl-2024.2.data/data/src/wrap_cl_part_1.cpp +369 -0
  104. pyopencl-2024.2.data/data/src/wrap_cl_part_2.cpp +702 -0
  105. pyopencl-2024.2.data/data/src/wrap_constants.cpp +1274 -0
  106. pyopencl-2024.2.data/data/src/wrap_helpers.hpp +213 -0
  107. pyopencl-2024.2.data/data/src/wrap_mempool.cpp +731 -0
  108. pyopencl-2024.2.data/data/test/add-vectors-32.spv +0 -0
  109. pyopencl-2024.2.data/data/test/add-vectors-64.spv +0 -0
  110. pyopencl-2024.2.data/data/test/empty-header.h +1 -0
  111. pyopencl-2024.2.data/data/test/test_algorithm.py +1180 -0
  112. pyopencl-2024.2.data/data/test/test_array.py +2392 -0
  113. pyopencl-2024.2.data/data/test/test_arrays_in_structs.py +100 -0
  114. pyopencl-2024.2.data/data/test/test_clmath.py +529 -0
  115. pyopencl-2024.2.data/data/test/test_clrandom.py +75 -0
  116. pyopencl-2024.2.data/data/test/test_enqueue_copy.py +271 -0
  117. pyopencl-2024.2.data/data/test/test_wrapper.py +1554 -0
  118. pyopencl-2024.2.dist-info/LICENSE +282 -0
  119. pyopencl-2024.2.dist-info/METADATA +105 -0
  120. pyopencl-2024.2.dist-info/RECORD +122 -0
  121. pyopencl-2024.2.dist-info/WHEEL +5 -0
  122. pyopencl-2024.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1164 @@
1
+ """Elementwise functionality."""
2
+
3
+
4
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
5
+
6
+ __license__ = """
7
+ Permission is hereby granted, free of charge, to any person
8
+ obtaining a copy of this software and associated documentation
9
+ files (the "Software"), to deal in the Software without
10
+ restriction, including without limitation the rights to use,
11
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the
13
+ Software is furnished to do so, subject to the following
14
+ conditions:
15
+
16
+ The above copyright notice and this permission notice shall be
17
+ included in all copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26
+ OTHER DEALINGS IN THE SOFTWARE.
27
+ """
28
+
29
+
30
+ import enum
31
+ from typing import Any, List, Optional, Tuple, Union
32
+
33
+ import numpy as np
34
+ from pytools import memoize_method
35
+
36
+ import pyopencl as cl
37
+ from pyopencl.tools import (
38
+ DtypedArgument, KernelTemplateBase, ScalarArg, VectorArg,
39
+ context_dependent_memoize, dtype_to_c_struct, dtype_to_ctype)
40
+
41
+
42
+ # {{{ elementwise kernel code generator
43
+
44
def get_elwise_program(
        context: cl.Context,
        arguments: List[DtypedArgument],
        operation: str, *,
        name: str = "elwise_kernel",
        options: Any = None,
        preamble: str = "",
        loop_prep: str = "",
        after_loop: str = "",
        use_range: bool = False) -> cl.Program:
    """Generate the OpenCL C source of an elementwise kernel and build it.

    The kernel is called *name*; its parameter list is made of the
    declarators of *arguments*. *operation* is pasted into a loop over the
    index variable ``i``.

    :arg preamble: source text placed before the kernel definition.
    :arg loop_prep: source text placed inside the kernel, before the loop.
    :arg after_loop: source text placed inside the kernel, after the loop.
    :arg use_range: if true, the loop runs over ``start``/``stop``/``step``
        (the generated code only references these names, so they have to be
        declared by *arguments*); otherwise it runs while ``i < n``.
    :arg options: passed on to :meth:`pyopencl.Program.build`.
    :returns: the built :class:`pyopencl.Program`.
    """

    if use_range:
        # Two loops, because the termination test depends on the sign of
        # 'step': count down while i > stop, count up while i < stop.
        body = r"""//CL//
          if (step < 0)
          {
            for (i = start + (work_group_start + lid)*step;
              i > stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          else
          {
            for (i = start + (work_group_start + lid)*step;
              i < stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          """
    else:
        # Each work item starts at its own index and advances by the
        # global size until 'n' is reached.
        body = """//CL//
          for (i = work_group_start + lid; i < n; i += gsize)
          {
            %(operation)s;
          }
          """

    import re
    # A 'return' inside the loop body would leave the whole loop, not just
    # the current element, so warn about it.
    return_match = re.search(r"\breturn\b", operation)
    if return_match is not None:
        from warnings import warn
        warn("Using a 'return' statement in an element-wise operation will "
             "likely lead to incorrect results. Use "
             "PYOPENCL_ELWISE_CONTINUE instead.",
             stacklevel=3)

    # Doubled braces are literal braces in the f-string; 'body' still uses
    # %-formatting for the operation placeholder.
    source = (f"""//CL//
        {preamble}

        #define PYOPENCL_ELWISE_CONTINUE continue

        __kernel void {name}({", ".join(arg.declarator() for arg in arguments)})
        {{
          int lid = get_local_id(0);
          int gsize = get_global_size(0);
          int work_group_start = get_local_size(0)*get_group_id(0);
          long i;

          {loop_prep};
          {body % {"operation": operation}}
          {after_loop};
        }}
        """)

    return cl.Program(context, source).build(options)
110
+
111
+
112
def get_elwise_kernel_and_types(
        context: cl.Context,
        arguments: Union[str, List[DtypedArgument]],
        operation: str, *,
        name: str = "elwise_kernel",
        options: Any = None,
        preamble: str = "",
        use_range: bool = False,
        **kwargs: Any) -> Tuple[cl.Kernel, List[DtypedArgument]]:
    """Build an elementwise kernel and return it with its argument list.

    *arguments* is parsed with offset support enabled and then extended by
    the index arguments the generated loop needs: ``start``, ``stop`` and
    ``step`` if *use_range* is true, ``n`` otherwise.

    The keyword ``auto_preamble`` (default ``True``) is consumed here: when
    enabled, an fp64 pragma block and/or the complex-number header include
    are prepended to *preamble* depending on the argument dtypes. The
    keyword ``loop_prep`` is consumed as well and prefixed with the
    argument offset adjustment code. Remaining keywords are passed on to
    :func:`get_elwise_program`.

    :returns: a tuple ``(kernel, parsed_args)``.
    """
    from pyopencl.tools import get_arg_offset_adjuster_code, parse_arg_list
    kernel_args = parse_arg_list(arguments, with_offset=True)

    if kwargs.pop("auto_preamble", True):
        needs_fp64 = False
        needs_complex = False
        for arg in kernel_args:
            if arg.dtype in [np.float64, np.complex128]:
                needs_fp64 = True
            if arg.dtype.kind == "c":
                needs_complex = True

        # Pragmas come first, includes second.
        generated = []
        if needs_fp64:
            generated.append("""
                        #if __OPENCL_C_VERSION__ < 120
                        #pragma OPENCL EXTENSION cl_khr_fp64: enable
                        #endif
                        #define PYOPENCL_DEFINE_CDOUBLE
                        """)
        if needs_complex:
            generated.append("#include <pyopencl-complex.h>\n")

        if generated:
            preamble = "\n".join(generated) + "\n" + preamble

    # Index arguments referenced by the generated loop.
    index_arg_names = ("start", "stop", "step") if use_range else ("n",)
    for index_arg_name in index_arg_names:
        kernel_args.append(ScalarArg(np.intp, index_arg_name))

    user_loop_prep = kwargs.pop("loop_prep", "")
    program = get_elwise_program(
        context, kernel_args, operation,
        name=name, options=options, preamble=preamble,
        use_range=use_range,
        loop_prep=get_arg_offset_adjuster_code(kernel_args) + user_loop_prep,
        **kwargs)

    from pyopencl.tools import get_arg_list_arg_types

    kernel = getattr(program, name)
    kernel.set_scalar_arg_dtypes(get_arg_list_arg_types(kernel_args))

    return kernel, kernel_args
173
+
174
+
175
def get_elwise_kernel(
        context: cl.Context,
        arguments: Union[str, List[DtypedArgument]],
        operation: str, *,
        name: str = "elwise_kernel",
        options: Any = None, **kwargs: Any) -> cl.Kernel:
    """Return a :class:`pyopencl.Kernel` that performs the same scalar
    operation on one or several vectors.

    This is :func:`get_elwise_kernel_and_types` with the parsed argument
    list discarded.
    """
    kernel, _ = get_elwise_kernel_and_types(
        context, arguments, operation,
        name=name, options=options, **kwargs)
    return kernel
189
+
190
+ # }}}
191
+
192
+
193
+ # {{{ ElementwiseKernel driver
194
+
195
class ElementwiseKernel:
    """
    A kernel that takes a number of scalar or vector *arguments* and performs
    an *operation* specified as a snippet of C on these arguments.

    :arg arguments: a string formatted as a C argument list, or a list of
        argument descriptors.
    :arg operation: a snippet of C that carries out the desired 'map'
        operation. The current index is available as the variable *i*.
        *operation* may contain the statement ``PYOPENCL_ELWISE_CONTINUE``,
        which will terminate processing for the current element.
    :arg name: the function name as which the kernel is compiled
    :arg options: passed unmodified to :meth:`pyopencl.Program.build`.
    :arg preamble: a piece of C source code that gets inserted outside of the
        function context in the elementwise operation's kernel source code.

    .. warning :: Using a ``return`` statement in *operation* will lead to
        incorrect results, as some elements may never get processed. Use
        ``PYOPENCL_ELWISE_CONTINUE`` instead.

    .. versionchanged:: 2013.1

        Added ``PYOPENCL_ELWISE_CONTINUE``.

    .. automethod:: __call__
    """

    def __init__(
            self,
            context: cl.Context,
            arguments: Union[str, List[DtypedArgument]],
            operation: str,
            name: str = "elwise_kernel",
            options: Any = None, **kwargs: Any) -> None:
        # Nothing is compiled here; the settings are only stored and the
        # kernel is built on first use in get_kernel().
        self.context = context
        self.arguments = arguments
        self.operation = operation
        self.name = name
        self.options = options
        self.kwargs = kwargs

    @memoize_method
    def get_kernel(self, use_range: bool):
        """Build the kernel variant for *use_range*.

        :returns: a tuple ``(kernel, arg_descrs)``.
        :raises RuntimeError: if none of the arguments is a vector argument.
        """
        knl, arg_descrs = get_elwise_kernel_and_types(
                self.context, self.arguments, self.operation,
                name=self.name, options=self.options,
                use_range=use_range, **self.kwargs)

        for arg in arg_descrs:
            if isinstance(arg, VectorArg) and not arg.with_offset:
                from warnings import warn
                warn(
                    f"ElementwiseKernel '{self.name}' used with VectorArgs "
                    "that do not have offset support enabled. This usage is "
                    "deprecated. Just pass with_offset=True to VectorArg, "
                    "everything should sort itself out automatically.",
                    DeprecationWarning, stacklevel=2)

        if not any(isinstance(arg, VectorArg) for arg in arg_descrs):
            raise RuntimeError(
                "ElementwiseKernel can only be used with functions that have "
                "at least one vector argument")

        return knl, arg_descrs

    def __call__(self, *args, **kwargs) -> cl.Event:
        """
        Invoke the generated scalar kernel.

        The arguments may either be scalars or :class:`pyopencl.array.Array`
        instances.

        Recognized keyword arguments are *range* and *slice* (mutually
        exclusive), *capture_as*, *queue* and *wait_for*. Any other keyword
        argument raises :exc:`TypeError`. If *capture_as* is given, the call
        is recorded via ``capture_call`` and the kernel is enqueued as well.

        |std-enqueue-blurb|
        """
        range_ = kwargs.pop("range", None)
        slice_ = kwargs.pop("slice", None)
        capture_as = kwargs.pop("capture_as", None)
        queue = kwargs.pop("queue", None)
        wait_for = kwargs.pop("wait_for", None)

        if kwargs:
            raise TypeError(f"unknown keyword arguments: '{', '.join(kwargs)}'")

        use_range = range_ is not None or slice_ is not None
        kernel, arg_descrs = self.get_kernel(use_range)

        # Copy so that the caller's sequence is never modified.
        wait_for = [] if wait_for is None else list(wait_for)

        # {{{ assemble arg array

        # The first vector argument supplies the default queue and the
        # element count. zip() stops at the user-supplied arguments; the
        # index arguments at the end of arg_descrs are appended further down.
        repr_vec = None
        invocation_args = []
        for arg, arg_descr in zip(args, arg_descrs):
            if repr_vec is None and isinstance(arg_descr, VectorArg):
                repr_vec = arg
            invocation_args.append(arg)

        assert repr_vec is not None

        # }}}

        if queue is None:
            queue = repr_vec.queue

        if slice_ is not None:
            if range_ is not None:
                raise TypeError(
                        "may not specify both range and slice keyword arguments")

            # Normalize the slice against the vector length so that start,
            # stop and step are all concrete integers.
            range_ = slice(*slice_.indices(repr_vec.size))

        max_wg_size = kernel.get_work_group_info(
                cl.kernel_work_group_info.WORK_GROUP_SIZE,
                queue.device)

        if range_ is not None:
            start = 0 if range_.start is None else range_.start
            step = 1 if range_.step is None else range_.step
            invocation_args.extend([start, range_.stop, step])

            from pyopencl.array import _splay
            # Size the launch by the magnitude of the range. Dividing by the
            # signed step (as was done previously) handed a negative element
            # count to _splay for descending ranges, even though the
            # generated kernel handles step < 0.
            gs, ls = _splay(queue.device,
                    abs(range_.stop - start)//abs(step),
                    max_wg_size)
        else:
            invocation_args.append(repr_vec.size)
            gs, ls = repr_vec._get_sizes(queue, max_wg_size)

        if capture_as is not None:
            kernel.set_args(*invocation_args)
            kernel.capture_call(
                    capture_as, queue,
                    gs, ls, *invocation_args, wait_for=wait_for)

        return kernel(queue, gs, ls, *invocation_args, wait_for=wait_for)
345
+
346
+ # }}}
347
+
348
+
349
+ # {{{ template
350
+
351
class ElementwiseTemplate(KernelTemplateBase):
    """A template from which :class:`ElementwiseKernel` instances are built.

    *arguments*, *operation*, *name* and *preamble* are stored unrendered and
    passed through the renderer in :meth:`build_inner`.
    """

    def __init__(
            self,
            arguments: Union[str, List[DtypedArgument]],
            operation: str,
            name: str = "elwise",
            preamble: str = "",
            template_processor: Optional[str] = None) -> None:
        super().__init__(template_processor=template_processor)
        self.arguments, self.operation = arguments, operation
        self.name, self.preamble = name, preamble

    def build_inner(self, context, type_aliases=(), var_values=(),
            more_preamble="", more_arguments=(), declare_types=(),
            options=None):
        """Render the template and return the resulting
        :class:`ElementwiseKernel`.

        The kernel is created with ``auto_preamble=False``; its preamble is
        the type declaration preamble followed by the rendered template
        preamble and *more_preamble*.
        """
        render = self.get_renderer(
                type_aliases, var_values, context, options)

        rendered_args = render.render_argument_list(
                self.arguments, more_arguments, with_offset=True)
        type_decls = render.get_type_decl_preamble(
                context.devices[0], declare_types, rendered_args)

        rendered_operation = render(self.operation)
        rendered_name = render(self.name)
        rendered_preamble = render(self.preamble + "\n" + more_preamble)

        return ElementwiseKernel(
                context, rendered_args, rendered_operation,
                name=rendered_name, options=options,
                preamble=type_decls + "\n" + rendered_preamble,
                auto_preamble=False)
384
+
385
+ # }}}
386
+
387
+
388
+ # {{{ argument kinds
389
+
390
class ArgumentKind(enum.Enum):
    """How a value is passed to, and accessed in, a generated kernel."""

    # Passed as a pointer and indexed with the loop index.
    ARRAY = 1
    # Passed as a pointer and read at index 0.
    DEV_SCALAR = 2
    # Passed by value.
    SCALAR = 3
394
+
395
+
396
def get_argument_kind(v: Any) -> ArgumentKind:
    """Classify *v* as an array, a device scalar or a host scalar.

    Anything that is not a :class:`pyopencl.array.Array` is a host scalar;
    an array whose shape is ``()`` is a device scalar.
    """
    from pyopencl.array import Array

    if not isinstance(v, Array):
        return ArgumentKind.SCALAR

    return ArgumentKind.DEV_SCALAR if v.shape == () else ArgumentKind.ARRAY
405
+
406
+
407
def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) -> Tuple[str, str]:
    """Return ``(declarator, access_expression)`` for an argument *name*.

    Arrays and device scalars are declared as pointers and read at index
    ``i`` and ``0`` respectively; host scalars are used by name.

    :raises AssertionError: if *kind* is not a known :class:`ArgumentKind`.
    """
    if kind == ArgumentKind.SCALAR:
        return f"{name}", name

    pointer_decl = f"*{name}"
    if kind == ArgumentKind.ARRAY:
        return pointer_decl, f"{name}[i]"
    if kind == ArgumentKind.DEV_SCALAR:
        return pointer_decl, f"{name}[0]"

    raise AssertionError()
416
+
417
+ # }}}
418
+
419
+
420
+ # {{{ kernels supporting array functionality
421
+
422
@context_dependent_memoize
def get_take_kernel(context, dtype, idx_dtype, vec_count=1):
    """Return the kernel ``take``: ``destK[i] = srcK[idx[i]]`` for each of
    the *vec_count* destination/source pairs.
    """
    idx_tp = dtype_to_ctype(idx_dtype)

    dest_args = [
        VectorArg(dtype, f"dest{k}", with_offset=True)
        for k in range(vec_count)]
    src_args = [
        VectorArg(dtype, f"src{k}", with_offset=True)
        for k in range(vec_count)]
    idx_arg = VectorArg(idx_dtype, "idx", with_offset=True)

    copy_stmts = [
        f"dest{k}[i] = src{k}[src_idx];"
        for k in range(vec_count)]
    operation = f"{idx_tp} src_idx = idx[i];\n" + "\n".join(copy_stmts)

    return get_elwise_kernel(
        context, dest_args + src_args + [idx_arg], operation,
        preamble=dtype_to_c_struct(context.devices[0], dtype),
        name="take")
441
+
442
+
443
@context_dependent_memoize
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
    """Return the kernel ``take_put``:
    ``destK[gmem_dest_idx[i]] = srcK[gmem_src_idx[i]]`` for each of the
    *vec_count* pairs.

    If *with_offsets* is true, one scalar ``offsetK`` per pair is appended to
    the argument list and added to the source index.
    """
    idx_tp = dtype_to_ctype(idx_dtype)

    dest_args = [VectorArg(dtype, f"dest{k}") for k in range(vec_count)]
    index_args = [
        VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True),
        VectorArg(idx_dtype, "gmem_src_idx", with_offset=True),
    ]
    src_args = [
        VectorArg(dtype, f"src{k}", with_offset=True)
        for k in range(vec_count)]
    if with_offsets:
        offset_args = [
            ScalarArg(idx_dtype, f"offset{k}") for k in range(vec_count)]
    else:
        offset_args = []

    copy_stmts = [
        (f"dest{k}[dest_idx] = src{k}[src_idx + offset{k}];"
         if with_offsets
         else f"dest{k}[dest_idx] = src{k}[src_idx];")
        for k in range(vec_count)]

    index_loads = (
        f"{idx_tp} src_idx = gmem_src_idx[i];\n"
        f"{idx_tp} dest_idx = gmem_dest_idx[i];\n")
    operation = index_loads + "\n".join(copy_stmts)

    return get_elwise_kernel(
        context, dest_args + index_args + src_args + offset_args, operation,
        preamble=dtype_to_c_struct(context.devices[0], dtype),
        name="take_put")
475
+
476
+
477
@context_dependent_memoize
def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Return the kernel ``put``, which writes to ``destK[gmem_dest_idx[i]]``.

    For pair *K* the written value is ``srcK[0]`` if ``use_fill[K]`` is
    nonzero, otherwise ``srcK[i % val_ary_lengths[K]]``.
    """
    idx_tp = dtype_to_ctype(idx_dtype)

    dest_args = [
        VectorArg(dtype, f"dest{k}", with_offset=True)
        for k in range(vec_count)]
    src_args = [
        VectorArg(dtype, f"src{k}", with_offset=True)
        for k in range(vec_count)]

    kernel_args = (
        dest_args
        + [VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True)]
        + src_args
        + [VectorArg(np.uint8, "use_fill", with_offset=True)]
        + [VectorArg(np.int64, "val_ary_lengths", with_offset=True)])

    store_stmts = [
        f"dest{k}[dest_idx] = (use_fill[{k}] ? src{k}[0] : "
        f"src{k}[i % val_ary_lengths[{k}]]);"
        for k in range(vec_count)]
    operation = (
        f"{idx_tp} dest_idx = gmem_dest_idx[i];\n" + "\n".join(store_stmts))

    return get_elwise_kernel(
        context, kernel_args, operation,
        preamble=dtype_to_c_struct(context.devices[0], dtype),
        name="put")
507
+
508
+
509
@context_dependent_memoize
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Return the kernel ``copy``: ``dest[i] = src[i]`` with conversion.

    For a complex destination, a non-complex source is wrapped in
    ``<name>_fromreal`` and a source of differing dtype in ``<name>_cast``.

    :raises TypeError: if the dtypes differ and either one is a struct
        (kind ``"V"``).
    """
    dest_is_complex = dtype_dest.kind == "c"
    src_expr = "src[i]"

    if dest_is_complex and "c" != dtype_src.kind:
        src_expr = f"{complex_dtype_to_name(dtype_dest)}_fromreal({src_expr})"

    if dest_is_complex and dtype_src != dtype_dest:
        src_expr = f"{complex_dtype_to_name(dtype_dest)}_cast({src_expr})"

    if dtype_dest != dtype_src and (
            dtype_dest.kind == "V" or dtype_src.kind == "V"):
        raise TypeError("copying between non-identical struct types")

    tp_dest = dtype_to_ctype(dtype_dest)
    tp_src = dtype_to_ctype(dtype_src)

    return get_elwise_kernel(
        context,
        f"{tp_dest} *dest, {tp_src} *src",
        f"dest[i] = {src_expr}",
        preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
        name="copy")
532
+
533
+
534
def complex_dtype_to_name(dtype) -> str:
    """Return the name prefix used for *dtype* in generated code.

    ``"cdouble"`` for ``complex128``, ``"cfloat"`` for ``complex64``.

    :raises RuntimeError: for any other dtype.
    """
    known_names = (
        (np.complex128, "cdouble"),
        (np.complex64, "cfloat"),
    )
    for candidate, cl_name in known_names:
        if dtype == candidate:
            return cl_name

    raise RuntimeError(f"invalid complex type: {dtype}")
541
+
542
+
543
def real_dtype(dtype):
    """Return the dtype of the real part of a scalar of type *dtype*."""
    zero = dtype.type(0)
    real_part = zero.real
    return real_part.dtype
545
+
546
+
547
@context_dependent_memoize
def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z,
        x_is_scalar=False, y_is_scalar=False):
    """Return the kernel ``axpbyz``: ``z[i] = a*x + b*y``.

    *x* and *y* are read at index 0 if the matching ``*_is_scalar`` flag is
    set and at index ``i`` otherwise. ``a`` and ``b`` are declared with the
    type of *z*.
    """
    result_t = dtype_to_ctype(dtype_z)

    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"

    x_access = "x[0]" if x_is_scalar else "x[i]"
    y_access = "y[0]" if y_is_scalar else "y[i]"

    if dtype_z.kind == "c":
        # a and b will always be complex here.
        z_ct = complex_dtype_to_name(dtype_z)

        def scaled(coeff, operand, operand_is_complex):
            # complex*complex needs the operand cast to z's type first,
            # complex*real uses the mixed multiply.
            if operand_is_complex:
                return f"{z_ct}_mul({coeff}, {z_ct}_cast({operand}))"
            return f"{z_ct}_mulr({coeff}, {operand})"

        ax = scaled("a", x_access, x_is_complex)
        by = scaled("b", y_access, y_is_complex)
        result = f"{z_ct}_add({ax}, {by})"
    else:
        # real-only
        ax = f"a*(({result_t}) {x_access})"
        by = f"b*(({result_t}) {y_access})"
        result = f"{ax} + {by}"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
        context,
        f"{tp_z} *z, {tp_z} a, {tp_x} *x, {tp_z} b, {tp_y} *y",
        f"z[i] = {result}",
        name="axpbyz")
589
+
590
+
591
@context_dependent_memoize
def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
    """Return the kernel ``axpb``: ``z[i] = a*x[i] + b`` with scalar ``a``
    and ``b``.

    Complex operands are combined through the ``<name>_*`` helper functions
    of the result type, where ``<name>`` comes from
    :func:`complex_dtype_to_name` applied to *dtype_z*.
    """
    a_is_complex = dtype_a.kind == "c"
    x_is_complex = dtype_x.kind == "c"
    b_is_complex = dtype_b.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # The name lookup is done per call, so it only happens (and can only
        # fail) when a complex helper is actually needed.
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    if x_is_complex:
        x_term = z_call("cast", "x[i]") if dtype_x != dtype_z else "x[i]"
        if a_is_complex:
            a_term = z_call("cast", "a") if dtype_a != dtype_z else "a"
            product = z_call("mul", a_term, x_term)
        else:
            product = z_call("rmul", "a", x_term)
    elif a_is_complex:
        a_term = z_call("cast", "a") if dtype_a != dtype_z else "a"
        product = z_call("mulr", a_term, "x[i]")
    else:
        product = "a*x[i]"

    offset = "b"
    if z_is_complex:
        # Promote purely real terms, then cast both to the result type.
        if not b_is_complex:
            offset = z_call("fromreal", offset)
        if not (a_is_complex or x_is_complex):
            product = z_call("fromreal", product)
        product = z_call("cast", product)
        offset = z_call("cast", offset)

    if a_is_complex or x_is_complex or b_is_complex:
        expr = z_call("add", product, offset)
    else:
        expr = f"{product} + {offset}"

    tp_a = dtype_to_ctype(dtype_a)
    tp_x = dtype_to_ctype(dtype_x)
    tp_b = dtype_to_ctype(dtype_b)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
        context,
        f"{tp_z} *z, {tp_a} a, {tp_x} *x,{tp_b} b",
        f"z[i] = {expr}",
        name="axpb")
650
+
651
+
652
@context_dependent_memoize
def get_multiply_kernel(context, dtype_x, dtype_y, dtype_z,
        x_is_scalar=False, y_is_scalar=False):
    """Return the kernel ``multiply``: ``z[i] = x * y``.

    *x* and *y* are read at index 0 if the matching ``*_is_scalar`` flag is
    set and at index ``i`` otherwise.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"

    def z_call(suffix, *operands):
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    lhs = "x[0]" if x_is_scalar else "x[i]"
    rhs = "y[0]" if y_is_scalar else "y[i]"

    if x_is_complex and dtype_x != dtype_z:
        lhs = z_call("cast", lhs)
    if y_is_complex and dtype_y != dtype_z:
        rhs = z_call("cast", rhs)

    # Which helper to use depends on which operands are complex.
    helper = {
        (True, True): "mul",
        (True, False): "mulr",
        (False, True): "rmul",
    }.get((x_is_complex, y_is_complex))

    if helper is None:
        product = f"{lhs} * {rhs}"
    else:
        product = z_call(helper, lhs, rhs)

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
        context,
        f"{tp_z} *z, {tp_x} *x, {tp_y} *y",
        f"z[i] = {product}",
        name="multiply")
683
+
684
+
685
@context_dependent_memoize
def get_divide_kernel(context, dtype_x, dtype_y, dtype_z,
        x_is_scalar=False, y_is_scalar=False):
    """Return the kernel ``divide``: ``z[i] = x / y``.

    *x* and *y* are read at index 0 if the matching ``*_is_scalar`` flag is
    set and at index ``i`` otherwise.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    def c_cast(expr):
        return f"({dtype_to_ctype(dtype_z)}) ({expr})"

    numer = "x[0]" if x_is_scalar else "x[i]"
    denom = "y[0]" if y_is_scalar else "y[i]"

    if z_is_complex and dtype_x != dtype_y:
        # Mixed operand types with a complex result: only complex operands
        # are converted, through the complex cast helper.
        if x_is_complex and dtype_x != dtype_z:
            numer = z_call("cast", numer)
        if y_is_complex and dtype_y != dtype_z:
            denom = z_call("cast", denom)
    else:
        if dtype_x != dtype_z:
            numer = c_cast(numer)
        if dtype_y != dtype_z:
            denom = c_cast(denom)

    helper = {
        (True, True): "divide",
        (False, True): "rdivide",
        (True, False): "divider",
    }.get((x_is_complex, y_is_complex))

    if helper is None:
        quotient = f"{numer} / {denom}"
    else:
        quotient = z_call(helper, numer, denom)

    if z_is_complex:
        quotient = z_call("cast", quotient)

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
        context,
        f"{tp_z} *z, {tp_x} *x, {tp_y} *y",
        f"z[i] = {quotient}",
        name="divide")
726
+
727
+
728
@context_dependent_memoize
def get_rdivide_elwise_kernel(context, dtype_x, dtype_y, dtype_z):
    """Return the kernel ``divide_r``: ``z[i] = y / x[i]`` with scalar ``y``.

    Note the operand order: the scalar is the numerator.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    denom = "x[i]"
    numer = "y"

    if z_is_complex and dtype_x != dtype_y:
        if x_is_complex and dtype_x != dtype_z:
            denom = z_call("cast", denom)
        if y_is_complex and dtype_y != dtype_z:
            numer = z_call("cast", numer)

    # Keyed on (numerator is complex, denominator is complex).
    helper = {
        (True, True): "divide",
        (False, True): "rdivide",
        (True, False): "divider",
    }.get((y_is_complex, x_is_complex))

    if helper is None:
        quotient = f"{numer} / {denom}"
    else:
        quotient = z_call(helper, numer, denom)

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
        context,
        f"{tp_z} *z, {tp_x} *x, {tp_y} y",
        f"z[i] = {quotient}",
        name="divide_r")
761
+
762
+
763
@context_dependent_memoize
def get_fill_kernel(context, dtype):
    """Return the kernel ``fill``: ``z[i] = a`` for a scalar ``a``."""
    tp = dtype_to_ctype(dtype)
    arg_decls = f"{tp} *z, {tp} a"

    return get_elwise_kernel(
        context, arg_decls, "z[i] = a",
        preamble=dtype_to_c_struct(context.devices[0], dtype),
        name="fill")
770
+
771
+
772
@context_dependent_memoize
def get_reverse_kernel(context, dtype):
    """Return the kernel ``reverse``: ``z[i] = y[n-1-i]``."""
    tp = dtype_to_ctype(dtype)
    arg_decls = f"{tp} *z, {tp} *y"

    return get_elwise_kernel(
        context, arg_decls, "z[i] = y[n-1-i]",
        name="reverse")
778
+
779
+
780
@context_dependent_memoize
def get_arange_kernel(context, dtype):
    """Return the kernel ``arange``: ``z[i] = start + i*step``.

    For complex *dtype* the sum and product are formed with the ``_add`` and
    ``_rmul`` helpers of that type.
    """
    if dtype.kind == "c":
        root = complex_dtype_to_name(dtype)
        value = f"{root}_add(start, {root}_rmul(i, step))"
    else:
        value = f"start + (({dtype_to_ctype(dtype)}) i) * step"

    kernel_args = [
        VectorArg(dtype, "z", with_offset=True),
        ScalarArg(dtype, "start"),
        ScalarArg(dtype, "step"),
    ]

    return get_elwise_kernel(
        context, kernel_args, f"z[i] = {value}",
        name="arange")
796
+
797
+
798
@context_dependent_memoize
def get_pow_kernel(context, dtype_x, dtype_y, dtype_z,
        is_base_array, is_exp_array):
    """Return an elementwise kernel computing ``z[i] = x ** y``.

    *is_base_array* and *is_exp_array* select whether the base ``x`` and the
    exponent ``y`` are pointer arguments indexed with ``[i]`` or plain scalar
    arguments.  When the input dtypes differ, operands are cast to the
    result dtype.  Complex operands use the ``<name>_pow``, ``<name>_powr``
    or ``<name>_rpow`` function, where ``<name>`` is
    ``complex_dtype_to_name(dtype_z)``.
    """
    base = "x[i]" if is_base_array else "x"
    exponent = "y[i]" if is_exp_array else "y"

    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    if dtype_x != dtype_y:
        if z_is_complex:
            # Complex result: only complex operands get a complex cast.
            if x_is_complex and dtype_x != dtype_z:
                base = f"{complex_dtype_to_name(dtype_z)}_cast({base})"
            if y_is_complex and dtype_y != dtype_z:
                exponent = (
                    f"{complex_dtype_to_name(dtype_z)}_cast({exponent})")
        else:
            # Non-complex result: plain C casts to the result type.
            if dtype_x != dtype_z:
                base = f"({dtype_to_ctype(dtype_z)}) ({base})"
            if dtype_y != dtype_z:
                exponent = f"({dtype_to_ctype(dtype_z)}) ({exponent})"

    if x_is_complex and y_is_complex:
        routine = "pow"
    elif x_is_complex:
        routine = "powr"
    elif y_is_complex:
        routine = "rpow"
    else:
        routine = None

    if routine is None:
        power = f"pow({base}, {exponent})"
    else:
        power = (
            f"{complex_dtype_to_name(dtype_z)}_{routine}"
            f"({base}, {exponent})")

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    x_decl = f"{tp_x} *x" if is_base_array else f"{tp_x} x"
    y_decl = f"{tp_y} *y" if is_exp_array else f"{tp_y} y"

    return get_elwise_kernel(
        context,
        f"{tp_z} *z, {x_decl}, {y_decl}",
        f"z[i] = {power}",
        name="pow_method")
847
+
848
+
849
@context_dependent_memoize
def get_unop_kernel(context, operator, res_dtype, in_dtype):
    """Return an elementwise kernel storing ``operator y[i]`` into ``z[i]``."""
    kernel_args = [
        VectorArg(res_dtype, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    operation = f"z[i] = {operator} y[i]"

    return get_elwise_kernel(
        context, kernel_args, operation, name="unary_op_kernel")
857
+
858
+
859
@context_dependent_memoize
def get_array_scalar_binop_kernel(context, operator, dtype_res, dtype_a, dtype_b):
    """Return an elementwise kernel for ``out[i] = a[i] <operator> b``.

    ``a`` is a vector argument and ``b`` is a scalar argument.
    """
    kernel_args = [
        VectorArg(dtype_res, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        ScalarArg(dtype_b, "b"),
    ]
    operation = f"out[i] = a[i] {operator} b"

    return get_elwise_kernel(
        context, kernel_args, operation, name="scalar_binop_kernel")
868
+
869
+
870
@context_dependent_memoize
def get_array_binop_kernel(context, operator, dtype_res, dtype_a, dtype_b,
        a_is_scalar=False, b_is_scalar=False):
    """Return an elementwise kernel for ``out[i] = a <operator> b``.

    Both operands are vector arguments.  An operand flagged as scalar is
    read from index ``0`` for every ``i`` instead of from index ``i``.
    """
    lhs = "a[0]" if a_is_scalar else "a[i]"
    rhs = "b[0]" if b_is_scalar else "b[i]"

    kernel_args = [
        VectorArg(dtype_res, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        VectorArg(dtype_b, "b", with_offset=True),
    ]

    return get_elwise_kernel(
        context,
        kernel_args,
        f"out[i] = {lhs} {operator} {rhs}",
        name="binop_kernel")
882
+
883
+
884
@context_dependent_memoize
def get_array_scalar_comparison_kernel(context, operator, dtype_a):
    """Return an elementwise kernel for ``out[i] = a[i] <operator> b``.

    ``b`` is a scalar argument of the same dtype as ``a``; the output
    vector is declared as ``np.int8``.
    """
    kernel_args = [
        VectorArg(np.int8, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        ScalarArg(dtype_a, "b"),
    ]
    operation = f"out[i] = a[i] {operator} b"

    return get_elwise_kernel(
        context, kernel_args, operation, name="scalar_comparison_kernel")
893
+
894
+
895
@context_dependent_memoize
def get_array_comparison_kernel(context, operator, dtype_a, dtype_b):
    """Return an elementwise kernel for ``out[i] = a[i] <operator> b[i]``.

    The output vector is declared as ``np.int8``.
    """
    kernel_args = [
        VectorArg(np.int8, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        VectorArg(dtype_b, "b", with_offset=True),
    ]
    operation = f"out[i] = a[i] {operator} b[i]"

    return get_elwise_kernel(
        context, kernel_args, operation, name="comparison_kernel")
904
+
905
+
906
@context_dependent_memoize
def get_unary_func_kernel(context, func_name, in_dtype, out_dtype=None):
    """Return an elementwise kernel for ``z[i] = func_name(y[i])``.

    If *out_dtype* is *None*, the output uses *in_dtype*.  The kernel is
    named ``<func_name>_kernel``.
    """
    result_dtype = in_dtype if out_dtype is None else out_dtype

    kernel_args = [
        VectorArg(result_dtype, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]

    return get_elwise_kernel(
        context,
        kernel_args,
        f"z[i] = {func_name}(y[i])",
        name=f"{func_name}_kernel")
917
+
918
+
919
@context_dependent_memoize
def get_binary_func_kernel(context, func_name, x_dtype, y_dtype, out_dtype,
        preamble="", name=None):
    """Return an elementwise kernel for ``z[i] = func_name(x[i], y[i])``.

    The kernel is named ``<name>_kernel``, where *name* falls back to
    *func_name* when it is *None*.  *preamble* is forwarded unchanged.
    """
    kernel_label = func_name if name is None else name

    kernel_args = [
        VectorArg(out_dtype, "z", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
        VectorArg(y_dtype, "y", with_offset=True),
    ]

    return get_elwise_kernel(
        context,
        kernel_args,
        f"z[i] = {func_name}(x[i], y[i])",
        name=f"{kernel_label}_kernel",
        preamble=preamble)
933
+
934
+
935
@context_dependent_memoize
def get_float_binary_func_kernel(context, func_name, x_dtype, y_dtype,
        out_dtype, preamble="", name=None):
    """Return a kernel for ``z[i] = func_name((T)x[i], (T)y[i])``.

    ``T`` is ``double`` when the product of a zero of *x_dtype* and a zero
    of *y_dtype* has an item size above four bytes, and ``float`` otherwise.
    In the ``double`` case the fp64 pragma and ``PYOPENCL_DEFINE_CDOUBLE``
    are prepended to *preamble*.  The kernel is named ``<name>_kernel``,
    where *name* falls back to *func_name* when it is *None*.
    """
    kernel_label = func_name if name is None else name

    promoted = np.array(0, x_dtype) * np.array(0, y_dtype)
    if promoted.itemsize > 4:
        arg_type = "double"
        preamble = """
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        """ + preamble
    else:
        arg_type = "float"

    kernel_args = [
        VectorArg(out_dtype, "z", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
        VectorArg(y_dtype, "y", with_offset=True),
    ]

    return get_elwise_kernel(
        context,
        kernel_args,
        f"z[i] = {func_name}(({arg_type})x[i], ({arg_type})y[i])",
        name=f"{kernel_label}_kernel",
        preamble=preamble)
960
+
961
+
962
@context_dependent_memoize
def get_fmod_kernel(context, out_dtype=np.float32, arg_dtype=np.float32,
        mod_dtype=np.float32):
    """Return the ``fmod`` kernel from :func:`get_float_binary_func_kernel`.

    *arg_dtype* is the dtype of the first operand, *mod_dtype* that of the
    second, and *out_dtype* that of the result.
    """
    func_name = "fmod"
    return get_float_binary_func_kernel(
        context, func_name, arg_dtype, mod_dtype, out_dtype)
967
+
968
+
969
@context_dependent_memoize
def get_modf_kernel(context, int_dtype=np.float32,
        frac_dtype=np.float32, x_dtype=np.float32):
    """Return an elementwise kernel that calls ``modf`` on ``x[i]``.

    The return value of ``modf`` is stored in ``fracpart[i]`` and its
    pointer argument is ``&intpart[i]``.
    """
    kernel_args = [
        VectorArg(int_dtype, "intpart", with_offset=True),
        VectorArg(frac_dtype, "fracpart", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    operation = """
        fracpart[i] = modf(x[i], &intpart[i])
        """

    return get_elwise_kernel(
        context, kernel_args, operation, name="modf_kernel")
981
+
982
+
983
@context_dependent_memoize
def get_frexp_kernel(context, sign_dtype=np.float32, exp_dtype=np.float32,
        x_dtype=np.float32):
    """Return an elementwise kernel that calls ``frexp`` on ``x[i]``.

    The return value of ``frexp`` is stored in ``significand[i]``; the
    exponent is received in a local ``int`` and then copied to
    ``exponent[i]``.
    """
    kernel_args = [
        VectorArg(sign_dtype, "significand", with_offset=True),
        VectorArg(exp_dtype, "exponent", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    operation = """
        int expt = 0;
        significand[i] = frexp(x[i], &expt);
        exponent[i] = expt;
        """

    return get_elwise_kernel(
        context, kernel_args, operation, name="frexp_kernel")
997
+
998
+
999
@context_dependent_memoize
def get_ldexp_kernel(context, out_dtype=np.float32, sig_dtype=np.float32,
        expt_dtype=np.float32):
    """Return the ``ldexp`` kernel built via :func:`get_binary_func_kernel`.

    The binary function is the macro ``_PYOCL_LDEXP``, which forwards to
    ``ldexp`` after casting its second argument to ``int``.
    """
    macro_name = "_PYOCL_LDEXP"
    macro_definition = "#define _PYOCL_LDEXP(x, y) ldexp(x, (int)(y))"

    return get_binary_func_kernel(
        context, macro_name, sig_dtype, expt_dtype, out_dtype,
        preamble=macro_definition,
        name="ldexp_kernel")
1006
+
1007
+
1008
@context_dependent_memoize
def get_minmaximum_kernel(context, minmax, dtype_z, dtype_x, dtype_y,
        kind_x: ArgumentKind, kind_y: ArgumentKind):
    """Return an elementwise kernel computing a minimum or maximum into ``z``.

    :arg minmax: prefix of the reduction function; the kernel calls
        ``f<minmax>_nanprop`` for floating-point results and ``<minmax>``
        itself for integer results.
    :arg dtype_z: dtype of the result; its ``kind`` selects the function.
    :arg dtype_x: dtype of the first operand.
    :arg dtype_y: dtype of the second operand.
    :arg kind_x: passed to ``get_decl_and_access_for_kind`` to obtain the
        declaration and access expression for ``x``.
    :arg kind_y: same as *kind_x*, for ``y``.
    :raises TypeError: if *dtype_z* is neither floating-point nor integer.
    """
    if dtype_z.kind == "f":
        reduce_func = f"f{minmax}_nanprop"
    elif dtype_z.kind in "iu":
        reduce_func = minmax
    else:
        raise TypeError("unsupported dtype specified")

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)
    decl_x, acc_x = get_decl_and_access_for_kind("x", kind_x)
    decl_y, acc_y = get_decl_and_access_for_kind("y", kind_y)

    # The macro arguments and the whole replacement list are parenthesized
    # so the expansion stays a single expression regardless of what the
    # access expressions are or where the macro is used.
    nanprop_macros = (
        "\n"
        "#define fmin_nanprop(a, b) "
        "((isnan(a) || isnan(b)) ? ((a) + (b)) : fmin((a), (b)))\n"
        "#define fmax_nanprop(a, b) "
        "((isnan(a) || isnan(b)) ? ((a) + (b)) : fmax((a), (b)))\n"
    )

    return get_elwise_kernel(context,
            f"{tp_z} *z, {tp_x} {decl_x}, {tp_y} {decl_y}",
            f"z[i] = {reduce_func}({acc_x}, {acc_y})",
            name=f"{minmax}imum",
            preamble=nanprop_macros)
1032
+
1033
+
1034
@context_dependent_memoize
def get_bessel_kernel(context, which_func, out_dtype=np.float64,
        order_dtype=np.int32, x_dtype=np.float64):
    """Return an elementwise kernel evaluating a Bessel function of order n.

    :arg which_func: letter selecting the function; for non-complex input
        the kernel calls ``bessel_<which_func>n`` and includes
        ``pyopencl-bessel-<which_func>.cl``.
    :arg out_dtype: dtype of the result vector ``z``.
    :arg order_dtype: dtype of the scalar order argument ``ord_n``.
    :arg x_dtype: dtype of the input vector ``x``.
    :raises NotImplementedError: for complex input when *which_func* is not
        ``"j"``, when *x_dtype* is not ``complex128``, or when *x_dtype*
        and *out_dtype* differ.
    """
    # The declared defaults are NumPy scalar types, which have no ``kind``
    # attribute.  Normalizing to dtype instances makes the defaults usable
    # and leaves dtype instances passed by callers unchanged.
    out_dtype = np.dtype(out_dtype)
    x_dtype = np.dtype(x_dtype)

    if x_dtype.kind != "c":
        return get_elwise_kernel(context, [
            VectorArg(out_dtype, "z", with_offset=True),
            ScalarArg(order_dtype, "ord_n"),
            VectorArg(x_dtype, "x", with_offset=True),
            ],
            f"z[i] = bessel_{which_func}n(ord_n, x[i])",
            name=f"bessel_{which_func}n_kernel",
            preamble=f"""
            #if __OPENCL_C_VERSION__ < 120
            #pragma OPENCL EXTENSION cl_khr_fp64: enable
            #endif
            #define PYOPENCL_DEFINE_CDOUBLE
            #include <pyopencl-bessel-{which_func}.cl>
            """)

    # Complex input: only the double-precision complex J variant exists.
    if which_func != "j":
        raise NotImplementedError("complex arguments for Bessel Y")

    if x_dtype != np.complex128:
        raise NotImplementedError("non-complex double dtype")
    if x_dtype != out_dtype:
        raise NotImplementedError("different input/output types")

    return get_elwise_kernel(context, [
        VectorArg(out_dtype, "z", with_offset=True),
        ScalarArg(order_dtype, "ord_n"),
        VectorArg(x_dtype, "x", with_offset=True),
        ],
        """
        cdouble_t jv_loc;
        cdouble_t jvp1_loc;
        bessel_j_complex(ord_n, x[i], &jv_loc, &jvp1_loc);
        z[i] = jv_loc;
        """,
        name="bessel_j_complex_kernel",
        preamble="""
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        #include <pyopencl-complex.h>
        #include <pyopencl-bessel-j-complex.cl>
        """)
1081
+
1082
+
1083
@context_dependent_memoize
def get_hankel_01_kernel(context, out_dtype, x_dtype):
    """Return an elementwise kernel that calls ``hankel_01_complex``.

    For every ``x[i]`` the two results of ``hankel_01_complex`` are stored
    in ``h0[i]`` and ``h1[i]``.

    :raises NotImplementedError: if *x_dtype* is not ``complex128`` or if
        *x_dtype* and *out_dtype* differ.
    """
    if x_dtype != np.complex128:
        raise NotImplementedError("non-complex double dtype")
    if x_dtype != out_dtype:
        raise NotImplementedError("different input/output types")

    kernel_args = [
        VectorArg(out_dtype, "h0", with_offset=True),
        VectorArg(out_dtype, "h1", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    operation = """
        cdouble_t h0_loc;
        cdouble_t h1_loc;
        hankel_01_complex(x[i], &h0_loc, &h1_loc, 1);
        h0[i] = h0_loc;
        h1[i] = h1_loc;
        """
    hankel_preamble = """
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        #include <pyopencl-complex.h>
        #include <pyopencl-hankel-complex.cl>
        """

    return get_elwise_kernel(
        context,
        kernel_args,
        operation,
        name="hankel_complex_kernel",
        preamble=hankel_preamble)
1111
+
1112
+
1113
@context_dependent_memoize
def get_diff_kernel(context, dtype):
    """Return a kernel for ``result[i] = array[i+1] - array[i]``."""
    kernel_args = [
        VectorArg(dtype, "result", with_offset=True),
        VectorArg(dtype, "array", with_offset=True),
    ]
    operation = "result[i] = array[i+1] - array[i]"

    return get_elwise_kernel(context, kernel_args, operation, name="diff")
1121
+
1122
+
1123
@context_dependent_memoize
def get_if_positive_kernel(
        context, crit_dtype, then_else_dtype,
        is_then_array, is_else_array,
        is_then_scalar, is_else_scalar):
    """Return a kernel for ``result[i] = crit[i] > 0 ? then_ : else_``.

    Each branch value is either a vector argument (read at index ``0`` when
    it is flagged scalar, at index ``i`` otherwise) or a plain scalar
    argument, in which case its scalar flag must be set.
    """

    def describe_branch(label, is_array, is_scalar):
        # Returns the access expression and the kernel argument for a branch.
        if is_array:
            access = f"{label}[0]" if is_scalar else f"{label}[i]"
            return access, VectorArg(then_else_dtype, label, with_offset=True)

        assert is_scalar
        return label, ScalarArg(then_else_dtype, label)

    then_, then_arg = describe_branch("then_", is_then_array, is_then_scalar)
    else_, else_arg = describe_branch("else_", is_else_array, is_else_scalar)

    kernel_args = [
        VectorArg(then_else_dtype, "result", with_offset=True),
        VectorArg(crit_dtype, "crit", with_offset=True),
        then_arg,
        else_arg,
    ]

    return get_elwise_kernel(
        context,
        kernel_args,
        f"result[i] = crit[i] > 0 ? {then_} : {else_}",
        name="if_positive")
1151
+
1152
+
1153
@context_dependent_memoize
def get_logical_not_kernel(context, in_dtype):
    """Return an elementwise kernel for ``z[i] = (y[i] == 0)``.

    The output vector is declared as ``np.int8``.
    """
    kernel_args = [
        VectorArg(np.int8, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    operation = "z[i] = (y[i] == 0)"

    return get_elwise_kernel(
        context, kernel_args, operation, name="logical_not_kernel")
1161
+
1162
+ # }}}
1163
+
1164
+ # vim: fdm=marker