pyopencl 2025.2.5__cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (47)
  1. pyopencl/.libs/libOpenCL-83a5a7fd.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +1995 -0
  3. pyopencl/_cl.cpython-313-x86_64-linux-gnu.so +0 -0
  4. pyopencl/_cl.pyi +2006 -0
  5. pyopencl/_cluda.py +57 -0
  6. pyopencl/_monkeypatch.py +1069 -0
  7. pyopencl/_mymako.py +17 -0
  8. pyopencl/algorithm.py +1454 -0
  9. pyopencl/array.py +3441 -0
  10. pyopencl/bitonic_sort.py +245 -0
  11. pyopencl/bitonic_sort_templates.py +597 -0
  12. pyopencl/cache.py +535 -0
  13. pyopencl/capture_call.py +200 -0
  14. pyopencl/characterize/__init__.py +463 -0
  15. pyopencl/characterize/performance.py +240 -0
  16. pyopencl/cl/pyopencl-airy.cl +324 -0
  17. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  18. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  19. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  20. pyopencl/cl/pyopencl-complex.h +303 -0
  21. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  22. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  23. pyopencl/cl/pyopencl-random123/array.h +325 -0
  24. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  25. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  26. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  27. pyopencl/clmath.py +282 -0
  28. pyopencl/clrandom.py +412 -0
  29. pyopencl/cltypes.py +202 -0
  30. pyopencl/compyte/.gitignore +21 -0
  31. pyopencl/compyte/__init__.py +0 -0
  32. pyopencl/compyte/array.py +241 -0
  33. pyopencl/compyte/dtypes.py +316 -0
  34. pyopencl/compyte/pyproject.toml +52 -0
  35. pyopencl/elementwise.py +1178 -0
  36. pyopencl/invoker.py +417 -0
  37. pyopencl/ipython_ext.py +70 -0
  38. pyopencl/py.typed +0 -0
  39. pyopencl/reduction.py +815 -0
  40. pyopencl/scan.py +1916 -0
  41. pyopencl/tools.py +1565 -0
  42. pyopencl/typing.py +61 -0
  43. pyopencl/version.py +11 -0
  44. pyopencl-2025.2.5.dist-info/METADATA +109 -0
  45. pyopencl-2025.2.5.dist-info/RECORD +47 -0
  46. pyopencl-2025.2.5.dist-info/WHEEL +6 -0
  47. pyopencl-2025.2.5.dist-info/licenses/LICENSE +104 -0
@@ -0,0 +1,1178 @@
1
+ """Elementwise functionality."""
2
+ from __future__ import annotations
3
+
4
+
5
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
6
+
7
+ __license__ = """
8
+ Permission is hereby granted, free of charge, to any person
9
+ obtaining a copy of this software and associated documentation
10
+ files (the "Software"), to deal in the Software without
11
+ restriction, including without limitation the rights to use,
12
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the
14
+ Software is furnished to do so, subject to the following
15
+ conditions:
16
+
17
+ The above copyright notice and this permission notice shall be
18
+ included in all copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27
+ OTHER DEALINGS IN THE SOFTWARE.
28
+ """
29
+
30
+
31
+ import enum
32
+ from typing import TYPE_CHECKING, Any
33
+
34
+ import numpy as np
35
+
36
+ from pytools import memoize_method
37
+
38
+ import pyopencl as cl
39
+ from pyopencl.tools import (
40
+ DtypedArgument,
41
+ KernelTemplateBase,
42
+ ScalarArg,
43
+ VectorArg,
44
+ context_dependent_memoize,
45
+ dtype_to_c_struct,
46
+ dtype_to_ctype,
47
+ )
48
+
49
+
50
+ if TYPE_CHECKING:
51
+ from collections.abc import Sequence
52
+
53
+
54
+ # {{{ elementwise kernel code generator
55
+
56
def get_elwise_program(
            context: cl.Context,
            arguments: list[DtypedArgument],
            operation: str, *,
            name: str = "elwise_kernel",
            options: Any = None,
            preamble: str = "",
            loop_prep: str = "",
            after_loop: str = "",
            use_range: bool = False) -> cl.Program:
    """Assemble the OpenCL C source of an elementwise kernel and build it.

    :arg context: the context the program is created in.
    :arg arguments: argument descriptors; the ``declarator()`` of each one
        becomes one entry of the kernel's parameter list.
    :arg operation: C snippet that is pasted into the loop body and run for
        each index *i*.
    :arg name: name given to the generated ``__kernel`` function.
    :arg options: forwarded to :meth:`pyopencl.Program.build`.
    :arg preamble: source text emitted ahead of the kernel definition.
    :arg loop_prep: text emitted inside the kernel, before the loop.
    :arg after_loop: text emitted inside the kernel, after the loop.
    :arg use_range: if true, generate a loop driven by ``start``, ``stop``
        and ``step``; otherwise loop over ``0 <= i < n``. These names are not
        declared by the generated source itself, so they are expected to
        come from *arguments*.
    :returns: the built :class:`pyopencl.Program`.
    """
    if use_range:
        # Two loops because the termination test flips with the sign of step.
        loop_template = r"""//CL//
          if (step < 0)
          {
            for (i = start + (work_group_start + lid)*step;
              i > stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          else
          {
            for (i = start + (work_group_start + lid)*step;
              i < stop; i += gsize*step)
            {
              %(operation)s;
            }
          }
          """
    else:
        loop_template = """//CL//
          for (i = work_group_start + lid; i < n; i += gsize)
          {
            %(operation)s;
          }
          """

    import re
    if re.search(r"\breturn\b", operation) is not None:
        # A 'return' would leave the whole loop, skipping the remaining
        # indices assigned to this work item.
        from warnings import warn
        warn("Using a 'return' statement in an element-wise operation will "
             "likely lead to incorrect results. Use "
             "PYOPENCL_ELWISE_CONTINUE instead.",
             stacklevel=3)

    arg_decls = ", ".join(arg.declarator() for arg in arguments)
    loop_code = loop_template % {"operation": operation}

    source = (f"""//CL//
        {preamble}

        #define PYOPENCL_ELWISE_CONTINUE continue

        __kernel void {name}({arg_decls})
        {{
          int lid = get_local_id(0);
          int gsize = get_global_size(0);
          int work_group_start = get_local_size(0)*get_group_id(0);
          long i;

          {loop_prep};
          {loop_code}
          {after_loop};
        }}
        """)

    program = cl.Program(context, source)
    return program.build(options)
122
+
123
+
124
def get_elwise_kernel_and_types(
            context: cl.Context,
            arguments: str | Sequence[DtypedArgument],
            operation: str, *,
            name: str = "elwise_kernel",
            options: Any = None,
            preamble: str = "",
            use_range: bool = False,
            **kwargs: Any) -> tuple[cl.Kernel, list[DtypedArgument]]:
    """Build an elementwise kernel and return it with its argument list.

    :arg arguments: a C-style argument string or a sequence of argument
        descriptors; parsed with offset support enabled.
    :arg operation: C snippet executed for each index.
    :arg preamble: user preamble; automatically generated pragmas/includes
        are placed in front of it.
    :arg use_range: if true, ``start``/``stop``/``step`` scalars are appended
        to the arguments; otherwise a single ``n`` scalar is appended.
    :arg kwargs: ``auto_preamble`` (default true) controls generation of the
        double-precision pragma and complex include; ``loop_prep`` is appended
        to the offset-adjustment code; anything else is passed on to
        :func:`get_elwise_program`.
    :returns: a tuple ``(kernel, parsed_args)`` where *parsed_args* includes
        the appended size/range scalars.
    """
    from pyopencl.tools import (
        get_arg_list_arg_types,
        get_arg_offset_adjuster_code,
        parse_arg_list,
    )

    arg_descrs = parse_arg_list(arguments, with_offset=True)

    if kwargs.pop("auto_preamble", True):
        uses_double = [
            arg.dtype in [np.float64, np.complex128] for arg in arg_descrs]
        uses_complex = [arg.dtype.kind == "c" for arg in arg_descrs]

        # Pragmas come first, then includes, each emitted at most once.
        auto_parts = []
        if any(uses_double):
            auto_parts.append("""
                #if __OPENCL_C_VERSION__ < 120
                #pragma OPENCL EXTENSION cl_khr_fp64: enable
                #endif
                #define PYOPENCL_DEFINE_CDOUBLE
                """)
        if any(uses_complex):
            auto_parts.append("#include <pyopencl-complex.h>\n")

        if auto_parts:
            preamble = "\n".join(auto_parts) + "\n" + preamble

    if use_range:
        arg_descrs.extend(
            [ScalarArg(np.intp, bound) for bound in ("start", "stop", "step")])
    else:
        arg_descrs.append(ScalarArg(np.intp, "n"))

    user_loop_prep = kwargs.pop("loop_prep", "")
    full_loop_prep = get_arg_offset_adjuster_code(arg_descrs) + user_loop_prep

    program = get_elwise_program(
        context, arg_descrs, operation,
        name=name, options=options, preamble=preamble,
        use_range=use_range, loop_prep=full_loop_prep, **kwargs)

    kernel = getattr(program, name)
    kernel.set_scalar_arg_dtypes(get_arg_list_arg_types(arg_descrs))

    return kernel, arg_descrs
185
+
186
+
187
def get_elwise_kernel(
            context: cl.Context,
            arguments: str | list[DtypedArgument],
            operation: str, *,
            name: str = "elwise_kernel",
            options: Any = None, **kwargs: Any) -> cl.Kernel:
    """Return a :class:`pyopencl.Kernel` that performs the same scalar
    operation on one or several vectors.

    Thin wrapper around :func:`get_elwise_kernel_and_types` that discards the
    parsed argument list and returns only the kernel.
    """
    kernel, _parsed_args = get_elwise_kernel_and_types(
        context, arguments, operation,
        name=name, options=options, **kwargs)
    return kernel
201
+
202
+ # }}}
203
+
204
+
205
+ # {{{ ElementwiseKernel driver
206
+
207
class ElementwiseKernel:
    """
    A kernel that takes a number of scalar or vector *arguments* and performs
    an *operation* specified as a snippet of C on these arguments.

    :arg arguments: a string formatted as a C argument list.
    :arg operation: a snippet of C that carries out the desired 'map'
        operation.  The current index is available as the variable *i*.
        *operation* may contain the statement ``PYOPENCL_ELWISE_CONTINUE``,
        which will terminate processing for the current element.
    :arg name: the function name as which the kernel is compiled
    :arg options: passed unmodified to :meth:`pyopencl.Program.build`.
    :arg preamble: a piece of C source code that gets inserted outside of the
        function context in the elementwise operation's kernel source code.

    .. warning :: Using a ``return`` statement in *operation* will lead to
        incorrect results, as some elements may never get processed. Use
        ``PYOPENCL_ELWISE_CONTINUE`` instead.

    .. versionchanged:: 2013.1

        Added ``PYOPENCL_ELWISE_CONTINUE``.

    .. automethod:: __call__
    """

    def __init__(
                self,
                context: cl.Context,
                arguments: str | Sequence[DtypedArgument],
                operation: str,
                name: str = "elwise_kernel",
                options: Any = None, **kwargs: Any) -> None:
        # Only the inputs are recorded here; building happens in get_kernel().
        self.context = context
        self.arguments = arguments
        self.operation = operation
        self.name = name
        self.options = options
        self.kwargs = kwargs

    @memoize_method
    def get_kernel(self, use_range: bool):
        """Build (once per *use_range* value) the kernel and its argument list.

        :arg use_range: selects the ``start``/``stop``/``step`` variant of
            the kernel instead of the plain ``n``-element one.
        :returns: a tuple ``(kernel, arg_descrs)``.
        :raises RuntimeError: if none of the arguments is a vector argument.
        """
        knl, arg_descrs = get_elwise_kernel_and_types(
            self.context, self.arguments, self.operation,
            name=self.name, options=self.options,
            use_range=use_range, **self.kwargs)

        for arg in arg_descrs:
            if isinstance(arg, VectorArg) and not arg.with_offset:
                from warnings import warn
                warn(
                    f"ElementwiseKernel '{self.name}' used with VectorArgs "
                    "that do not have offset support enabled. This usage is "
                    "deprecated. Just pass with_offset=True to VectorArg, "
                    "everything should sort itself out automatically.",
                    DeprecationWarning, stacklevel=2)

        if not any(isinstance(arg, VectorArg) for arg in arg_descrs):
            raise RuntimeError(
                "ElementwiseKernel can only be used with functions that have "
                "at least one vector argument")

        return knl, arg_descrs

    def __call__(self, *args, **kwargs) -> cl.Event:
        """
        Invoke the generated scalar kernel.

        The arguments may either be scalars or :class:`pyopencl.array.Array`
        instances.

        Recognized keyword arguments are ``range``, ``slice``, ``capture_as``,
        ``queue`` and ``wait_for``; any other keyword raises
        :exc:`TypeError`, as does passing both ``range`` and ``slice``.

        |std-enqueue-blurb|
        """
        range_ = kwargs.pop("range", None)
        slice_ = kwargs.pop("slice", None)
        capture_as = kwargs.pop("capture_as", None)
        queue = kwargs.pop("queue", None)
        wait_for = kwargs.pop("wait_for", None)

        if kwargs:
            raise TypeError(f"unknown keyword arguments: '{', '.join(kwargs)}'")

        use_range = range_ is not None or slice_ is not None
        kernel, arg_descrs = self.get_kernel(use_range)

        # Work on a private list so the caller's sequence is never shared.
        wait_for = [] if wait_for is None else list(wait_for)

        # {{{ assemble arg array

        # The first argument bound to a VectorArg serves as the
        # "representative" vector: it supplies the default queue and size.
        repr_vec = None
        invocation_args = []

        # non-strict because length arg gets appended below
        for arg, arg_descr in zip(args, arg_descrs, strict=False):
            if repr_vec is None and isinstance(arg_descr, VectorArg):
                repr_vec = arg
            invocation_args.append(arg)

        assert repr_vec is not None

        # }}}

        if queue is None:
            queue = repr_vec.queue

        if slice_ is not None:
            if range_ is not None:
                raise TypeError(
                    "may not specify both range and slice keyword arguments")

            # Resolve None/negative slice fields against the vector length.
            range_ = slice(*slice_.indices(repr_vec.size))

        max_wg_size = kernel.get_work_group_info(
            cl.kernel_work_group_info.WORK_GROUP_SIZE,
            queue.device)

        if range_ is not None:
            start = 0 if range_.start is None else range_.start
            step = 1 if range_.step is None else range_.step
            invocation_args.extend([start, range_.stop, step])

            from pyopencl.array import _splay
            # Divide by the magnitude of step: dividing by a negative step
            # would hand a negative element count to the launch-size
            # heuristic. The kernel loop strides by the global size, so this
            # only affects how much parallelism is requested.
            gs, ls = _splay(queue.device,
                    abs(range_.stop - start) // abs(step),
                    max_wg_size)
        else:
            invocation_args.append(repr_vec.size)
            gs, ls = repr_vec._get_sizes(queue, max_wg_size)

        if capture_as is not None:
            # NOTE: after capturing, control still falls through to the
            # regular enqueue below.
            kernel.set_args(*invocation_args)
            kernel.capture_call(
                capture_as, queue,
                gs, ls, *invocation_args, wait_for=wait_for)

        return kernel(queue, gs, ls, *invocation_args, wait_for=wait_for)
359
+
360
+ # }}}
361
+
362
+
363
+ # {{{ template
364
+
365
class ElementwiseTemplate(KernelTemplateBase):
    """A template from which :class:`ElementwiseKernel` instances are built.

    The arguments, operation, name and preamble are stored unrendered and are
    passed through the renderer when :meth:`build_inner` runs.
    """

    def __init__(
                self,
                arguments: str | list[DtypedArgument],
                operation: str,
                name: str = "elwise",
                preamble: str = "",
                template_processor: str | None = None) -> None:
        super().__init__(template_processor=template_processor)
        self.arguments = arguments
        self.operation = operation
        self.name = name
        self.preamble = preamble

    def build_inner(self, context, type_aliases=(), var_values=(),
            more_preamble="", more_arguments=(), declare_types=(),
            options=None):
        """Render the template and wrap the result in an
        :class:`ElementwiseKernel`.

        Automatic preamble generation is switched off for the resulting
        kernel; the type declarations produced by the renderer are used
        instead, followed by the rendered template preamble and
        *more_preamble*.
        """
        renderer = self.get_renderer(
            type_aliases, var_values, context, options)

        arg_list = renderer.render_argument_list(
            self.arguments, more_arguments, with_offset=True)
        type_decl_preamble = renderer.get_type_decl_preamble(
            context.devices[0], declare_types, arg_list)

        rendered_operation = renderer(self.operation)
        rendered_name = renderer(self.name)
        rendered_preamble = renderer(self.preamble + "\n" + more_preamble)
        full_preamble = type_decl_preamble + "\n" + rendered_preamble

        return ElementwiseKernel(
            context, arg_list, rendered_operation,
            name=rendered_name, options=options,
            preamble=full_preamble,
            auto_preamble=False)
398
+
399
+ # }}}
400
+
401
+
402
+ # {{{ argument kinds
403
+
404
class ArgumentKind(enum.Enum):
    """How a value is handed to a generated kernel."""

    # An array object with a non-empty shape; accessed as ``name[i]``.
    ARRAY = enum.auto()
    # An array object whose shape is ``()``; accessed as ``name[0]``.
    DEV_SCALAR = enum.auto()
    # Anything that is not an array object; accessed by plain name.
    SCALAR = enum.auto()
408
+
409
+
410
def get_argument_kind(v: Any) -> ArgumentKind:
    """Classify *v* as an array, a device scalar, or a plain scalar.

    A :class:`pyopencl.array.Array` with shape ``()`` is a device scalar, any
    other :class:`~pyopencl.array.Array` is an array, and every other value
    is treated as a scalar.
    """
    from pyopencl.array import Array

    if not isinstance(v, Array):
        return ArgumentKind.SCALAR

    return ArgumentKind.DEV_SCALAR if v.shape == () else ArgumentKind.ARRAY
419
+
420
+
421
def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) -> tuple[str, str]:
    """Return ``(declarator, access_expression)`` for an argument *name*.

    Arrays and device scalars are declared as pointers and read through an
    index (``[i]`` and ``[0]`` respectively); plain scalars are declared and
    read by name.

    :raises AssertionError: if *kind* is not a known :class:`ArgumentKind`.
    """
    if kind == ArgumentKind.ARRAY:
        index = "i"
    elif kind == ArgumentKind.SCALAR:
        return f"{name}", name
    elif kind == ArgumentKind.DEV_SCALAR:
        index = "0"
    else:
        raise AssertionError()

    return f"*{name}", f"{name}[{index}]"
430
+
431
+ # }}}
432
+
433
+
434
+ # {{{ kernels supporting array functionality
435
+
436
@context_dependent_memoize
def get_take_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the ``take`` kernel: ``dest_k[i] = src_k[idx[i]]`` for each of
    *vec_count* destination/source pairs sharing one index vector.
    """
    index_ctype = dtype_to_ctype(idx_dtype)
    vec_numbers = range(vec_count)

    dest_args = [
        VectorArg(dtype, f"dest{k}", with_offset=True) for k in vec_numbers]
    src_args = [
        VectorArg(dtype, f"src{k}", with_offset=True) for k in vec_numbers]
    kernel_args = (
        dest_args + src_args
        + [VectorArg(idx_dtype, "idx", with_offset=True)])

    copy_statements = [f"dest{k}[i] = src{k}[src_idx];" for k in vec_numbers]
    operation = (
        f"{index_ctype} src_idx = idx[i];\n"
        + "\n".join(copy_statements))

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(context, kernel_args, operation,
            preamble=struct_preamble,
            name="take")
455
+
456
+
457
@context_dependent_memoize
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
    """Build the ``take_put`` kernel:
    ``dest_k[gmem_dest_idx[i]] = src_k[gmem_src_idx[i] (+ offset_k)]``.

    :arg with_offsets: if true, one ``offset{k}`` scalar per vector is added
        to the argument list and to the source index.
    """
    index_ctype = dtype_to_ctype(idx_dtype)
    vec_numbers = range(vec_count)

    # Destination vectors are declared without offset support here, unlike
    # the index and source vectors.
    dest_args = [VectorArg(dtype, f"dest{k}") for k in vec_numbers]
    index_args = [
        VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True),
        VectorArg(idx_dtype, "gmem_src_idx", with_offset=True),
    ]
    src_args = [
        VectorArg(dtype, f"src{k}", with_offset=True) for k in vec_numbers]
    offset_args = (
        [ScalarArg(idx_dtype, f"offset{k}") for k in vec_numbers]
        if with_offsets else [])
    kernel_args = dest_args + index_args + src_args + offset_args

    def copy_statement(k):
        src_index = f"src_idx + offset{k}" if with_offsets else "src_idx"
        return f"dest{k}[dest_idx] = src{k}[{src_index}];"

    index_loads = (
        f"{index_ctype} src_idx = gmem_src_idx[i];\n"
        f"{index_ctype} dest_idx = gmem_dest_idx[i];\n")
    operation = index_loads + "\n".join(copy_statement(k) for k in vec_numbers)

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(context, kernel_args, operation,
            preamble=struct_preamble,
            name="take_put")
489
+
490
+
491
@context_dependent_memoize
def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the ``put`` kernel, which scatters values into
    ``dest_k[gmem_dest_idx[i]]``.

    For vector *k*, the stored value is ``src_k[0]`` when ``use_fill[k]`` is
    nonzero, and ``src_k[i % val_ary_lengths[k]]`` otherwise.
    """
    index_ctype = dtype_to_ctype(idx_dtype)
    vec_numbers = range(vec_count)

    kernel_args = (
        [VectorArg(dtype, f"dest{k}", with_offset=True) for k in vec_numbers]
        + [VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True)]
        + [VectorArg(dtype, f"src{k}", with_offset=True) for k in vec_numbers]
        + [VectorArg(np.uint8, "use_fill", with_offset=True)]
        + [VectorArg(np.int64, "val_ary_lengths", with_offset=True)]
    )

    store_statements = [
        f"dest{k}[dest_idx] = (use_fill[{k}] ? src{k}[0] : "
        f"src{k}[i % val_ary_lengths[{k}]]);"
        for k in vec_numbers
    ]
    operation = (
        f"{index_ctype} dest_idx = gmem_dest_idx[i];\n"
        + "\n".join(store_statements))

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(context, kernel_args, operation,
            preamble=struct_preamble,
            name="put")
521
+
522
+
523
@context_dependent_memoize
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Build the ``copy`` kernel ``dest[i] = src[i]``, with conversion to a
    complex destination type where required.

    :raises TypeError: if the dtypes differ and either one is a struct
        (``kind == "V"``) type.
    """
    dest_is_complex = dtype_dest.kind == "c"
    source_expr = "src[i]"

    # Real -> complex: promote via <ctype>_fromreal first.
    if dest_is_complex and dtype_src.kind != "c":
        source_expr = f"{complex_dtype_to_name(dtype_dest)}_fromreal({source_expr})"

    # Any differing source dtype into a complex destination gets a cast.
    if dest_is_complex and dtype_src != dtype_dest:
        source_expr = f"{complex_dtype_to_name(dtype_dest)}_cast({source_expr})"

    involves_struct = dtype_dest.kind == "V" or dtype_src.kind == "V"
    if dtype_dest != dtype_src and involves_struct:
        raise TypeError("copying between non-identical struct types")

    signature = (
        f"{dtype_to_ctype(dtype_dest)} *dest, {dtype_to_ctype(dtype_src)} *src")
    struct_preamble = dtype_to_c_struct(context.devices[0], dtype_dest)

    return get_elwise_kernel(context,
            signature,
            f"dest[i] = {source_expr}",
            preamble=struct_preamble,
            name="copy")
546
+
547
+
548
def complex_dtype_to_name(dtype) -> str:
    """Return the name root (``"cdouble"`` or ``"cfloat"``) used for the
    complex helper functions of *dtype*.

    :raises RuntimeError: if *dtype* is neither ``complex128`` nor
        ``complex64``.
    """
    known_roots = (
        (np.complex128, "cdouble"),
        (np.complex64, "cfloat"),
    )
    # Compared with ``==`` rather than looked up by hash, so that both dtype
    # instances and scalar types match.
    for candidate, root in known_roots:
        if dtype == candidate:
            return root

    raise RuntimeError(f"invalid complex type: {dtype}")
555
+
556
+
557
def real_dtype(dtype):
    """Return the dtype of the real part of a scalar of type *dtype*."""
    zero = dtype.type(0)
    real_part = zero.real
    return real_part.dtype
559
+
560
+
561
@context_dependent_memoize
def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z,
                      x_is_scalar=False, y_is_scalar=False):
    """Build the ``axpbyz`` kernel computing ``z[i] = a*x + b*y``.

    :arg x_is_scalar: if true, ``x[0]`` is read for every index instead of
        ``x[i]``; likewise *y_is_scalar* for *y*.

    The scalars *a* and *b* are declared with the result type.
    """
    result_t = dtype_to_ctype(dtype_z)

    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"

    x_access = "x[0]" if x_is_scalar else "x[i]"
    y_access = "y[0]" if y_is_scalar else "y[i]"

    if dtype_z.kind == "c":
        # a and b will always be complex here.
        z_root = complex_dtype_to_name(dtype_z)

        def scaled(coeff, operand, operand_is_complex):
            if operand_is_complex:
                return f"{z_root}_mul({coeff}, {z_root}_cast({operand}))"
            return f"{z_root}_mulr({coeff}, {operand})"

        a_times_x = scaled("a", x_access, x_is_complex)
        b_times_y = scaled("b", y_access, y_is_complex)
        result_expr = f"{z_root}_add({a_times_x}, {b_times_y})"
    else:
        # real-only: operands are cast to the result type before scaling
        a_times_x = f"a*(({result_t}) {x_access})"
        b_times_y = f"b*(({result_t}) {y_access})"
        result_expr = f"{a_times_x} + {b_times_y}"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)
    signature = f"{tp_z} *z, {tp_z} a, {tp_x} *x, {tp_z} b, {tp_y} *y"

    return get_elwise_kernel(context,
            signature,
            f"z[i] = {result_expr}",
            name="axpbyz")
603
+
604
+
605
@context_dependent_memoize
def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
    """Build the ``axpb`` kernel computing ``z[i] = a*x[i] + b`` with scalar
    *a* and *b*, using the complex helper functions when any operand is
    complex.
    """
    a_is_complex = dtype_a.kind == "c"
    x_is_complex = dtype_x.kind == "c"
    b_is_complex = dtype_b.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # The name root is resolved on demand, exactly where a complex helper
        # is needed; complex_dtype_to_name raises for other dtypes.
        root = complex_dtype_to_name(dtype_z)
        return f"{root}_{suffix}({', '.join(operands)})"

    if x_is_complex:
        a_term = "a"
        x_term = "x[i]"

        if dtype_x != dtype_z:
            x_term = z_call("cast", x_term)

        if a_is_complex:
            if dtype_a != dtype_z:
                a_term = z_call("cast", a_term)
            ax = z_call("mul", a_term, x_term)
        else:
            ax = z_call("rmul", a_term, x_term)
    elif a_is_complex:
        a_term = "a"
        if dtype_a != dtype_z:
            a_term = z_call("cast", a_term)
        ax = z_call("mulr", a_term, "x[i]")
    else:
        ax = "a*x[i]"

    b_term = "b"
    if z_is_complex:
        if not b_is_complex:
            b_term = z_call("fromreal", b_term)
        if not (a_is_complex or x_is_complex):
            ax = z_call("fromreal", ax)

        ax = z_call("cast", ax)
        b_term = z_call("cast", b_term)

    if a_is_complex or x_is_complex or b_is_complex:
        expr = z_call("add", ax, b_term)
    else:
        expr = f"{ax} + {b_term}"

    tp_a = dtype_to_ctype(dtype_a)
    tp_x = dtype_to_ctype(dtype_x)
    tp_b = dtype_to_ctype(dtype_b)
    tp_z = dtype_to_ctype(dtype_z)
    signature = f"{tp_z} *z, {tp_a} a, {tp_x} *x,{tp_b} b"

    return get_elwise_kernel(context,
            signature,
            f"z[i] = {expr}",
            name="axpb")
664
+
665
+
666
@context_dependent_memoize
def get_multiply_kernel(context, dtype_x, dtype_y, dtype_z,
                        x_is_scalar=False, y_is_scalar=False):
    """Build the ``multiply`` kernel computing ``z[i] = x * y``.

    :arg x_is_scalar: if true, ``x[0]`` is read for every index instead of
        ``x[i]``; likewise *y_is_scalar* for *y*.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"

    def z_call(suffix, *operands):
        # Name root resolved only where a complex helper is actually emitted.
        root = complex_dtype_to_name(dtype_z)
        return f"{root}_{suffix}({', '.join(operands)})"

    lhs = "x[0]" if x_is_scalar else "x[i]"
    rhs = "y[0]" if y_is_scalar else "y[i]"

    if x_is_complex and dtype_x != dtype_z:
        lhs = z_call("cast", lhs)
    if y_is_complex and dtype_y != dtype_z:
        rhs = z_call("cast", rhs)

    if x_is_complex:
        product = z_call("mul" if y_is_complex else "mulr", lhs, rhs)
    elif y_is_complex:
        product = z_call("rmul", lhs, rhs)
    else:
        product = f"{lhs} * {rhs}"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(context,
            f"{tp_z} *z, {tp_x} *x, {tp_y} *y",
            f"z[i] = {product}",
            name="multiply")
697
+
698
+
699
@context_dependent_memoize
def get_divide_kernel(context, dtype_x, dtype_y, dtype_z,
                      x_is_scalar=False, y_is_scalar=False):
    """Build the ``divide`` kernel computing ``z[i] = x / y``.

    :arg x_is_scalar: if true, ``x[0]`` is read for every index instead of
        ``x[i]``; likewise *y_is_scalar* for *y*.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # Name root resolved only where a complex helper is actually emitted.
        root = complex_dtype_to_name(dtype_z)
        return f"{root}_{suffix}({', '.join(operands)})"

    numer = "x[0]" if x_is_scalar else "x[i]"
    denom = "y[0]" if y_is_scalar else "y[i]"

    if z_is_complex and dtype_x != dtype_y:
        # Mixed operand types with a complex result: bring complex operands
        # to the result type with the complex cast helper.
        if x_is_complex and dtype_x != dtype_z:
            numer = z_call("cast", numer)
        if y_is_complex and dtype_y != dtype_z:
            denom = z_call("cast", denom)
    else:
        # Otherwise use a plain C cast to the result type.
        if dtype_x != dtype_z:
            numer = f"({dtype_to_ctype(dtype_z)}) ({numer})"
        if dtype_y != dtype_z:
            denom = f"({dtype_to_ctype(dtype_z)}) ({denom})"

    if x_is_complex:
        quotient = z_call(
            "divide" if y_is_complex else "divider", numer, denom)
    elif y_is_complex:
        quotient = z_call("rdivide", numer, denom)
    else:
        quotient = f"{numer} / {denom}"

    if z_is_complex:
        quotient = z_call("cast", quotient)

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(context,
            f"{tp_z} *z, {tp_x} *x, {tp_y} *y",
            f"z[i] = {quotient}",
            name="divide")
740
+
741
+
742
@context_dependent_memoize
def get_rdivide_elwise_kernel(context, dtype_x, dtype_y, dtype_z):
    """Build the ``divide_r`` kernel computing ``z[i] = y / x[i]``.

    Note the reversed operand order: the scalar *y* is the numerator and the
    vector *x* is the denominator.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # Name root resolved only where a complex helper is actually emitted.
        root = complex_dtype_to_name(dtype_z)
        return f"{root}_{suffix}({', '.join(operands)})"

    denom = "x[i]"
    numer = "y"

    if z_is_complex and dtype_x != dtype_y:
        if x_is_complex and dtype_x != dtype_z:
            denom = z_call("cast", denom)
        if y_is_complex and dtype_y != dtype_z:
            numer = z_call("cast", numer)

    if x_is_complex:
        quotient = z_call(
            "divide" if y_is_complex else "rdivide", numer, denom)
    elif y_is_complex:
        quotient = z_call("divider", numer, denom)
    else:
        quotient = f"{numer} / {denom}"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(context,
            f"{tp_z} *z, {tp_x} *x, {tp_y} y",
            f"z[i] = {quotient}",
            name="divide_r")
775
+
776
+
777
@context_dependent_memoize
def get_fill_kernel(context, dtype):
    """Build the ``fill`` kernel, which stores the scalar *a* in every
    ``z[i]``.
    """
    element_ctype = dtype_to_ctype(dtype)
    signature = f"{element_ctype} *z, {element_ctype} a"
    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)

    return get_elwise_kernel(context,
            signature,
            "z[i] = a",
            preamble=struct_preamble,
            name="fill")
784
+
785
+
786
@context_dependent_memoize
def get_reverse_kernel(context, dtype):
    """Build the ``reverse`` kernel, which writes *y* into *z* in reversed
    order (``z[i] = y[n-1-i]``).
    """
    element_ctype = dtype_to_ctype(dtype)
    signature = f"{element_ctype} *z, {element_ctype} *y"

    return get_elwise_kernel(context,
            signature,
            "z[i] = y[n-1-i]",
            name="reverse")
792
+
793
+
794
@context_dependent_memoize
def get_arange_kernel(context, dtype):
    """Build the ``arange`` kernel computing ``z[i] = start + i*step``.

    Complex dtypes go through the complex add/rmul helpers; all others cast
    the index to the element type before multiplying.
    """
    if dtype.kind == "c":
        root = complex_dtype_to_name(dtype)
        value_expr = f"{root}_add(start, {root}_rmul(i, step))"
    else:
        value_expr = f"start + (({dtype_to_ctype(dtype)}) i) * step"

    kernel_args = [
        VectorArg(dtype, "z", with_offset=True),
        ScalarArg(dtype, "start"),
        ScalarArg(dtype, "step"),
    ]

    return get_elwise_kernel(context, kernel_args,
            f"z[i] = {value_expr}",
            name="arange")
810
+
811
+
812
@context_dependent_memoize
def get_pow_kernel(context, dtype_x, dtype_y, dtype_z,
                   is_base_array, is_exp_array):
    """Build the ``pow_method`` kernel computing ``z[i] = x ** y``.

    :arg is_base_array: if true, the base is a vector read as ``x[i]``;
        otherwise it is a scalar argument ``x``.
    :arg is_exp_array: same choice for the exponent *y*.
    """
    # Access expression and parameter declarator for base and exponent.
    base, base_param = ("x[i]", "*x") if is_base_array else ("x", "x")
    exponent, exp_param = ("y[i]", "*y") if is_exp_array else ("y", "y")

    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # Name root resolved only where a complex helper is actually emitted.
        root = complex_dtype_to_name(dtype_z)
        return f"{root}_{suffix}({', '.join(operands)})"

    if z_is_complex and dtype_x != dtype_y:
        if x_is_complex and dtype_x != dtype_z:
            base = z_call("cast", base)
        if y_is_complex and dtype_y != dtype_z:
            exponent = z_call("cast", exponent)
    elif dtype_x != dtype_y:
        if dtype_x != dtype_z:
            base = f"({dtype_to_ctype(dtype_z)}) ({base})"
        if dtype_y != dtype_z:
            exponent = f"({dtype_to_ctype(dtype_z)}) ({exponent})"

    if x_is_complex:
        result = z_call("pow" if y_is_complex else "powr", base, exponent)
    elif y_is_complex:
        result = z_call("rpow", base, exponent)
    else:
        result = f"pow({base}, {exponent})"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)
    signature = f"{tp_z} *z, {tp_x} {base_param}, {tp_y} {exp_param}"

    return get_elwise_kernel(context,
            signature,
            f"z[i] = {result}",
            name="pow_method")
861
+
862
+
863
@context_dependent_memoize
def get_unop_kernel(context, operator, res_dtype, in_dtype):
    """Build the ``unary_op_kernel`` kernel applying the prefix C *operator*:
    ``z[i] = <operator> y[i]``.
    """
    kernel_args = [
        VectorArg(res_dtype, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    operation = f"z[i] = {operator} y[i]"

    return get_elwise_kernel(context, kernel_args, operation,
            name="unary_op_kernel")
871
+
872
+
873
@context_dependent_memoize
def get_array_scalar_binop_kernel(context, operator, dtype_res, dtype_a, dtype_b):
    """Build the ``scalar_binop_kernel`` kernel combining a vector with a
    scalar: ``out[i] = a[i] <operator> b``.
    """
    kernel_args = [
        VectorArg(dtype_res, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        ScalarArg(dtype_b, "b"),
    ]
    operation = f"out[i] = a[i] {operator} b"

    return get_elwise_kernel(context, kernel_args, operation,
            name="scalar_binop_kernel")
882
+
883
+
884
@context_dependent_memoize
def get_array_binop_kernel(context, operator, dtype_res, dtype_a, dtype_b,
                           a_is_scalar=False, b_is_scalar=False):
    """Build the ``binop_kernel`` kernel combining two vectors:
    ``out[i] = a <operator> b``.

    :arg a_is_scalar: if true, ``a[0]`` is read for every index instead of
        ``a[i]``; likewise *b_is_scalar* for *b*.
    """
    lhs = "a[0]" if a_is_scalar else "a[i]"
    rhs = "b[0]" if b_is_scalar else "b[i]"

    kernel_args = [
        VectorArg(dtype_res, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        VectorArg(dtype_b, "b", with_offset=True),
    ]
    operation = f"out[i] = {lhs} {operator} {rhs}"

    return get_elwise_kernel(context, kernel_args, operation,
            name="binop_kernel")
896
+
897
+
898
@context_dependent_memoize
def get_array_scalar_comparison_kernel(context, operator, dtype_a):
    """Build the ``scalar_comparison_kernel`` kernel comparing a vector with
    a scalar of the same dtype; the result is stored as ``int8``.
    """
    kernel_args = [
        VectorArg(np.int8, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        ScalarArg(dtype_a, "b"),
    ]
    operation = f"out[i] = a[i] {operator} b"

    return get_elwise_kernel(context, kernel_args, operation,
            name="scalar_comparison_kernel")
907
+
908
+
909
@context_dependent_memoize
def get_array_comparison_kernel(context, operator, dtype_a, dtype_b):
    """Build the ``comparison_kernel`` kernel comparing two vectors
    elementwise; the result is stored as ``int8``.
    """
    kernel_args = [
        VectorArg(np.int8, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        VectorArg(dtype_b, "b", with_offset=True),
    ]
    operation = f"out[i] = a[i] {operator} b[i]"

    return get_elwise_kernel(context, kernel_args, operation,
            name="comparison_kernel")
918
+
919
+
920
@context_dependent_memoize
def get_unary_func_kernel(context, func_name, in_dtype, out_dtype=None):
    """Build a kernel named ``<func_name>_kernel`` computing
    ``z[i] = func_name(y[i])``.

    :arg out_dtype: dtype of the result; defaults to *in_dtype*.
    """
    result_dtype = in_dtype if out_dtype is None else out_dtype

    kernel_args = [
        VectorArg(result_dtype, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    operation = f"z[i] = {func_name}(y[i])"

    return get_elwise_kernel(context, kernel_args, operation,
            name=f"{func_name}_kernel")
931
+
932
+
933
@context_dependent_memoize
def get_binary_func_kernel(context, func_name, x_dtype, y_dtype, out_dtype,
                           preamble="", name=None):
    """Build a kernel computing ``z[i] = func_name(x[i], y[i])``.

    :arg preamble: source text placed ahead of the kernel.
    :arg name: base name of the kernel (``_kernel`` is appended); defaults
        to *func_name*.
    """
    base_name = func_name if name is None else name

    kernel_args = [
        VectorArg(out_dtype, "z", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
        VectorArg(y_dtype, "y", with_offset=True),
    ]
    operation = f"z[i] = {func_name}(x[i], y[i])"

    return get_elwise_kernel(context, kernel_args, operation,
            name=f"{base_name}_kernel",
            preamble=preamble)
947
+
948
+
949
@context_dependent_memoize
def get_float_binary_func_kernel(context, func_name, x_dtype, y_dtype,
                                 out_dtype, preamble="", name=None):
    """Build ``z[i] = func_name((T)x[i], (T)y[i])`` with ``T`` float or double.

    ``T`` is ``double`` when the product of zero-valued arrays of *x_dtype*
    and *y_dtype* has an item size above 4 bytes, and ``float`` otherwise.
    In the ``double`` case an fp64-enabling header is prepended to
    *preamble*.

    The kernel is named ``"<name>_kernel"``, where *name* falls back to
    *func_name* when it is ``None``.
    """
    base_name = func_name if name is None else name

    # Let NumPy's promotion of the two operand dtypes pick the precision.
    promoted = np.array(0, x_dtype) * np.array(0, y_dtype)
    needs_double = promoted.itemsize > 4

    cast_type = "double" if needs_double else "float"
    if needs_double:
        preamble = """
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        """ + preamble

    arguments = [
        VectorArg(out_dtype, "z", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
        VectorArg(y_dtype, "y", with_offset=True),
    ]
    operation = f"z[i] = {func_name}(({cast_type})x[i], ({cast_type})y[i])"
    return get_elwise_kernel(context, arguments, operation,
                             name=f"{base_name}_kernel",
                             preamble=preamble)
974
+
975
+
976
@context_dependent_memoize
def get_fmod_kernel(context, out_dtype=np.float32, arg_dtype=np.float32,
                    mod_dtype=np.float32):
    """Build the ``fmod`` kernel via ``get_float_binary_func_kernel``.

    *arg_dtype* is used for the first operand buffer, *mod_dtype* for the
    second, and *out_dtype* for the result buffer.
    """
    func_name = "fmod"
    return get_float_binary_func_kernel(
        context, func_name, arg_dtype, mod_dtype, out_dtype)
981
+
982
+
983
@context_dependent_memoize
def get_modf_kernel(context, int_dtype=np.float32,
                    frac_dtype=np.float32, x_dtype=np.float32):
    """Build the kernel ``fracpart[i] = modf(x[i], &intpart[i])``.

    The kernel takes three buffers, in order: ``intpart`` (*int_dtype*),
    ``fracpart`` (*frac_dtype*) and the input ``x`` (*x_dtype*).
    """
    operation = """
        fracpart[i] = modf(x[i], &intpart[i])
        """
    arguments = [
        VectorArg(int_dtype, "intpart", with_offset=True),
        VectorArg(frac_dtype, "fracpart", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    return get_elwise_kernel(context, arguments, operation,
                             name="modf_kernel")
995
+
996
+
997
@context_dependent_memoize
def get_frexp_kernel(context, sign_dtype=np.float32, exp_dtype=np.float32,
                     x_dtype=np.float32):
    """Build a kernel that applies ``frexp`` to each ``x[i]``.

    The kernel takes three buffers, in order: ``significand``
    (*sign_dtype*), ``exponent`` (*exp_dtype*) and the input ``x``
    (*x_dtype*). The exponent is obtained through a local ``int`` and then
    stored into ``exponent[i]``.
    """
    operation = """
        int expt = 0;
        significand[i] = frexp(x[i], &expt);
        exponent[i] = expt;
        """
    arguments = [
        VectorArg(sign_dtype, "significand", with_offset=True),
        VectorArg(exp_dtype, "exponent", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    return get_elwise_kernel(context, arguments, operation,
                             name="frexp_kernel")
1011
+
1012
+
1013
@context_dependent_memoize
def get_ldexp_kernel(context, out_dtype=np.float32, sig_dtype=np.float32,
                     expt_dtype=np.float32):
    """Build the ``ldexp`` kernel via ``get_binary_func_kernel``.

    The exponent buffer may have any dtype: the ``_PYOCL_LDEXP`` macro in
    the preamble casts the second operand to ``int`` before calling
    ``ldexp``.
    """
    # NOTE(review): get_binary_func_kernel appends "_kernel" to *name*, so
    # the generated kernel ends up named "ldexp_kernel_kernel". Kept as is
    # to preserve existing behavior.
    wrapper_macro = "_PYOCL_LDEXP"
    macro_definition = "#define _PYOCL_LDEXP(x, y) ldexp(x, (int)(y))"
    return get_binary_func_kernel(
        context, wrapper_macro, sig_dtype, expt_dtype, out_dtype,
        preamble=macro_definition,
        name="ldexp_kernel")
1020
+
1021
+
1022
@context_dependent_memoize
def get_minmaximum_kernel(context, minmax, dtype_z, dtype_x, dtype_y,
                          kind_x: ArgumentKind, kind_y: ArgumentKind):
    """Build the elementwise minimum/maximum kernel ``z[i] = f(x, y)``.

    *minmax* is the string spliced into the function name: for a
    floating-point *dtype_z* the NaN-propagating macro ``f<minmax>_nanprop``
    is used, for signed/unsigned integer *dtype_z* the plain ``<minmax>``
    function is used.

    *kind_x* and *kind_y* are forwarded to ``get_decl_and_access_for_kind``
    to obtain each operand's declaration and access expression.

    :raises TypeError: if *dtype_z* is neither floating-point nor integer.
    """
    # Accept dtype-like inputs (e.g. NumPy scalar types) for the kind check.
    z_kind = np.dtype(dtype_z).kind
    if z_kind == "f":
        reduce_func = f"f{minmax}_nanprop"
    elif z_kind in ("i", "u"):
        reduce_func = minmax
    else:
        raise TypeError("unsupported dtype specified")

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)
    decl_x, acc_x = get_decl_and_access_for_kind("x", kind_x)
    decl_y, acc_y = get_decl_and_access_for_kind("y", kind_y)

    # The macros are fully parenthesized so that they expand safely inside
    # larger expressions and with compound argument expressions.
    nanprop_preamble = """
    #define fmin_nanprop(a, b) ((isnan(a) || isnan(b)) ? (a)+(b) : fmin(a, b))
    #define fmax_nanprop(a, b) ((isnan(a) || isnan(b)) ? (a)+(b) : fmax(a, b))
    """

    return get_elwise_kernel(
        context,
        f"{tp_z} *z, {tp_x} {decl_x}, {tp_y} {decl_y}",
        f"z[i] = {reduce_func}({acc_x}, {acc_y})",
        name=f"{minmax}imum",
        preamble=nanprop_preamble)
1046
+
1047
+
1048
@context_dependent_memoize
def get_bessel_kernel(context, which_func, out_dtype=np.float64,
                      order_dtype=np.int32, x_dtype=np.float64):
    """Build a Bessel-function kernel of scalar order ``ord_n``.

    For a non-complex *x_dtype* the kernel evaluates
    ``z[i] = bessel_<which_func>n(ord_n, x[i])`` and includes
    ``pyopencl-bessel-<which_func>.cl``.

    For a complex *x_dtype* the kernel calls ``bessel_j_complex`` and
    stores its first result in ``z[i]``.

    :raises NotImplementedError: for complex input when *which_func* is not
        ``"j"``, when *x_dtype* is not ``complex128``, or when *out_dtype*
        differs from *x_dtype*.
    """
    # The default arguments are NumPy scalar *types*, which have no
    # ``.kind`` attribute; normalize so types and dtype instances both work.
    x_dtype = np.dtype(x_dtype)

    arguments = [
        VectorArg(out_dtype, "z", with_offset=True),
        ScalarArg(order_dtype, "ord_n"),
        VectorArg(x_dtype, "x", with_offset=True),
    ]

    if x_dtype.kind != "c":
        return get_elwise_kernel(
            context, arguments,
            f"z[i] = bessel_{which_func}n(ord_n, x[i])",
            name=f"bessel_{which_func}n_kernel",
            preamble=f"""
            #if __OPENCL_C_VERSION__ < 120
            #pragma OPENCL EXTENSION cl_khr_fp64: enable
            #endif
            #define PYOPENCL_DEFINE_CDOUBLE
            #include <pyopencl-bessel-{which_func}.cl>
            """)

    # Complex input: only the "j" function on complex128 with a matching
    # output dtype is handled.
    if which_func != "j":
        raise NotImplementedError("complex arguments for Bessel Y")

    if x_dtype != np.complex128:
        raise NotImplementedError("non-complex double dtype")
    if x_dtype != out_dtype:
        raise NotImplementedError("different input/output types")

    return get_elwise_kernel(
        context, arguments,
        """
        cdouble_t jv_loc;
        cdouble_t jvp1_loc;
        bessel_j_complex(ord_n, x[i], &jv_loc, &jvp1_loc);
        z[i] = jv_loc;
        """,
        name="bessel_j_complex_kernel",
        preamble="""
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        #include <pyopencl-complex.h>
        #include <pyopencl-bessel-j-complex.cl>
        """)
1095
+
1096
+
1097
@context_dependent_memoize
def get_hankel_01_kernel(context, out_dtype, x_dtype):
    """Build a kernel that calls ``hankel_01_complex`` on each ``x[i]``.

    The two results of the call are stored into the ``h0`` and ``h1``
    output buffers (presumably the order-0 and order-1 Hankel functions;
    see ``pyopencl-hankel-complex.cl``).

    :raises NotImplementedError: if *x_dtype* is not ``complex128`` or if
        *out_dtype* differs from *x_dtype*.
    """
    if x_dtype != np.complex128:
        raise NotImplementedError("non-complex double dtype")
    if x_dtype != out_dtype:
        raise NotImplementedError("different input/output types")

    header = """
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        #include <pyopencl-complex.h>
        #include <pyopencl-hankel-complex.cl>
        """
    operation = """
        cdouble_t h0_loc;
        cdouble_t h1_loc;
        hankel_01_complex(x[i], &h0_loc, &h1_loc, 1);
        h0[i] = h0_loc;
        h1[i] = h1_loc;
        """
    arguments = [
        VectorArg(out_dtype, "h0", with_offset=True),
        VectorArg(out_dtype, "h1", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    return get_elwise_kernel(context, arguments, operation,
                             name="hankel_complex_kernel",
                             preamble=header)
1125
+
1126
+
1127
@context_dependent_memoize
def get_diff_kernel(context, dtype):
    """Build the forward-difference kernel ``result[i] = array[i+1] - array[i]``.

    Both buffers are declared with *dtype*.
    """
    # NOTE(review): the kernel reads array[i+1]; the caller presumably
    # launches it over one element fewer than ``array`` holds -- confirm.
    operation = "result[i] = array[i+1] - array[i]"
    arguments = [
        VectorArg(dtype, "result", with_offset=True),
        VectorArg(dtype, "array", with_offset=True),
    ]
    return get_elwise_kernel(context, arguments, operation, name="diff")
1135
+
1136
+
1137
@context_dependent_memoize
def get_if_positive_kernel(
        context, crit_dtype, then_else_dtype,
        is_then_array, is_else_array,
        is_then_scalar, is_else_scalar):
    """Build the kernel ``result[i] = crit[i] > 0 ? then : else``.

    Each of the two branches is either a buffer argument (``is_*_array``
    true) or a by-value scalar argument. A buffer branch flagged as scalar
    is read at index 0 for every ``i``; otherwise it is read at index
    ``i``. A branch that is not a buffer must be flagged as scalar.
    """

    def describe_branch(arg_name, is_array, is_scalar):
        # Return (access expression, kernel argument) for one branch.
        if not is_array:
            assert is_scalar
            return arg_name, ScalarArg(then_else_dtype, arg_name)

        index = "0" if is_scalar else "i"
        return (f"{arg_name}[{index}]",
                VectorArg(then_else_dtype, arg_name, with_offset=True))

    then_, then_arg = describe_branch("then_", is_then_array, is_then_scalar)
    else_, else_arg = describe_branch("else_", is_else_array, is_else_scalar)

    arguments = [
        VectorArg(then_else_dtype, "result", with_offset=True),
        VectorArg(crit_dtype, "crit", with_offset=True),
        then_arg,
        else_arg,
    ]
    operation = f"result[i] = crit[i] > 0 ? {then_} : {else_}"
    return get_elwise_kernel(context, arguments, operation,
                             name="if_positive")
1165
+
1166
+
1167
@context_dependent_memoize
def get_logical_not_kernel(context, in_dtype):
    """Build the kernel ``z[i] = (y[i] == 0)``.

    The input buffer ``y`` uses *in_dtype*; the result buffer ``z`` is
    declared as ``np.int8``.
    """
    operation = "z[i] = (y[i] == 0)"
    arguments = [
        VectorArg(np.int8, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    return get_elwise_kernel(context, arguments, operation,
                             name="logical_not_kernel")
1175
+
1176
+ # }}}
1177
+
1178
+ # vim: fdm=marker