pyopencl 2025.1__cp313-cp313-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (42) hide show
  1. pyopencl/__init__.py +2410 -0
  2. pyopencl/_cl.cpython-313-darwin.so +0 -0
  3. pyopencl/_cluda.py +54 -0
  4. pyopencl/_mymako.py +14 -0
  5. pyopencl/algorithm.py +1449 -0
  6. pyopencl/array.py +3362 -0
  7. pyopencl/bitonic_sort.py +242 -0
  8. pyopencl/bitonic_sort_templates.py +594 -0
  9. pyopencl/cache.py +535 -0
  10. pyopencl/capture_call.py +177 -0
  11. pyopencl/characterize/__init__.py +456 -0
  12. pyopencl/characterize/performance.py +237 -0
  13. pyopencl/cl/pyopencl-airy.cl +324 -0
  14. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  15. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  16. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  17. pyopencl/cl/pyopencl-complex.h +303 -0
  18. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  19. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  20. pyopencl/cl/pyopencl-random123/array.h +325 -0
  21. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  22. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  23. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  24. pyopencl/clmath.py +280 -0
  25. pyopencl/clrandom.py +409 -0
  26. pyopencl/cltypes.py +137 -0
  27. pyopencl/compyte/.gitignore +21 -0
  28. pyopencl/compyte/__init__.py +0 -0
  29. pyopencl/compyte/array.py +214 -0
  30. pyopencl/compyte/dtypes.py +290 -0
  31. pyopencl/compyte/pyproject.toml +54 -0
  32. pyopencl/elementwise.py +1171 -0
  33. pyopencl/invoker.py +421 -0
  34. pyopencl/ipython_ext.py +68 -0
  35. pyopencl/reduction.py +786 -0
  36. pyopencl/scan.py +1915 -0
  37. pyopencl/tools.py +1527 -0
  38. pyopencl/version.py +9 -0
  39. pyopencl-2025.1.dist-info/METADATA +108 -0
  40. pyopencl-2025.1.dist-info/RECORD +42 -0
  41. pyopencl-2025.1.dist-info/WHEEL +5 -0
  42. pyopencl-2025.1.dist-info/licenses/LICENSE +282 -0
@@ -0,0 +1,1171 @@
1
+ """Elementwise functionality."""
2
+
3
+
4
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
5
+
6
+ __license__ = """
7
+ Permission is hereby granted, free of charge, to any person
8
+ obtaining a copy of this software and associated documentation
9
+ files (the "Software"), to deal in the Software without
10
+ restriction, including without limitation the rights to use,
11
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the
13
+ Software is furnished to do so, subject to the following
14
+ conditions:
15
+
16
+ The above copyright notice and this permission notice shall be
17
+ included in all copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26
+ OTHER DEALINGS IN THE SOFTWARE.
27
+ """
28
+
29
+
30
+ import enum
31
+ from typing import Any, List, Optional, Tuple, Union
32
+
33
+ import numpy as np
34
+
35
+ from pytools import memoize_method
36
+
37
+ import pyopencl as cl
38
+ from pyopencl.tools import (
39
+ DtypedArgument,
40
+ KernelTemplateBase,
41
+ ScalarArg,
42
+ VectorArg,
43
+ context_dependent_memoize,
44
+ dtype_to_c_struct,
45
+ dtype_to_ctype,
46
+ )
47
+
48
+
49
+ # {{{ elementwise kernel code generator
50
+
51
def get_elwise_program(
        context: cl.Context,
        arguments: List[DtypedArgument],
        operation: str, *,
        name: str = "elwise_kernel",
        options: Any = None,
        preamble: str = "",
        loop_prep: str = "",
        after_loop: str = "",
        use_range: bool = False) -> cl.Program:
    """Generate and build the OpenCL program holding one elementwise kernel.

    The generated kernel is called *name*, takes the declarators of
    *arguments* as its parameter list and executes *operation* once per
    index ``i`` inside a loop strided by the global work size.

    :arg preamble: source text emitted before the kernel definition.
    :arg loop_prep: statement(s) emitted inside the kernel before the loop.
    :arg after_loop: statement(s) emitted inside the kernel after the loop.
    :arg use_range: if true, the loop runs from ``start`` towards ``stop`` in
        increments of ``step`` (either sign); otherwise it covers ``0 <= i < n``.
        This function does not declare ``start``/``stop``/``step``/``n``
        itself; they have to be part of *arguments*.
    :arg options: forwarded to ``build`` of the created program.
    :returns: the built program.
    """
    import re
    from warnings import warn

    # 'return' would leave the strided loop altogether, so the remaining
    # elements assigned to this work item would be skipped.
    if re.search(r"\breturn\b", operation) is not None:
        warn("Using a 'return' statement in an element-wise operation will "
                "likely lead to incorrect results. Use "
                "PYOPENCL_ELWISE_CONTINUE instead.",
                stacklevel=3)

    if not use_range:
        loop_template = """//CL//
        for (i = work_group_start + lid; i < n; i += gsize)
        {
            %(operation)s;
        }
        """
    else:
        # The termination test depends on the direction of travel.
        loop_template = r"""//CL//
        if (step < 0)
        {
            for (i = start + (work_group_start + lid)*step;
                    i > stop; i += gsize*step)
            {
                %(operation)s;
            }
        }
        else
        {
            for (i = start + (work_group_start + lid)*step;
                    i < stop; i += gsize*step)
            {
                %(operation)s;
            }
        }
        """

    declarators = ", ".join(arg.declarator() for arg in arguments)
    loop_code = loop_template % {"operation": operation}

    source = f"""//CL//
        {preamble}

        #define PYOPENCL_ELWISE_CONTINUE continue

        __kernel void {name}({declarators})
        {{
            int lid = get_local_id(0);
            int gsize = get_global_size(0);
            int work_group_start = get_local_size(0)*get_group_id(0);
            long i;

            {loop_prep};
            {loop_code}
            {after_loop};
        }}
        """

    return cl.Program(context, source).build(options)
117
+
118
+
119
def get_elwise_kernel_and_types(
        context: cl.Context,
        arguments: Union[str, List[DtypedArgument]],
        operation: str, *,
        name: str = "elwise_kernel",
        options: Any = None,
        preamble: str = "",
        use_range: bool = False,
        **kwargs: Any) -> Tuple[cl.Kernel, List[DtypedArgument]]:
    """Build an elementwise kernel and return it with its full argument list.

    *arguments* is parsed with offset support enabled and then extended by
    the loop-control scalars: ``start``, ``stop``, ``step`` when *use_range*
    is true, otherwise ``n`` (all of type :class:`numpy.intp`).

    Recognized entries of *kwargs*:

    * ``auto_preamble`` (default ``True``): prepend the fp64 pragma when an
      argument is ``float64``/``complex128`` and the ``pyopencl-complex.h``
      include when an argument is complex.
    * ``loop_prep``: code appended to the argument offset adjuster code.

    Everything else in *kwargs* is forwarded to :func:`get_elwise_program`.

    :returns: a tuple ``(kernel, parsed_args)``; the kernel already has its
        scalar argument dtypes set.
    """
    from pyopencl.tools import (
        get_arg_list_arg_types,
        get_arg_offset_adjuster_code,
        parse_arg_list,
    )

    parsed_args = parse_arg_list(arguments, with_offset=True)

    # Consumed here so they are not forwarded twice further down.
    auto_preamble = kwargs.pop("auto_preamble", True)
    user_loop_prep = kwargs.pop("loop_prep", "")

    if auto_preamble:
        arg_dtypes = [arg.dtype for arg in parsed_args]
        header_parts = []

        if any(dt in [np.float64, np.complex128] for dt in arg_dtypes):
            header_parts.append("""
                #if __OPENCL_C_VERSION__ < 120
                #pragma OPENCL EXTENSION cl_khr_fp64: enable
                #endif
                #define PYOPENCL_DEFINE_CDOUBLE
                """)

        # The include has to follow the pragma/define block.
        if any(dt.kind == "c" for dt in arg_dtypes):
            header_parts.append("#include <pyopencl-complex.h>\n")

        if header_parts:
            preamble = "\n".join(header_parts) + "\n" + preamble

    if use_range:
        index_args = [
            ScalarArg(np.intp, index_name)
            for index_name in ("start", "stop", "step")]
    else:
        index_args = [ScalarArg(np.intp, "n")]
    parsed_args.extend(index_args)

    program = get_elwise_program(
            context, parsed_args, operation,
            name=name, options=options, preamble=preamble,
            use_range=use_range,
            loop_prep=get_arg_offset_adjuster_code(parsed_args) + user_loop_prep,
            **kwargs)

    kernel = getattr(program, name)
    kernel.set_scalar_arg_dtypes(get_arg_list_arg_types(parsed_args))

    return kernel, parsed_args
180
+
181
+
182
def get_elwise_kernel(
        context: cl.Context,
        arguments: Union[str, List[DtypedArgument]],
        operation: str, *,
        name: str = "elwise_kernel",
        options: Any = None, **kwargs: Any) -> cl.Kernel:
    """Return a :class:`pyopencl.Kernel` that performs the same scalar
    operation on one or several vectors.

    All parameters are passed on unchanged to
    :func:`get_elwise_kernel_and_types`; only the kernel part of its result
    is returned.
    """
    kernel, _arg_descrs = get_elwise_kernel_and_types(
            context, arguments, operation,
            name=name, options=options, **kwargs)

    return kernel
196
+
197
+ # }}}
198
+
199
+
200
+ # {{{ ElementwiseKernel driver
201
+
202
class ElementwiseKernel:
    """
    A kernel that takes a number of scalar or vector *arguments* and performs
    an *operation* specified as a snippet of C on these arguments.

    :arg arguments: a string formatted as a C argument list.
    :arg operation: a snippet of C that carries out the desired 'map'
        operation. The current index is available as the variable *i*.
        *operation* may contain the statement ``PYOPENCL_ELWISE_CONTINUE``,
        which will terminate processing for the current element.
    :arg name: the function name as which the kernel is compiled
    :arg options: passed unmodified to :meth:`pyopencl.Program.build`.
    :arg preamble: a piece of C source code that gets inserted outside of the
        function context in the elementwise operation's kernel source code.

    .. warning :: Using a ``return`` statement in *operation* will lead to
        incorrect results, as some elements may never get processed. Use
        ``PYOPENCL_ELWISE_CONTINUE`` instead.

    .. versionchanged:: 2013.1

        Added ``PYOPENCL_ELWISE_CONTINUE``.

    .. automethod:: __call__
    """

    def __init__(
            self,
            context: cl.Context,
            arguments: Union[str, List[DtypedArgument]],
            operation: str,
            name: str = "elwise_kernel",
            options: Any = None, **kwargs: Any) -> None:
        # Nothing is compiled here; the inputs are only recorded and the
        # kernel is built on first use by get_kernel().
        self.context = context
        self.arguments = arguments
        self.operation = operation
        self.name = name
        self.options = options
        self.kwargs = kwargs

    @memoize_method
    def get_kernel(self, use_range: bool):
        """Build the kernel variant selected by *use_range*.

        :returns: a tuple ``(kernel, arg_descrs)`` as produced by
            :func:`get_elwise_kernel_and_types`.
        :raises RuntimeError: if none of the arguments is a vector argument.
        """
        knl, arg_descrs = get_elwise_kernel_and_types(
                self.context, self.arguments, self.operation,
                name=self.name, options=self.options,
                use_range=use_range, **self.kwargs)

        for arg in arg_descrs:
            if isinstance(arg, VectorArg) and not arg.with_offset:
                from warnings import warn
                warn(
                    f"ElementwiseKernel '{self.name}' used with VectorArgs "
                    "that do not have offset support enabled. This usage is "
                    "deprecated. Just pass with_offset=True to VectorArg, "
                    "everything should sort itself out automatically.",
                    DeprecationWarning, stacklevel=2)

        if not any(isinstance(arg, VectorArg) for arg in arg_descrs):
            raise RuntimeError(
                "ElementwiseKernel can only be used with functions that have "
                "at least one vector argument")

        return knl, arg_descrs

    def __call__(self, *args, **kwargs) -> cl.Event:
        """
        Invoke the generated scalar kernel.

        The arguments may either be scalars or :class:`pyopencl.array.Array`
        instances.

        Supported keyword arguments:

        * ``range``: an object with ``start``, ``stop`` and ``step``
          attributes restricting the indices that are processed.
        * ``slice``: a :class:`slice`, resolved against the size of the first
          vector argument. May not be combined with ``range``.
        * ``capture_as``: passed to ``capture_call`` of the kernel. The kernel
          is still enqueued afterwards.
        * ``queue``: the queue to enqueue on; defaults to the queue of the
          first vector argument.
        * ``wait_for``: events to wait for before execution.

        :raises TypeError: on unknown keyword arguments, or if both ``range``
            and ``slice`` are given.

        |std-enqueue-blurb|
        """
        range_ = kwargs.pop("range", None)
        slice_ = kwargs.pop("slice", None)
        capture_as = kwargs.pop("capture_as", None)
        queue = kwargs.pop("queue", None)
        wait_for = kwargs.pop("wait_for", None)

        if kwargs:
            raise TypeError(f"unknown keyword arguments: '{', '.join(kwargs)}'")

        use_range = range_ is not None or slice_ is not None
        kernel, arg_descrs = self.get_kernel(use_range)

        # Always work on a private list so the caller's sequence is not aliased.
        wait_for = [] if wait_for is None else list(wait_for)

        # {{{ assemble arg array

        # The first argument in a vector slot supplies the default queue and
        # the default element count.
        repr_vec = None
        invocation_args = []
        for arg, arg_descr in zip(args, arg_descrs):
            if repr_vec is None and isinstance(arg_descr, VectorArg):
                repr_vec = arg
            invocation_args.append(arg)

        assert repr_vec is not None, "no vector argument was passed"

        # }}}

        if queue is None:
            queue = repr_vec.queue

        if slice_ is not None:
            if range_ is not None:
                raise TypeError(
                        "may not specify both range and slice keyword arguments")

            range_ = slice(*slice_.indices(repr_vec.size))

        max_wg_size = kernel.get_work_group_info(
                cl.kernel_work_group_info.WORK_GROUP_SIZE,
                queue.device)

        if range_ is not None:
            start = 0 if range_.start is None else range_.start
            step = 1 if range_.step is None else range_.step

            invocation_args.extend([start, range_.stop, step])

            from pyopencl.array import _splay

            # Use the magnitude of the step: dividing by a negative step
            # would hand a negative element count to _splay. The kernel loop
            # itself is bounds-checked, so this only affects the launch size.
            gs, ls = _splay(queue.device,
                    abs(range_.stop - start)//abs(step),
                    max_wg_size)
        else:
            invocation_args.append(repr_vec.size)
            gs, ls = repr_vec._get_sizes(queue, max_wg_size)

        if capture_as is not None:
            kernel.set_args(*invocation_args)
            kernel.capture_call(
                    capture_as, queue,
                    gs, ls, *invocation_args, wait_for=wait_for)

        return kernel(queue, gs, ls, *invocation_args, wait_for=wait_for)
352
+
353
+ # }}}
354
+
355
+
356
+ # {{{ template
357
+
358
class ElementwiseTemplate(KernelTemplateBase):
    """Template from which :class:`ElementwiseKernel` instances are built.

    *arguments*, *operation*, *name* and *preamble* are stored as given and
    passed through the renderer when :meth:`build_inner` is called.
    """

    def __init__(
            self,
            arguments: Union[str, List[DtypedArgument]],
            operation: str,
            name: str = "elwise",
            preamble: str = "",
            template_processor: Optional[str] = None) -> None:
        super().__init__(template_processor=template_processor)
        self.arguments = arguments
        self.operation = operation
        self.name = name
        self.preamble = preamble

    def build_inner(self, context, type_aliases=(), var_values=(),
            more_preamble="", more_arguments=(), declare_types=(),
            options=None):
        """Render the template and wrap the result in an
        :class:`ElementwiseKernel`.

        The kernel is created with ``auto_preamble=False``; its preamble is
        the type declaration preamble obtained from the renderer followed by
        the rendered template preamble and *more_preamble*.
        """
        renderer = self.get_renderer(
                type_aliases, var_values, context, options)

        arg_list = renderer.render_argument_list(
                self.arguments, more_arguments, with_offset=True)
        type_decl_preamble = renderer.get_type_decl_preamble(
                context.devices[0], declare_types, arg_list)

        rendered_operation = renderer(self.operation)
        rendered_name = renderer(self.name)
        rendered_preamble = renderer(self.preamble + "\n" + more_preamble)

        return ElementwiseKernel(
                context, arg_list, rendered_operation,
                name=rendered_name,
                options=options,
                preamble=type_decl_preamble + "\n" + rendered_preamble,
                auto_preamble=False)
391
+
392
+ # }}}
393
+
394
+
395
+ # {{{ argument kinds
396
+
397
class ArgumentKind(enum.Enum):
    """Ways in which a value can be handed to a generated kernel."""

    # passed as a pointer and indexed with the loop index
    ARRAY = enum.auto()
    # passed as a pointer and read at index 0
    DEV_SCALAR = enum.auto()
    # passed by value
    SCALAR = enum.auto()
401
+
402
+
403
def get_argument_kind(v: Any) -> ArgumentKind:
    """Classify *v* for kernel argument passing.

    Anything that is not a :class:`pyopencl.array.Array` is a
    :attr:`ArgumentKind.SCALAR`. An array with shape ``()`` is a
    :attr:`ArgumentKind.DEV_SCALAR`, every other array is an
    :attr:`ArgumentKind.ARRAY`.
    """
    from pyopencl.array import Array

    if not isinstance(v, Array):
        return ArgumentKind.SCALAR

    return ArgumentKind.DEV_SCALAR if v.shape == () else ArgumentKind.ARRAY
412
+
413
+
414
def get_decl_and_access_for_kind(name: str, kind: ArgumentKind) -> Tuple[str, str]:
    """Return ``(declarator, access expression)`` for an argument *name*.

    Scalars are declared and accessed by their plain name. Arrays and device
    scalars are declared as pointers; arrays are read at index ``i``, device
    scalars at index ``0``.

    :raises AssertionError: if *kind* is none of the known kinds.
    """
    if kind == ArgumentKind.SCALAR:
        return f"{name}", name

    if kind == ArgumentKind.ARRAY:
        index = "i"
    elif kind == ArgumentKind.DEV_SCALAR:
        index = "0"
    else:
        raise AssertionError()

    return f"*{name}", f"{name}[{index}]"
423
+
424
+ # }}}
425
+
426
+
427
+ # {{{ kernels supporting array functionality
428
+
429
@context_dependent_memoize
def get_take_kernel(context, dtype, idx_dtype, vec_count=1):
    """Return a kernel performing ``dest<k>[i] = src<k>[idx[i]]`` for each of
    the *vec_count* destination/source pairs.

    Argument order of the kernel: all ``dest<k>``, all ``src<k>``, ``idx``.
    """
    idx_tp = dtype_to_ctype(idx_dtype)

    args = []
    for prefix in ("dest", "src"):
        args.extend(
            VectorArg(dtype, f"{prefix}{vec}", with_offset=True)
            for vec in range(vec_count))
    args.append(VectorArg(idx_dtype, "idx", with_offset=True))

    # The index is loaded once and shared by all vector pairs.
    copy_lines = [
        f"dest{vec}[i] = src{vec}[src_idx];"
        for vec in range(vec_count)]
    body = f"{idx_tp} src_idx = idx[i];\n" + "\n".join(copy_lines)

    return get_elwise_kernel(
            context, args, body,
            preamble=dtype_to_c_struct(context.devices[0], dtype),
            name="take")
448
+
449
+
450
@context_dependent_memoize
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
    """Return a kernel performing
    ``dest<k>[gmem_dest_idx[i]] = src<k>[gmem_src_idx[i]]`` for each of the
    *vec_count* vector pairs.

    With *with_offsets*, one scalar ``offset<k>`` per pair is appended to the
    argument list and added to the source index.

    Argument order of the kernel: all ``dest<k>``, ``gmem_dest_idx``,
    ``gmem_src_idx``, all ``src<k>``, then the offsets (if any).
    """
    idx_tp = dtype_to_ctype(idx_dtype)

    # NOTE: the destination vectors are declared without with_offset=True.
    args = [VectorArg(dtype, f"dest{vec}") for vec in range(vec_count)]
    args.append(VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True))
    args.append(VectorArg(idx_dtype, "gmem_src_idx", with_offset=True))
    args.extend(
        VectorArg(dtype, f"src{vec}", with_offset=True)
        for vec in range(vec_count))
    if with_offsets:
        args.extend(
            ScalarArg(idx_dtype, f"offset{vec}")
            for vec in range(vec_count))

    copy_lines = [
        (f"dest{vec}[dest_idx] = src{vec}[src_idx + offset{vec}];"
            if with_offsets
            else f"dest{vec}[dest_idx] = src{vec}[src_idx];")
        for vec in range(vec_count)]

    index_loads = (
        f"{idx_tp} src_idx = gmem_src_idx[i];\n"
        f"{idx_tp} dest_idx = gmem_dest_idx[i];\n")
    body = index_loads + "\n".join(copy_lines)

    return get_elwise_kernel(
            context, args, body,
            preamble=dtype_to_c_struct(context.devices[0], dtype),
            name="take_put")
482
+
483
+
484
@context_dependent_memoize
def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Return a kernel that scatters values into ``dest<k>`` at the positions
    given by ``gmem_dest_idx``.

    For vector pair ``k`` the written value is ``src<k>[0]`` if
    ``use_fill[k]`` is nonzero, otherwise
    ``src<k>[i % val_ary_lengths[k]]``.

    Argument order of the kernel: all ``dest<k>``, ``gmem_dest_idx``, all
    ``src<k>``, ``use_fill`` (uint8), ``val_ary_lengths`` (int64).
    """
    idx_tp = dtype_to_ctype(idx_dtype)

    args = [
        VectorArg(dtype, f"dest{vec}", with_offset=True)
        for vec in range(vec_count)]
    args.append(VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True))
    args.extend(
        VectorArg(dtype, f"src{vec}", with_offset=True)
        for vec in range(vec_count))
    args.append(VectorArg(np.uint8, "use_fill", with_offset=True))
    args.append(VectorArg(np.int64, "val_ary_lengths", with_offset=True))

    # In the generated C, the bare 'i' is the element index of the kernel
    # loop; only the numbers substituted for 'vec' vary per line.
    store_lines = [
        f"dest{vec}[dest_idx] = (use_fill[{vec}] ? src{vec}[0] : "
        f"src{vec}[i % val_ary_lengths[{vec}]]);"
        for vec in range(vec_count)]
    body = f"{idx_tp} dest_idx = gmem_dest_idx[i];\n" + "\n".join(store_lines)

    return get_elwise_kernel(
            context, args, body,
            preamble=dtype_to_c_struct(context.devices[0], dtype),
            name="put")
514
+
515
+
516
@context_dependent_memoize
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Return a kernel performing ``dest[i] = src[i]`` with the conversion
    required by the two dtypes.

    For a complex destination, a non-complex source is wrapped in
    ``<name>_fromreal(...)``, and any source of a different dtype is
    (additionally) wrapped in ``<name>_cast(...)``.

    :raises TypeError: if the dtypes differ and at least one of them is a
        struct (kind ``"V"``) type.
    """
    dest_is_complex = dtype_dest.kind == "c"
    src_expr = "src[i]"

    if dest_is_complex and dtype_src.kind != "c":
        dest_name = complex_dtype_to_name(dtype_dest)
        src_expr = f"{dest_name}_fromreal({src_expr})"

    if dest_is_complex and dtype_src != dtype_dest:
        dest_name = complex_dtype_to_name(dtype_dest)
        src_expr = f"{dest_name}_cast({src_expr})"

    if dtype_dest != dtype_src and "V" in (dtype_dest.kind, dtype_src.kind):
        raise TypeError("copying between non-identical struct types")

    tp_dest = dtype_to_ctype(dtype_dest)
    tp_src = dtype_to_ctype(dtype_src)

    return get_elwise_kernel(
            context,
            f"{tp_dest} *dest, {tp_src} *src",
            f"dest[i] = {src_expr}",
            preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
            name="copy")
539
+
540
+
541
def complex_dtype_to_name(dtype) -> str:
    """Return the name prefix used for *dtype* in the generated C code:
    ``"cdouble"`` for ``complex128`` and ``"cfloat"`` for ``complex64``.

    :raises RuntimeError: for every other dtype.
    """
    known_names = (
        (np.complex128, "cdouble"),
        (np.complex64, "cfloat"),
    )
    # Compared with '==' so that dtype instances and scalar types both match.
    for candidate, c_name in known_names:
        if dtype == candidate:
            return c_name

    raise RuntimeError(f"invalid complex type: {dtype}")
548
+
549
+
550
def real_dtype(dtype):
    """Return the dtype of the real part of a scalar of type *dtype*."""
    zero = dtype.type(0)
    return zero.real.dtype
552
+
553
+
554
@context_dependent_memoize
def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z,
                      x_is_scalar=False, y_is_scalar=False):
    """Return a kernel computing ``z[i] = a*x + b*y``.

    ``a`` and ``b`` are scalars declared with the C type of *dtype_z*.
    An operand flagged as scalar is read at index ``0`` instead of ``i``.
    For a complex *dtype_z* the product and sum are spelled with the
    ``<name>_mul``/``<name>_mulr``/``<name>_add`` helpers; otherwise the
    operands are cast to the result type and combined with plain operators.
    """
    result_t = dtype_to_ctype(dtype_z)

    x_expr = "x[0]" if x_is_scalar else "x[i]"
    y_expr = "y[0]" if y_is_scalar else "y[i]"

    if dtype_z.kind == "c":
        # a and b will always be complex here.
        z_ct = complex_dtype_to_name(dtype_z)

        def scaled(coeff, operand, operand_is_complex):
            # complex*complex needs the operand in the result precision,
            # complex*real uses the mixed helper.
            if operand_is_complex:
                return f"{z_ct}_mul({coeff}, {z_ct}_cast({operand}))"
            return f"{z_ct}_mulr({coeff}, {operand})"

        ax = scaled("a", x_expr, dtype_x.kind == "c")
        by = scaled("b", y_expr, dtype_y.kind == "c")
        result = f"{z_ct}_add({ax}, {by})"
    else:
        # real-only
        ax = f"a*(({result_t}) {x_expr})"
        by = f"b*(({result_t}) {y_expr})"
        result = f"{ax} + {by}"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)

    return get_elwise_kernel(
            context,
            f"{result_t} *z, {result_t} a, {tp_x} *x, {result_t} b, {tp_y} *y",
            f"z[i] = {result}",
            name="axpbyz")
596
+
597
+
598
@context_dependent_memoize
def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z):
    """Return a kernel computing ``z[i] = a*x[i] + b`` for scalars ``a`` and
    ``b``.

    Complex operands are combined through the ``<name>_*`` helpers named
    after *dtype_z*; complex operands whose dtype differs from *dtype_z* are
    converted with ``<name>_cast`` first.
    """
    a_is_complex = dtype_a.kind == "c"
    x_is_complex = dtype_x.kind == "c"
    b_is_complex = dtype_b.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # The name lookup is deliberately deferred: it raises for a
        # non-complex dtype_z and must only run when a helper is needed.
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    a_expr = "a"
    x_expr = "x[i]"

    if x_is_complex:
        if dtype_x != dtype_z:
            x_expr = z_call("cast", x_expr)

        if a_is_complex:
            if dtype_a != dtype_z:
                a_expr = z_call("cast", a_expr)
            ax = z_call("mul", a_expr, x_expr)
        else:
            ax = z_call("rmul", a_expr, x_expr)
    elif a_is_complex:
        if dtype_a != dtype_z:
            a_expr = z_call("cast", a_expr)
        ax = z_call("mulr", a_expr, x_expr)
    else:
        ax = "a*x[i]"

    b_expr = "b"
    if z_is_complex:
        if not b_is_complex:
            b_expr = z_call("fromreal", b_expr)
        if not (a_is_complex or x_is_complex):
            ax = z_call("fromreal", ax)

        ax = z_call("cast", ax)
        b_expr = z_call("cast", b_expr)

    if a_is_complex or x_is_complex or b_is_complex:
        expr = z_call("add", ax, b_expr)
    else:
        expr = f"{ax} + {b_expr}"

    tp_a = dtype_to_ctype(dtype_a)
    tp_x = dtype_to_ctype(dtype_x)
    tp_b = dtype_to_ctype(dtype_b)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
            context,
            f"{tp_z} *z, {tp_a} a, {tp_x} *x,{tp_b} b",
            f"z[i] = {expr}",
            name="axpb")
657
+
658
+
659
@context_dependent_memoize
def get_multiply_kernel(context, dtype_x, dtype_y, dtype_z,
                        x_is_scalar=False, y_is_scalar=False):
    """Return a kernel computing ``z[i] = x * y``.

    An operand flagged as scalar is read at index ``0`` instead of ``i``.
    If either operand is complex, the product is spelled with the
    ``<name>_mul``/``<name>_mulr``/``<name>_rmul`` helper named after
    *dtype_z*; otherwise the plain ``*`` operator is used.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"

    def z_call(suffix, *operands):
        # Deferred lookup: only valid (and only needed) for complex results.
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    x_expr = "x[0]" if x_is_scalar else "x[i]"
    y_expr = "y[0]" if y_is_scalar else "y[i]"

    if x_is_complex and dtype_x != dtype_z:
        x_expr = z_call("cast", x_expr)
    if y_is_complex and dtype_y != dtype_z:
        y_expr = z_call("cast", y_expr)

    if x_is_complex:
        product = z_call("mul" if y_is_complex else "mulr", x_expr, y_expr)
    elif y_is_complex:
        product = z_call("rmul", x_expr, y_expr)
    else:
        product = f"{x_expr} * {y_expr}"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
            context,
            f"{tp_z} *z, {tp_x} *x, {tp_y} *y",
            f"z[i] = {product}",
            name="multiply")
690
+
691
+
692
@context_dependent_memoize
def get_divide_kernel(context, dtype_x, dtype_y, dtype_z,
                      x_is_scalar=False, y_is_scalar=False):
    """Return a kernel computing ``z[i] = x / y``.

    An operand flagged as scalar is read at index ``0`` instead of ``i``.
    If either operand is complex, the quotient is spelled with the
    ``<name>_divide``/``<name>_rdivide``/``<name>_divider`` helper named
    after *dtype_z*. A complex result is finally wrapped in ``<name>_cast``.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # Deferred lookup: only valid (and only needed) for complex results.
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    x_expr = "x[0]" if x_is_scalar else "x[i]"
    y_expr = "y[0]" if y_is_scalar else "y[i]"

    if z_is_complex and dtype_x != dtype_y:
        # Mixed operand types with a complex result: only complex operands
        # are converted, using the helper cast.
        if x_is_complex and dtype_x != dtype_z:
            x_expr = z_call("cast", x_expr)
        if y_is_complex and dtype_y != dtype_z:
            y_expr = z_call("cast", y_expr)
    else:
        # Otherwise every operand that differs from the result type gets a
        # C-style cast.
        if dtype_x != dtype_z:
            x_expr = f"({dtype_to_ctype(dtype_z)}) ({x_expr})"
        if dtype_y != dtype_z:
            y_expr = f"({dtype_to_ctype(dtype_z)}) ({y_expr})"

    if x_is_complex or y_is_complex:
        if x_is_complex and y_is_complex:
            helper = "divide"
        elif y_is_complex:
            helper = "rdivide"
        else:
            helper = "divider"
        quotient = z_call(helper, x_expr, y_expr)
    else:
        quotient = f"{x_expr} / {y_expr}"

    if z_is_complex:
        quotient = z_call("cast", quotient)

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
            context,
            f"{tp_z} *z, {tp_x} *x, {tp_y} *y",
            f"z[i] = {quotient}",
            name="divide")
733
+
734
+
735
@context_dependent_memoize
def get_rdivide_elwise_kernel(context, dtype_x, dtype_y, dtype_z):
    """Return a kernel computing ``z[i] = y / x[i]`` for a scalar ``y``.

    Note the operand order: this implements ``y / x``, not ``x / y``.
    If either operand is complex, the quotient is spelled with the
    ``<name>_divide``/``<name>_rdivide``/``<name>_divider`` helper named
    after *dtype_z*.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # Deferred lookup: only valid (and only needed) for complex results.
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    x_expr = "x[i]"
    y_expr = "y"

    if z_is_complex and dtype_x != dtype_y:
        if x_is_complex and dtype_x != dtype_z:
            x_expr = z_call("cast", x_expr)
        if y_is_complex and dtype_y != dtype_z:
            y_expr = z_call("cast", y_expr)

    # The numerator is y, so the helper is chosen from y's point of view.
    if x_is_complex or y_is_complex:
        if x_is_complex and y_is_complex:
            helper = "divide"
        elif x_is_complex:
            helper = "rdivide"
        else:
            helper = "divider"
        quotient = z_call(helper, y_expr, x_expr)
    else:
        quotient = f"{y_expr} / {x_expr}"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    return get_elwise_kernel(
            context,
            f"{tp_z} *z, {tp_x} *x, {tp_y} y",
            f"z[i] = {quotient}",
            name="divide_r")
768
+
769
+
770
@context_dependent_memoize
def get_fill_kernel(context, dtype):
    """Return a kernel performing ``z[i] = a`` for a scalar ``a`` of the same
    C type as the elements of ``z``.
    """
    tp = dtype_to_ctype(dtype)

    return get_elwise_kernel(
            context,
            f"{tp} *z, {tp} a",
            "z[i] = a",
            preamble=dtype_to_c_struct(context.devices[0], dtype),
            name="fill")
777
+
778
+
779
@context_dependent_memoize
def get_reverse_kernel(context, dtype):
    """Return a kernel performing ``z[i] = y[n-1-i]``.

    ``n`` is not declared here; the operation relies on the element count
    argument of that name being part of the generated kernel.
    """
    tp = dtype_to_ctype(dtype)

    return get_elwise_kernel(
            context,
            f"{tp} *z, {tp} *y",
            "z[i] = y[n-1-i]",
            name="reverse")
785
+
786
+
787
@context_dependent_memoize
def get_arange_kernel(context, dtype):
    """Return a kernel performing ``z[i] = start + i*step`` for scalars
    ``start`` and ``step`` of type *dtype*.

    For complex *dtype* the expression is spelled with the ``<name>_add`` and
    ``<name>_rmul`` helpers; otherwise ``i`` is cast to the C type of *dtype*
    before the multiplication.
    """
    if dtype.kind == "c":
        root = complex_dtype_to_name(dtype)
        expr = f"{root}_add(start, {root}_rmul(i, step))"
    else:
        expr = f"start + (({dtype_to_ctype(dtype)}) i) * step"

    kernel_args = [
        VectorArg(dtype, "z", with_offset=True),
        ScalarArg(dtype, "start"),
        ScalarArg(dtype, "step"),
    ]

    return get_elwise_kernel(
            context, kernel_args,
            f"z[i] = {expr}",
            name="arange")
803
+
804
+
805
@context_dependent_memoize
def get_pow_kernel(context, dtype_x, dtype_y, dtype_z,
                   is_base_array, is_exp_array):
    """Return a kernel computing ``z[i] = x ** y``.

    Base and exponent are each either an array (read at index ``i``) or a
    by-value scalar, selected by *is_base_array* and *is_exp_array*.
    If either operand is complex, the ``<name>_pow``/``<name>_powr``/
    ``<name>_rpow`` helper named after *dtype_z* is used; otherwise ``pow``.
    """
    x_is_complex = dtype_x.kind == "c"
    y_is_complex = dtype_y.kind == "c"
    z_is_complex = dtype_z.kind == "c"

    def z_call(suffix, *operands):
        # Deferred lookup: only valid (and only needed) for complex results.
        return "{}_{}({})".format(
            complex_dtype_to_name(dtype_z), suffix, ", ".join(operands))

    x_expr = "x[i]" if is_base_array else "x"
    y_expr = "y[i]" if is_exp_array else "y"

    # Conversions are only inserted when the operand types differ.
    if dtype_x != dtype_y:
        if z_is_complex:
            if x_is_complex and dtype_x != dtype_z:
                x_expr = z_call("cast", x_expr)
            if y_is_complex and dtype_y != dtype_z:
                y_expr = z_call("cast", y_expr)
        else:
            if dtype_x != dtype_z:
                x_expr = f"({dtype_to_ctype(dtype_z)}) ({x_expr})"
            if dtype_y != dtype_z:
                y_expr = f"({dtype_to_ctype(dtype_z)}) ({y_expr})"

    if x_is_complex:
        result = z_call("pow" if y_is_complex else "powr", x_expr, y_expr)
    elif y_is_complex:
        result = z_call("rpow", x_expr, y_expr)
    else:
        result = f"pow({x_expr}, {y_expr})"

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)

    x_decl = f"{tp_x} *x" if is_base_array else f"{tp_x} x"
    y_decl = f"{tp_y} *y" if is_exp_array else f"{tp_y} y"

    return get_elwise_kernel(
            context,
            f"{tp_z} *z, {x_decl}, {y_decl}",
            f"z[i] = {result}",
            name="pow_method")
854
+
855
+
856
@context_dependent_memoize
def get_unop_kernel(context, operator, res_dtype, in_dtype):
    """Return a kernel performing ``z[i] = <operator> y[i]``, where
    *operator* is pasted verbatim into the C source.
    """
    kernel_args = [
        VectorArg(res_dtype, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    operation = f"z[i] = {operator} y[i]"

    return get_elwise_kernel(
            context, kernel_args, operation,
            name="unary_op_kernel")
864
+
865
+
866
@context_dependent_memoize
def get_array_scalar_binop_kernel(context, operator, dtype_res, dtype_a, dtype_b):
    """Return a kernel performing ``out[i] = a[i] <operator> b`` for an array
    ``a`` and a by-value scalar ``b``; *operator* is pasted verbatim into the
    C source.
    """
    kernel_args = [
        VectorArg(dtype_res, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        ScalarArg(dtype_b, "b"),
    ]
    operation = f"out[i] = a[i] {operator} b"

    return get_elwise_kernel(
            context, kernel_args, operation,
            name="scalar_binop_kernel")
875
+
876
+
877
@context_dependent_memoize
def get_array_binop_kernel(context, operator, dtype_res, dtype_a, dtype_b,
                           a_is_scalar=False, b_is_scalar=False):
    """Return a kernel performing ``out[i] = a <operator> b`` on two vector
    arguments; *operator* is pasted verbatim into the C source.

    An operand flagged as scalar is still passed as a vector argument but is
    read at index ``0`` instead of ``i``.
    """
    lhs = "a[0]" if a_is_scalar else "a[i]"
    rhs = "b[0]" if b_is_scalar else "b[i]"

    kernel_args = [
        VectorArg(dtype_res, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        VectorArg(dtype_b, "b", with_offset=True),
    ]
    operation = f"out[i] = {lhs} {operator} {rhs}"

    return get_elwise_kernel(
            context, kernel_args, operation,
            name="binop_kernel")
889
+
890
+
891
@context_dependent_memoize
def get_array_scalar_comparison_kernel(context, operator, dtype_a):
    """Return a kernel performing ``out[i] = a[i] <operator> b`` for an array
    ``a`` and a by-value scalar ``b`` of the same dtype; the result is stored
    in an ``int8`` array.
    """
    kernel_args = [
        VectorArg(np.int8, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        ScalarArg(dtype_a, "b"),
    ]
    operation = f"out[i] = a[i] {operator} b"

    return get_elwise_kernel(
            context, kernel_args, operation,
            name="scalar_comparison_kernel")
900
+
901
+
902
@context_dependent_memoize
def get_array_comparison_kernel(context, operator, dtype_a, dtype_b):
    """Return a kernel performing ``out[i] = a[i] <operator> b[i]``; the
    result is stored in an ``int8`` array.
    """
    kernel_args = [
        VectorArg(np.int8, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        VectorArg(dtype_b, "b", with_offset=True),
    ]
    operation = f"out[i] = a[i] {operator} b[i]"

    return get_elwise_kernel(
            context, kernel_args, operation,
            name="comparison_kernel")
911
+
912
+
913
@context_dependent_memoize
def get_unary_func_kernel(context, func_name, in_dtype, out_dtype=None):
    """Return a kernel performing ``z[i] = <func_name>(y[i])``.

    :arg out_dtype: dtype of ``z``; defaults to *in_dtype*.

    The kernel is named ``<func_name>_kernel``.
    """
    result_dtype = in_dtype if out_dtype is None else out_dtype

    kernel_args = [
        VectorArg(result_dtype, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    operation = f"z[i] = {func_name}(y[i])"

    return get_elwise_kernel(
            context, kernel_args, operation,
            name=f"{func_name}_kernel")
924
+
925
+
926
@context_dependent_memoize
def get_binary_func_kernel(context, func_name, x_dtype, y_dtype, out_dtype,
                           preamble="", name=None):
    """Return a kernel performing ``z[i] = <func_name>(x[i], y[i])``.

    :arg preamble: source text passed on as the kernel preamble.
    :arg name: base of the kernel name (``<name>_kernel``); defaults to
        *func_name*.
    """
    base_name = func_name if name is None else name

    kernel_args = [
        VectorArg(out_dtype, "z", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
        VectorArg(y_dtype, "y", with_offset=True),
    ]
    operation = f"z[i] = {func_name}(x[i], y[i])"

    return get_elwise_kernel(
            context, kernel_args, operation,
            name=f"{base_name}_kernel",
            preamble=preamble)
940
+
941
+
942
@context_dependent_memoize
def get_float_binary_func_kernel(context, func_name, x_dtype, y_dtype,
        out_dtype, preamble="", name=None):
    """Build a two-argument function kernel whose operands are cast to a
    floating point type before the call.

    The generated operation is
    ``z[i] = <func_name>((T)x[i], (T)y[i])`` where ``T`` is ``double`` if
    the numpy product of a zero of *x_dtype* and a zero of *y_dtype* has an
    item size above four bytes, and ``float`` otherwise. In the ``double``
    case, an fp64-enabling preamble is put in front of *preamble*.

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg func_name: name of the function called in the kernel source.
    :arg x_dtype: dtype of the first input vector ``x``.
    :arg y_dtype: dtype of the second input vector ``y``.
    :arg out_dtype: dtype of the output vector ``z``.
    :arg preamble: text forwarded (possibly prefixed, see above) as the
        ``preamble`` of the kernel.
    :arg name: stem of the kernel name; ``_kernel`` is appended to it.
        Falls back to *func_name* when *None*.
    :returns: whatever :func:`get_elwise_kernel` returns.
    """
    kernel_stem = func_name if name is None else name

    # Let numpy decide the common dtype of the two operands by multiplying
    # zero-dimensional zeros of each dtype.
    common_itemsize = (np.array(0, x_dtype) * np.array(0, y_dtype)).itemsize
    wants_double = common_itemsize > 4

    arg_type = "double" if wants_double else "float"
    if wants_double:
        preamble = """
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        """ + preamble

    arguments = [
        VectorArg(out_dtype, "z", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
        VectorArg(y_dtype, "y", with_offset=True),
    ]

    return get_elwise_kernel(
        context, arguments,
        f"z[i] = {func_name}(({arg_type})x[i], ({arg_type})y[i])",
        name=f"{kernel_stem}_kernel",
        preamble=preamble)
967
+
968
+
969
@context_dependent_memoize
def get_fmod_kernel(context, out_dtype=np.float32, arg_dtype=np.float32,
        mod_dtype=np.float32):
    """Build the elementwise ``fmod`` kernel.

    Thin wrapper around :func:`get_float_binary_func_kernel` with the
    function name fixed to ``"fmod"``.

    :arg context: passed through to the wrapped builder.
    :arg out_dtype: dtype of the output vector.
    :arg arg_dtype: dtype of the first operand vector.
    :arg mod_dtype: dtype of the second operand vector.
    :returns: whatever :func:`get_float_binary_func_kernel` returns.
    """
    func_name = "fmod"
    return get_float_binary_func_kernel(
        context, func_name, arg_dtype, mod_dtype, out_dtype)
974
+
975
+
976
@context_dependent_memoize
def get_modf_kernel(context, int_dtype=np.float32,
        frac_dtype=np.float32, x_dtype=np.float32):
    """Build the elementwise ``modf`` kernel.

    For each entry the kernel calls ``modf(x[i], &intpart[i])`` and stores
    the return value into ``fracpart[i]``.

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg int_dtype: dtype of the ``intpart`` output vector.
    :arg frac_dtype: dtype of the ``fracpart`` output vector.
    :arg x_dtype: dtype of the input vector ``x``.
    :returns: whatever :func:`get_elwise_kernel` returns.
    """
    arguments = [
        VectorArg(int_dtype, "intpart", with_offset=True),
        VectorArg(frac_dtype, "fracpart", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    operation = """
        fracpart[i] = modf(x[i], &intpart[i])
        """

    return get_elwise_kernel(
        context, arguments, operation,
        name="modf_kernel")
988
+
989
+
990
@context_dependent_memoize
def get_frexp_kernel(context, sign_dtype=np.float32, exp_dtype=np.float32,
        x_dtype=np.float32):
    """Build the elementwise ``frexp`` kernel.

    For each entry the kernel calls ``frexp(x[i], &expt)`` with a local
    ``int expt``, stores the return value into ``significand[i]`` and then
    copies ``expt`` into ``exponent[i]``.

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg sign_dtype: dtype of the ``significand`` output vector.
    :arg exp_dtype: dtype of the ``exponent`` output vector.
    :arg x_dtype: dtype of the input vector ``x``.
    :returns: whatever :func:`get_elwise_kernel` returns.
    """
    arguments = [
        VectorArg(sign_dtype, "significand", with_offset=True),
        VectorArg(exp_dtype, "exponent", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    # frexp() writes through an int pointer, hence the temporary.
    operation = """
        int expt = 0;
        significand[i] = frexp(x[i], &expt);
        exponent[i] = expt;
        """

    return get_elwise_kernel(
        context, arguments, operation,
        name="frexp_kernel")
1004
+
1005
+
1006
@context_dependent_memoize
def get_ldexp_kernel(context, out_dtype=np.float32, sig_dtype=np.float32,
        expt_dtype=np.float32):
    """Build the elementwise ``ldexp`` kernel.

    Delegates to :func:`get_binary_func_kernel` using a helper macro
    ``_PYOCL_LDEXP(x, y)`` that expands to ``ldexp(x, (int)(y))``, i.e. the
    second operand is cast to ``int`` inside the kernel.

    :arg context: passed through to :func:`get_binary_func_kernel`.
    :arg out_dtype: dtype of the output vector.
    :arg sig_dtype: dtype of the first operand vector.
    :arg expt_dtype: dtype of the second operand vector.
    :returns: whatever :func:`get_binary_func_kernel` returns.
    """
    macro_name = "_PYOCL_LDEXP"
    cast_macro = "#define _PYOCL_LDEXP(x, y) ldexp(x, (int)(y))"

    return get_binary_func_kernel(
        context, macro_name, sig_dtype, expt_dtype, out_dtype,
        preamble=cast_macro,
        name="ldexp_kernel")
1013
+
1014
+
1015
@context_dependent_memoize
def get_minmaximum_kernel(context, minmax, dtype_z, dtype_x, dtype_y,
        kind_x: ArgumentKind, kind_y: ArgumentKind):
    """Build the elementwise minimum/maximum kernel.

    The generated operation is ``z[i] = <func>(<x access>, <y access>)``.
    For a floating point result dtype, ``<func>`` is the macro
    ``f<minmax>_nanprop``, which yields ``a + b`` if either operand is NaN
    and ``f<minmax>(a, b)`` otherwise. For integer result dtypes, ``<func>``
    is *minmax* itself.

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg minmax: function name stem spliced into the kernel source and
        into the kernel name ``<minmax>imum``.
    :arg dtype_z: dtype of the output ``z``; may be a dtype instance or
        anything :class:`numpy.dtype` accepts.
    :arg dtype_x: dtype of the operand ``x``.
    :arg dtype_y: dtype of the operand ``y``.
    :arg kind_x: argument kind of ``x``, forwarded to
        :func:`get_decl_and_access_for_kind`.
    :arg kind_y: argument kind of ``y``, forwarded to
        :func:`get_decl_and_access_for_kind`.
    :returns: whatever :func:`get_elwise_kernel` returns.
    :raises TypeError: if the result dtype is neither floating point nor
        (signed or unsigned) integer.
    """
    # Normalize first: numpy scalar type classes have no ``.kind``.
    result_kind = np.dtype(dtype_z).kind

    if result_kind == "f":
        reduce_func = f"f{minmax}_nanprop"
    elif result_kind in "iu":
        reduce_func = minmax
    else:
        raise TypeError("unsupported dtype specified")

    tp_x = dtype_to_ctype(dtype_x)
    tp_y = dtype_to_ctype(dtype_y)
    tp_z = dtype_to_ctype(dtype_z)
    decl_x, acc_x = get_decl_and_access_for_kind("x", kind_x)
    decl_y, acc_y = get_decl_and_access_for_kind("y", kind_y)

    # Macro arguments and the whole expansion are parenthesized so that the
    # ternary cannot be re-associated by surrounding operators or by
    # compound argument expressions.
    preamble = (
        "\n"
        "#define fmin_nanprop(a, b) "
        "((isnan(a) || isnan(b)) ? ((a) + (b)) : fmin((a), (b)))\n"
        "#define fmax_nanprop(a, b) "
        "((isnan(a) || isnan(b)) ? ((a) + (b)) : fmax((a), (b)))\n"
    )

    return get_elwise_kernel(context,
            f"{tp_z} *z, {tp_x} {decl_x}, {tp_y} {decl_y}",
            f"z[i] = {reduce_func}({acc_x}, {acc_y})",
            name=f"{minmax}imum",
            preamble=preamble)
1039
+
1040
+
1041
@context_dependent_memoize
def get_bessel_kernel(context, which_func, out_dtype=np.float64,
        order_dtype=np.int32, x_dtype=np.float64):
    """Build the elementwise Bessel function kernel.

    For a non-complex *x_dtype*, the generated operation is
    ``z[i] = bessel_<which_func>n(ord_n, x[i])`` and the preamble includes
    ``pyopencl-bessel-<which_func>.cl``. For a complex *x_dtype*, the
    kernel calls ``bessel_j_complex`` and stores its first output into
    ``z[i]``.

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg which_func: suffix selecting the Bessel routine; only ``"j"`` is
        accepted for complex arguments.
    :arg out_dtype: dtype of the output vector ``z``.
    :arg order_dtype: dtype of the scalar order argument ``ord_n``.
    :arg x_dtype: dtype of the input vector ``x``.
    :returns: whatever :func:`get_elwise_kernel` returns.
    :raises NotImplementedError: for complex arguments if *which_func* is
        not ``"j"``, if *x_dtype* is not ``complex128``, or if *x_dtype*
        and *out_dtype* differ.
    """
    # The defaults are numpy scalar type classes, which do not have a
    # ``.kind`` attribute. Normalizing here makes the defaults usable and
    # accepts both type classes and dtype instances from callers.
    out_dtype = np.dtype(out_dtype)
    x_dtype = np.dtype(x_dtype)

    if x_dtype.kind != "c":
        return get_elwise_kernel(context, [
            VectorArg(out_dtype, "z", with_offset=True),
            ScalarArg(order_dtype, "ord_n"),
            VectorArg(x_dtype, "x", with_offset=True),
            ],
            f"z[i] = bessel_{which_func}n(ord_n, x[i])",
            name=f"bessel_{which_func}n_kernel",
            preamble=f"""
            #if __OPENCL_C_VERSION__ < 120
            #pragma OPENCL EXTENSION cl_khr_fp64: enable
            #endif
            #define PYOPENCL_DEFINE_CDOUBLE
            #include <pyopencl-bessel-{which_func}.cl>
            """)

    # Complex argument from here on.
    if which_func != "j":
        raise NotImplementedError("complex arguments for Bessel Y")

    if x_dtype != np.complex128:
        raise NotImplementedError("non-complex double dtype")
    if x_dtype != out_dtype:
        raise NotImplementedError("different input/output types")

    return get_elwise_kernel(context, [
        VectorArg(out_dtype, "z", with_offset=True),
        ScalarArg(order_dtype, "ord_n"),
        VectorArg(x_dtype, "x", with_offset=True),
        ],
        """
        cdouble_t jv_loc;
        cdouble_t jvp1_loc;
        bessel_j_complex(ord_n, x[i], &jv_loc, &jvp1_loc);
        z[i] = jv_loc;
        """,
        name="bessel_j_complex_kernel",
        preamble="""
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        #include <pyopencl-complex.h>
        #include <pyopencl-bessel-j-complex.cl>
        """)
1088
+
1089
+
1090
@context_dependent_memoize
def get_hankel_01_kernel(context, out_dtype, x_dtype):
    """Build the elementwise kernel evaluating two Hankel outputs at once.

    For each entry the kernel calls
    ``hankel_01_complex(x[i], &h0_loc, &h1_loc, 1)`` and stores the two
    results into ``h0[i]`` and ``h1[i]``.

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg out_dtype: dtype of both output vectors ``h0`` and ``h1``.
    :arg x_dtype: dtype of the input vector ``x``.
    :returns: whatever :func:`get_elwise_kernel` returns.
    :raises NotImplementedError: if *x_dtype* is not ``complex128`` or if
        *x_dtype* and *out_dtype* differ.
    """
    if x_dtype != np.complex128:
        raise NotImplementedError("non-complex double dtype")
    if x_dtype != out_dtype:
        raise NotImplementedError("different input/output types")

    arguments = [
        VectorArg(out_dtype, "h0", with_offset=True),
        VectorArg(out_dtype, "h1", with_offset=True),
        VectorArg(x_dtype, "x", with_offset=True),
    ]
    operation = """
        cdouble_t h0_loc;
        cdouble_t h1_loc;
        hankel_01_complex(x[i], &h0_loc, &h1_loc, 1);
        h0[i] = h0_loc;
        h1[i] = h1_loc;
        """
    fp64_preamble = """
        #if __OPENCL_C_VERSION__ < 120
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        #endif
        #define PYOPENCL_DEFINE_CDOUBLE
        #include <pyopencl-complex.h>
        #include <pyopencl-hankel-complex.cl>
        """

    return get_elwise_kernel(
        context, arguments, operation,
        name="hankel_complex_kernel",
        preamble=fp64_preamble)
1118
+
1119
+
1120
@context_dependent_memoize
def get_diff_kernel(context, dtype):
    """Build the elementwise forward-difference kernel.

    The generated operation is ``result[i] = array[i+1] - array[i]``.

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg dtype: dtype of both the ``result`` and the ``array`` vector.
    :returns: whatever :func:`get_elwise_kernel` returns.
    """
    arguments = [
        VectorArg(dtype, "result", with_offset=True),
        VectorArg(dtype, "array", with_offset=True),
    ]
    # NOTE(review): the operation reads array[i+1], so the index range used
    # at launch presumably has to stop one short of the end of ``array`` --
    # confirm at the call sites.
    operation = "result[i] = array[i+1] - array[i]"

    return get_elwise_kernel(context, arguments, operation, name="diff")
1128
+
1129
+
1130
@context_dependent_memoize
def get_if_positive_kernel(
        context, crit_dtype, then_else_dtype,
        is_then_array, is_else_array,
        is_then_scalar, is_else_scalar):
    """Build the elementwise kernel selecting between two values by sign.

    The generated operation is
    ``result[i] = crit[i] > 0 ? <then> : <else>``. Each of the two branch
    values is one of:

    * a vector argument indexed by ``i`` (array, not scalar),
    * a vector argument indexed by ``0`` (array and scalar),
    * a scalar argument (not an array; the scalar flag must then be set).

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg crit_dtype: dtype of the ``crit`` vector.
    :arg then_else_dtype: dtype of ``result`` and of both branch values.
    :arg is_then_array: whether ``then_`` is passed as a vector argument.
    :arg is_else_array: whether ``else_`` is passed as a vector argument.
    :arg is_then_scalar: whether ``then_`` holds a single value.
    :arg is_else_scalar: whether ``else_`` holds a single value.
    :returns: whatever :func:`get_elwise_kernel` returns.
    """

    def _branch(arg_name, is_array, is_scalar):
        # Returns (source expression, kernel argument) for one branch.
        if not is_array:
            # A non-array branch value can only be a plain scalar.
            assert is_scalar
            return arg_name, ScalarArg(then_else_dtype, arg_name)

        access = f"{arg_name}[0]" if is_scalar else f"{arg_name}[i]"
        return access, VectorArg(then_else_dtype, arg_name, with_offset=True)

    then_, then_arg = _branch("then_", is_then_array, is_then_scalar)
    else_, else_arg = _branch("else_", is_else_array, is_else_scalar)

    arguments = [
        VectorArg(then_else_dtype, "result", with_offset=True),
        VectorArg(crit_dtype, "crit", with_offset=True),
        then_arg,
        else_arg,
    ]

    return get_elwise_kernel(
        context, arguments,
        f"result[i] = crit[i] > 0 ? {then_} : {else_}",
        name="if_positive")
1158
+
1159
+
1160
@context_dependent_memoize
def get_logical_not_kernel(context, in_dtype):
    """Build the elementwise logical-not kernel.

    The generated operation is ``z[i] = (y[i] == 0)``, with the result
    written to an ``int8`` vector named ``z``.

    :arg context: passed through to :func:`get_elwise_kernel`.
    :arg in_dtype: dtype of the input vector ``y``.
    :returns: whatever :func:`get_elwise_kernel` returns.
    """
    arguments = [
        VectorArg(np.int8, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    operation = "z[i] = (y[i] == 0)"

    return get_elwise_kernel(
        context, arguments, operation,
        name="logical_not_kernel")
1168
+
1169
+ # }}}
1170
+
1171
+ # vim: fdm=marker