pyopencl 2026.1.1__cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. pyopencl/.libs/libOpenCL-34a55fe4.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +1995 -0
  3. pyopencl/_cl.cpython-314t-aarch64-linux-gnu.so +0 -0
  4. pyopencl/_cl.pyi +2009 -0
  5. pyopencl/_cluda.py +57 -0
  6. pyopencl/_monkeypatch.py +1104 -0
  7. pyopencl/_mymako.py +17 -0
  8. pyopencl/algorithm.py +1454 -0
  9. pyopencl/array.py +3530 -0
  10. pyopencl/bitonic_sort.py +245 -0
  11. pyopencl/bitonic_sort_templates.py +597 -0
  12. pyopencl/cache.py +553 -0
  13. pyopencl/capture_call.py +200 -0
  14. pyopencl/characterize/__init__.py +461 -0
  15. pyopencl/characterize/performance.py +240 -0
  16. pyopencl/cl/pyopencl-airy.cl +324 -0
  17. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  18. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  19. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  20. pyopencl/cl/pyopencl-complex.h +303 -0
  21. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  22. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  23. pyopencl/cl/pyopencl-random123/array.h +325 -0
  24. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  25. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  26. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  27. pyopencl/clmath.py +281 -0
  28. pyopencl/clrandom.py +412 -0
  29. pyopencl/cltypes.py +217 -0
  30. pyopencl/compyte/.gitignore +21 -0
  31. pyopencl/compyte/__init__.py +0 -0
  32. pyopencl/compyte/array.py +211 -0
  33. pyopencl/compyte/dtypes.py +314 -0
  34. pyopencl/compyte/pyproject.toml +49 -0
  35. pyopencl/elementwise.py +1288 -0
  36. pyopencl/invoker.py +417 -0
  37. pyopencl/ipython_ext.py +70 -0
  38. pyopencl/py.typed +0 -0
  39. pyopencl/reduction.py +829 -0
  40. pyopencl/scan.py +1921 -0
  41. pyopencl/tools.py +1680 -0
  42. pyopencl/typing.py +61 -0
  43. pyopencl/version.py +11 -0
  44. pyopencl-2026.1.1.dist-info/METADATA +108 -0
  45. pyopencl-2026.1.1.dist-info/RECORD +47 -0
  46. pyopencl-2026.1.1.dist-info/WHEEL +6 -0
  47. pyopencl-2026.1.1.dist-info/licenses/LICENSE +104 -0
pyopencl/array.py ADDED
@@ -0,0 +1,3530 @@
+ """CL device arrays."""
+
+ from __future__ import annotations
+
+
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
+
+ __license__ = """
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
+ """
+
+ import builtins
+ from dataclasses import dataclass
+ from functools import reduce
+ from numbers import Number
+ from typing import (
+     TYPE_CHECKING,
+     Any,
+     ClassVar,
+     Concatenate,
+     Literal,
+     ParamSpec,
+     TypeAlias,
+     TypeVar,
+     cast,
+ )
+ from warnings import warn
+
+ import numpy as np
+ from typing_extensions import Self, TypeIs, override
+
+ import pyopencl as cl
+ import pyopencl.elementwise as elementwise
+ from pyopencl import cltypes
+ from pyopencl.characterize import has_double_support
+ from pyopencl.compyte.array import (
+     ArrayFlags as _ArrayFlags,
+     as_strided as _as_strided,
+     c_contiguous_strides as _c_contiguous_strides,
+     equal_strides as _equal_strides,
+     f_contiguous_strides as _f_contiguous_strides,
+ )
+ from pyopencl.typing import Allocator
+
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable, Hashable
+
+     from numpy.typing import DTypeLike, NDArray
+
+
+ SCALAR_CLASSES = (Number, np.bool_, bool)
+
+ if cl.get_cl_header_version() >= (2, 0):
+     _SVMPointer_or_nothing = cl.SVMPointer
+ else:
+     _SVMPointer_or_nothing = ()
+
+
+ ArrayT = TypeVar("ArrayT", bound="Array")
+
+
+ class _NoValue:
+     pass
+
+
+ # {{{ _get_common_dtype
+
+ class DoubleDowncastWarning(UserWarning):
+     pass
+
+
+ _DOUBLE_DOWNCAST_WARNING = (
+     "The operation you requested would result in a double-precision "
+     "quantity according to numpy semantics. Since your device does not "
+     "support double precision, a single-precision quantity is being returned.")
+
+
+ def _get_common_dtype(obj1, obj2, queue):
+     if queue is None:
+         raise ValueError("PyOpenCL array has no queue; call .with_queue() to "
+             "add one in order to be able to perform operations")
+
+     # Note: We are calling np.result_type with pyopencl arrays here.
+     # Luckily, np.result_type only looks at the dtype of input arrays up until
+     # at least numpy v2.1.
+     result = np.result_type(obj1, obj2)
+
+     if not has_double_support(queue.device):
+         if result == np.float64:
+             result = np.dtype(np.float32)
+             warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
+         elif result == np.complex128:
+             result = np.dtype(np.complex64)
+             warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
+
+     return result
+
+ # }}}
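
To illustrate the promotion rule above, a minimal sketch of the observable behavior (editorial illustration, not part of the packaged file; assumes any OpenCL implementation reachable through cl.create_some_context()):

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.arange(4, dtype=np.int32))

# int32 + Python float promotes to float64 under numpy's rules; on a
# device without fp64 support the result arrives as float32 instead,
# and a DoubleDowncastWarning is emitted.
b = a + 2.5
print(b.dtype)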
+
+
+ # {{{ _get_truedivide_dtype
+
+ def _get_truedivide_dtype(obj1, obj2, queue):
+     # the dtype of the division result obj1 / obj2
+
+     allow_double = has_double_support(queue.device)
+
+     x1 = obj1 if np.isscalar(obj1) else np.ones(1, obj1.dtype)
+     x2 = obj2 if np.isscalar(obj2) else np.ones(1, obj2.dtype)
+
+     result = (x1/x2).dtype
+
+     if not allow_double:
+         if result == np.float64:
+             result = np.dtype(np.float32)
+         elif result == np.complex128:
+             result = np.dtype(np.complex64)
+
+     return result
+
+ # }}}
+
+
+ # {{{ _get_broadcasted_binary_op_result
+
+ def _get_broadcasted_binary_op_result(obj1, obj2, cq,
+                                       dtype_getter=_get_common_dtype):
+
+     if obj1.shape == obj2.shape:
+         return obj1._new_like_me(dtype_getter(obj1, obj2, cq), cq)
+     elif obj1.shape == ():
+         return obj2._new_like_me(dtype_getter(obj1, obj2, cq), cq)
+     elif obj2.shape == ():
+         return obj1._new_like_me(dtype_getter(obj1, obj2, cq), cq)
+     else:
+         raise NotImplementedError("Broadcasting binary operator with shapes:"
+             f" {obj1.shape}, {obj2.shape}.")
+
+ # }}}
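
As the helper above shows, only shape-() ("device scalar") operands are broadcast; all other shape mismatches raise NotImplementedError. A short sketch (editorial illustration, same caveats as above):

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

x = cl_array.to_device(queue, np.arange(6, dtype=np.float32))
s = cl_array.to_device(queue, np.array(2, dtype=np.float32))  # shape ()

print((x + s).get())   # the shape-() operand broadcasts against (6,)

y = cl_array.to_device(queue, np.ones((2, 3), dtype=np.float32))
# x + y raises NotImplementedError: shapes (6,) and (2, 3) are not
# broadcast against each other here.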
+
+
+ # {{{ VecLookupWarner
+
+ class VecLookupWarner:
+     def __getattr__(self, name):
+         warn("pyopencl.array.vec is deprecated. "
+             "Please use pyopencl.cltypes for OpenCL vector and scalar types",
+             DeprecationWarning, stacklevel=2)
+
+         if name == "types":
+             name = "vec_types"
+         elif name == "type_to_scalar_and_count":
+             name = "vec_type_to_scalar_and_count"
+
+         return getattr(cltypes, name)
+
+
+ vec = VecLookupWarner()
+
+ # }}}
+
+
+ # {{{ helper functionality
+
+ def _splay(
+         device: cl.Device,
+         n: int,
+         kernel_specific_max_wg_size: int | None = None,
+         ):
+     max_work_items = builtins.min(128, device.max_work_group_size)
+
+     if kernel_specific_max_wg_size is not None:
+         max_work_items = builtins.min(max_work_items, kernel_specific_max_wg_size)
+
+     min_work_items = builtins.min(32, max_work_items)
+     max_groups = device.max_compute_units * 4 * 8
+     # 4 to overfill the device
+     # 8 is an Nvidia constant--that's how many
+     # groups fit onto one compute device
+
+     if n < min_work_items:
+         group_count = 1
+         work_items_per_group = min_work_items
+     elif n < (max_groups * min_work_items):
+         group_count = (n + min_work_items - 1) // min_work_items
+         work_items_per_group = min_work_items
+     elif n < (max_groups * max_work_items):
+         group_count = max_groups
+         grp = (n + min_work_items - 1) // min_work_items
+         work_items_per_group = (
+             (grp + max_groups - 1) // max_groups) * min_work_items
+     else:
+         group_count = max_groups
+         work_items_per_group = max_work_items
+
+     # print(f"n:{n} gc:{group_count} wipg:{work_items_per_group}")
+     return (group_count*work_items_per_group,), (work_items_per_group,)
+
+
+ # deliberately undocumented for now
+ ARRAY_KERNEL_EXEC_HOOK = None
+
+
+ P = ParamSpec("P")
+
+
+ def elwise_kernel_runner(
+         kernel_getter: Callable[Concatenate[Array, P], cl.Kernel]
+     ) -> Callable[Concatenate[Array, P], cl.Event]:
+     """Take a kernel getter of the same signature as the kernel
+     and return a function that invokes that kernel.
+
+     Assumes that the zeroth entry in *args* is an :class:`Array`.
+     """
+     from functools import wraps
+
+     @wraps(kernel_getter)
+     def kernel_runner(out: Array, *args: P.args, **kwargs: P.kwargs) -> cl.Event:
+         assert isinstance(out, Array)
+
+         wait_for = cast("cl.WaitList", kwargs.pop("wait_for", None))
+         queue = cast("cl.CommandQueue | None", kwargs.pop("queue", None))
+         if queue is None:
+             queue = out.queue
+
+         assert queue is not None
+
+         knl = kernel_getter(out, *args, **kwargs)
+         gs, ls = out._get_sizes(queue, knl.get_work_group_info(
+             cl.kernel_work_group_info.WORK_GROUP_SIZE,
+             queue.device))
+
+         knl_args = (out, *args, out.size)
+         if ARRAY_KERNEL_EXEC_HOOK is not None:
+             return ARRAY_KERNEL_EXEC_HOOK(
+                 knl, queue, gs, ls, *knl_args, wait_for=wait_for)
+         else:
+             return knl(queue, gs, ls, *knl_args, wait_for=wait_for)
+
+     return kernel_runner
+
+ # }}}
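
For a feel for the launch geometry _splay produces, here is a sketch that calls it directly. Note that _splay is a private, undocumented helper, so this is purely an editorial illustration of the tiling described in the comments above:

import pyopencl as cl
from pyopencl.array import _splay

ctx = cl.create_some_context()
dev = ctx.devices[0]

# Small n: one group of (up to) 32 work items. Large n: the device is
# tiled with at most 4 * 8 * max_compute_units groups.
for n in (10, 10_000, 10_000_000):
    global_size, local_size = _splay(dev, n)
    print(n, global_size, local_size)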
+
+
+ # {{{ array class
+
+ class InconsistentOpenCLQueueWarning(UserWarning):
+     pass
+
+
+ class ArrayHasOffsetError(ValueError):
+     """
+     .. versionadded:: 2013.1
+     """
+
+     def __init__(self, val="The operation you are attempting does not yet "
+             "support arrays that start at an offset from the beginning "
+             "of their buffer."):
+         ValueError.__init__(self, val)
+
+
+ class _copy_queue:  # noqa: N801
+     pass
+
+
+ _ARRAY_GET_SIZES_CACHE: \
+     dict[Hashable, tuple[tuple[int, ...], tuple[int, ...]]] = {}
+ _BOOL_DTYPE = np.dtype(np.int8)
+ _NOT_PRESENT = object()
+
+ ScalarLike: TypeAlias = int | float | complex | np.number[Any]
+
+
+ def _is_scalar(s: object) -> TypeIs[ScalarLike]:
+     return isinstance(s, SCALAR_CLASSES)
+
+
+ class Array:
+     """A :class:`numpy.ndarray` work-alike that stores its data and performs
+     its computations on the compute device. :attr:`shape` and :attr:`dtype` work
+     exactly as in :mod:`numpy`. Arithmetic methods in :class:`Array` support the
+     broadcasting of scalars. (e.g. ``array + 5``).
+
+     *cq* must be a :class:`~pyopencl.CommandQueue` or a :class:`~pyopencl.Context`.
+
+     If it is a queue, *cq* specifies the queue in which the array carries out
+     its computations by default. If a default queue (and thereby overloaded
+     operators and many other niceties) is not desired, pass a
+     :class:`~pyopencl.Context`.
+
+     *allocator* may be *None* or a callable that, upon being called with an
+     argument of the number of bytes to be allocated, returns a
+     :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool`
+     instance is one useful example of an object to pass here.)
+
+     .. versionchanged:: 2011.1
+
+         Renamed *context* to *cqa*, made it general-purpose.
+
+         All arguments beyond *order* should be considered keyword-only.
+
+     .. versionchanged:: 2015.2
+
+         Renamed *context* to *cq*, disallowed passing allocators through it.
+
+     .. attribute :: data
+
+         The :class:`pyopencl.MemoryObject` instance created for the memory that
+         backs this :class:`Array`.
+
+         .. versionchanged:: 2013.1
+
+             If a non-zero :attr:`offset` has been specified for this array,
+             this will fail with :exc:`ArrayHasOffsetError`.
+
+     .. attribute :: base_data
+
+         The :class:`pyopencl.MemoryObject` instance created for the memory that
+         backs this :class:`Array`. Unlike :attr:`data`, the base address of
+         *base_data* is allowed to be different from the beginning of the array.
+         The actual beginning is the base address of *base_data* plus
+         :attr:`offset` bytes.
+
+         Unlike :attr:`data`, retrieving :attr:`base_data` always succeeds.
+
+         .. versionadded:: 2013.1
+
+     .. attribute :: offset
+
+         See :attr:`base_data`.
+
+         .. versionadded:: 2013.1
+
+     .. attribute :: shape
+
+         A tuple of lengths of each dimension in the array.
+
+     .. attribute :: ndim
+
+         The number of dimensions in :attr:`shape`.
+
+     .. attribute :: dtype
+
+         The :class:`numpy.dtype` of the items in the GPU array.
+
+     .. attribute :: size
+
+         The number of meaningful entries in the array. Can also be computed by
+         multiplying up the numbers in :attr:`shape`.
+
+     .. attribute :: nbytes
+
+         The size of the entire array in bytes. Computed as :attr:`size` times
+         ``dtype.itemsize``.
+
+     .. attribute :: strides
+
+         A tuple of bytes to step in each dimension when traversing an array.
+
+     .. attribute :: flags
+
+         An object with attributes ``c_contiguous``, ``f_contiguous`` and
+         ``forc``, which may be used to query contiguity properties in analogy to
+         :attr:`numpy.ndarray.flags`.
+
+     .. rubric:: Methods
+
+     .. automethod :: with_queue
+
+     .. automethod :: __len__
+     .. automethod :: reshape
+     .. automethod :: ravel
+     .. automethod :: view
+     .. automethod :: squeeze
+     .. automethod :: transpose
+     .. attribute :: T
+     .. automethod :: set
+     .. automethod :: get
+     .. automethod :: get_async
+     .. automethod :: copy
+
+     .. automethod :: __str__
+     .. automethod :: __repr__
+
+     .. automethod :: mul_add
+     .. automethod :: __add__
+     .. automethod :: __sub__
+     .. automethod :: __iadd__
+     .. automethod :: __isub__
+     .. automethod :: __pos__
+     .. automethod :: __neg__
+     .. automethod :: __mul__
+     .. automethod :: __div__
+     .. automethod :: __rdiv__
+     .. automethod :: __pow__
+
+     .. automethod :: __and__
+     .. automethod :: __xor__
+     .. automethod :: __or__
+     .. automethod :: __iand__
+     .. automethod :: __ixor__
+     .. automethod :: __ior__
+
+     .. automethod :: __abs__
+     .. automethod :: __invert__
+
+     .. UNDOC reverse()
+
+     .. automethod :: fill
+
+     .. automethod :: astype
+
+     .. autoattribute :: real
+     .. autoattribute :: imag
+     .. automethod :: conj
+     .. automethod :: conjugate
+
+     .. automethod :: __getitem__
+     .. automethod :: __setitem__
+
+     .. automethod :: setitem
+
+     .. automethod :: map_to_host
+
+     .. rubric:: Comparisons, conditionals, any, all
+
+     .. versionadded:: 2013.2
+
+     Boolean arrays are stored as :class:`numpy.int8` because ``bool``
+     has an unspecified size in the OpenCL spec.
+
+     .. automethod :: __bool__
+
+         Only works for device scalars. (i.e. "arrays" with ``shape == ()``)
+
+     .. automethod :: any
+     .. automethod :: all
+
+     .. automethod :: __eq__
+     .. automethod :: __ne__
+     .. automethod :: __lt__
+     .. automethod :: __le__
+     .. automethod :: __gt__
+     .. automethod :: __ge__
+
+     .. rubric:: Event management
+
+     If an array is used from within an out-of-order queue, it needs to take
+     care of its own operation ordering. The facilities in this section make
+     this possible.
+
+     .. versionadded:: 2014.1.1
+
+     .. attribute:: events
+
+         A list of :class:`pyopencl.Event` instances that the current content of
+         this array depends on. User code may read, but should never modify this
+         list directly. To update this list, instead use the following methods.
+
+     .. automethod:: add_event
+     .. automethod:: finish
+     """
+
+     __array_priority__: ClassVar[int] = 100
+
+     def __init__(
+             self,
+             cq: cl.Context | cl.CommandQueue | None,
+             shape: tuple[int, ...] | int,
+             dtype: DTypeLike,
+             order: str = "C",
+             allocator: Allocator | None = None,
+             data: Any = None,
+             offset: int = 0,
+             strides: tuple[int, ...] | None = None,
+             events: list[cl.Event] | None = None,
+
+             # NOTE: following args are used for the fast constructor
+             _flags: Any = None,
+             _fast: bool = False,
+             _size: int | None = None,
+             _context: cl.Context | None = None,
+             _queue: cl.CommandQueue | None = None) -> None:
+         if _fast:
+             # Assumptions, should be disabled if not testing
+             if TYPE_CHECKING:
+                 assert cq is None
+                 assert isinstance(_context, cl.Context)
+                 assert _queue is None or isinstance(_queue, cl.CommandQueue)
+                 assert isinstance(shape, tuple)
+                 assert isinstance(strides, tuple)
+                 assert isinstance(dtype, np.dtype)
+                 assert _size is not None
+
+             size = _size
+             context = _context
+             queue = _queue
+             alloc_nbytes = dtype.itemsize * size
+
+         else:
+             # {{{ backward compatibility
+
+             if cq is None:
+                 context = _context
+                 queue = _queue
+
+             elif isinstance(cq, cl.CommandQueue):
+                 queue = cq
+                 context = queue.context
+
+             elif isinstance(cq, cl.Context):
+                 context = cq
+                 queue = None
+
+             else:
+                 raise TypeError(
+                     f"cq may be a queue or a context, not '{type(cq).__name__}'")
+
+             if allocator is not None:
+                 # "is" would be wrong because two Python objects are allowed
+                 # to hold handles to the same context.
+
+                 # FIXME It would be nice to check this. But it would require
+                 # changing the allocator interface. Trust the user for now.
+
+                 # assert allocator.context == context
+                 pass
+
+             # Queue-less arrays do have a purpose in life.
+             # They don't do very much, but at least they don't run kernels
+             # in random queues.
+             #
+             # See also :meth:`with_queue`.
+
+             del cq
+
+             # }}}
+
+             # invariant here: allocator, queue set
+
+             # {{{ determine shape, size, and strides
+
+             dtype = np.dtype(dtype)
+
+             try:
+                 shape = tuple(shape)
+             except TypeError as err:
+                 if not isinstance(shape, (int, np.integer)):
+                     raise TypeError(
+                         "shape must either be iterable or castable to an integer: "
+                         f"got a '{type(shape).__name__}'") from err
+
+                 shape = (shape,)
+
+             shape_array = np.array(shape)
+
+             # Previously, the size was computed as
+             # "size = 1; size *= dim for dim in shape"
+             # However this can fail when using certain data types,
+             # eg numpy.uint64(1) * 2 returns 2.0 !
+             if np.any(shape_array < 0):
+                 raise ValueError(f"negative dimensions are not allowed: {shape}")
+             if np.any([np.array([s]).dtype.kind not in ["u", "i"] for s in shape]):
+                 raise ValueError(
+                     f"invalid shape {shape} (all dimensions must be integers)")
+             size = np.prod(shape_array, dtype=np.uint64).item()
+
+             if strides is None:
+                 if order in "cC":
+                     # inlined from compyte.array.c_contiguous_strides
+                     if shape:
+                         strides_tmp = [dtype.itemsize]
+                         for s in shape[:0:-1]:
+                             # NOTE: https://github.com/inducer/compyte/pull/36
+                             strides_tmp.append(strides_tmp[-1]*builtins.max(1, s))
+                         strides = tuple(strides_tmp[::-1])
+                     else:
+                         strides = ()
+                 elif order in "fF":
+                     strides = _f_contiguous_strides(dtype.itemsize, shape)
+                 else:
+                     raise ValueError(f"invalid order: {order}")
+
+             else:
+                 # FIXME: We should possibly perform some plausibility
+                 # checking on 'strides' here.
+
+                 strides = tuple(strides)
+
+             # }}}
+
+             assert dtype != object, \
+                     "object arrays on the compute device are not allowed"  # noqa: E721
+             assert isinstance(shape, tuple)
+             assert isinstance(strides, tuple)
+
+             alloc_nbytes = dtype.itemsize * size
+
+         if alloc_nbytes < 0:
+             raise ValueError("cannot allocate CL buffer with negative size")
+
+         base_data = None
+         if data is None:
+             if alloc_nbytes == 0:
+                 base_data = None
+
+             else:
+                 if allocator is None:
+                     if context is None and queue is not None:
+                         context = queue.context
+
+                     assert context is not None
+                     base_data = cl.Buffer(
+                         context, cl.mem_flags.READ_WRITE, alloc_nbytes)
+                 else:
+                     base_data = allocator(alloc_nbytes)
+         else:
+             base_data = data
+
+         self.queue: cl.CommandQueue | None = queue
+         self.context: cl.Context | None = context
+         self.shape: tuple[int, ...] = shape
+         self.dtype: np.dtype[Any] = dtype
+         self.strides: tuple[int, ...] = strides
+         self.events: list[cl.Event] = [] if events is None else events
+         self.nbytes: int = alloc_nbytes
+         self.size: int = size
+         self.allocator: Allocator | None = allocator
+         self.base_data: cl.MemoryObjectHolder | cl.SVMPointer | None = base_data
+         self.offset: int = offset
+
+         self._flags: _ArrayFlags | None = _flags
+
+         if __debug__:
+             if queue is not None and isinstance(
+                     self.base_data, _SVMPointer_or_nothing):
+                 mem_queue = getattr(self.base_data, "_queue", _NOT_PRESENT)
+                 if mem_queue is not _NOT_PRESENT and mem_queue != queue:
+                     warn("Array has different queue from backing SVM memory. "
+                         "This may lead to the array getting deallocated sooner "
+                         "than expected, potentially leading to crashes.",
+                         InconsistentOpenCLQueueWarning, stacklevel=2)
+
+     @property
+     def ndim(self) -> int:
+         return len(self.shape)
+
+     @property
+     def data(self) -> cl.MemoryObjectHolder | cl.SVMPointer | None:
+         if self.offset:
+             raise ArrayHasOffsetError()
+         else:
+             return self.base_data
+
+     @property
+     def flags(self):
+         f = self._flags
+         if f is None:
+             self._flags = f = _ArrayFlags(self)
+
+         return f
+
+     def _new_with_changes(self,
+             data: cl.MemoryObjectHolder | cl.SVMPointer | None,
+             offset: int | None,
+             shape: tuple[int, ...] | None = None,
+             dtype: np.dtype[Any] | None = None,
+             strides: tuple[int, ...] | None = None,
+             queue: cl.CommandQueue | type[_copy_queue] | None = _copy_queue,
+             allocator: Allocator | None = None,
+             ) -> Self:
+         """
+         :arg data: *None* means allocate a new array.
+         """
+         fast = True
+         size = self.size
+         if shape is None:
+             shape = self.shape
+         else:
+             fast = False
+             size = None
+
+         if dtype is None:
+             dtype = self.dtype
+         if strides is None:
+             strides = self.strides
+         if queue is _copy_queue:
+             queue = self.queue
+         if allocator is None:
+             allocator = self.allocator
+         if offset is None:
+             offset = self.offset
+
+         # If we're allocating new data, then there's not likely to be
+         # a data dependency. Otherwise, the two arrays should probably
+         # share the same events list.
+
+         if data is None:
+             events = None
+         else:
+             events = self.events
+
+         return self.__class__(None, shape, dtype, allocator=allocator,
+                 strides=strides, data=data, offset=offset,
+                 events=events,
+                 _fast=fast, _context=self.context, _queue=queue, _size=size)
+
+     def with_queue(self, queue: cl.CommandQueue | None):
+         """Return a copy of *self* with the default queue set to *queue*.
+
+         *None* is allowed as a value for *queue*.
+
+         .. versionadded:: 2013.1
+         """
+
+         if queue is not None:
+             assert queue.context == self.context
+
+         return self._new_with_changes(self.base_data, self.offset,
+                 queue=queue)
+
+     def _get_sizes(self,
+             queue: cl.CommandQueue,
+             kernel_specific_max_wg_size: int | None = None
+             ) -> tuple[tuple[int, ...], tuple[int, ...]]:
+         if not self.flags.forc:
+             raise NotImplementedError("cannot operate on non-contiguous array")
+         cache_key = (queue.device.int_ptr, self.size, kernel_specific_max_wg_size)
+         try:
+             return _ARRAY_GET_SIZES_CACHE[cache_key]
+         except KeyError:
+             sizes = _splay(queue.device, self.size,
+                     kernel_specific_max_wg_size=kernel_specific_max_wg_size)
+             _ARRAY_GET_SIZES_CACHE[cache_key] = sizes
+             return sizes
+
+     def set(self,
+             ary: NDArray[Any],
+             queue: cl.CommandQueue | None = None,
+             async_: bool = False,
+             ):
+         """Transfer the contents of the :class:`numpy.ndarray` object *ary*
+         onto the device.
+
+         *ary* must have the same dtype and size (not necessarily shape) as
+         *self*.
+
+         *async_* is a Boolean indicating whether the function is allowed
+         to return before the transfer completes. To avoid synchronization
+         bugs, this defaults to *False*.
+         """
+
+         assert ary.size == self.size
+         assert ary.dtype == self.dtype
+
+         if not ary.flags.forc:
+             raise RuntimeError("cannot set from non-contiguous array")
+
+         if not _equal_strides(ary.strides, self.strides, self.shape):
+             raise RuntimeError("Setting array from one with different "
+                 "strides/storage order.")
+
+         if self.size:
+             queue = queue or self.queue
+             assert queue is not None
+             event1 = cl.enqueue_copy(queue, self.base_data, ary,
+                     dst_offset=self.offset,
+                     is_blocking=not async_)
+
+             self.add_event(event1)
+
+     def _get(self,
+             queue: cl.CommandQueue | None = None,
+             ary: NDArray[Any] | None = None,
+             async_: bool = False,
+             ):
+         if ary is None:
+             ary = np.empty(self.shape, self.dtype)
+
+             if self.strides != ary.strides:
+                 ary = _as_strided(ary, strides=self.strides)
+         else:
+             if ary.size != self.size:
+                 raise TypeError("'ary' has non-matching size")
+             if ary.dtype != self.dtype:
+                 raise TypeError("'ary' has non-matching type")
+
+             if self.shape != ary.shape:
+                 warn("get() between arrays of different shape is deprecated "
+                         "and will be removed in PyOpenCL 2017.x",
+                         DeprecationWarning, stacklevel=2)
+
+         assert self.flags.forc, "Array in get() must be contiguous"
+
+         queue = queue or self.queue
+         if queue is None:
+             raise ValueError("Cannot copy array to host. "
+                     "Array has no queue. Use "
+                     "'new_array = array.with_queue(queue)' "
+                     "to associate one.")
+
+         if self.size:
+             assert self.base_data is not None
+             event1 = cast("cl.Event", cl.enqueue_copy(queue, ary, self.base_data,
+                     src_offset=self.offset,
+                     wait_for=self.events, is_blocking=not async_))
+
+             self.add_event(event1)
+         else:
+             event1 = cl.enqueue_marker(queue, wait_for=self.events)
+             if not async_:
+                 event1.wait()
+
+         return ary, event1
+
+     def get(self,
+             queue: cl.CommandQueue | None = None,
+             ary: NDArray[Any] | None = None,
+             ) -> NDArray[Any]:
+         """Transfer the contents of *self* into *ary* or a newly allocated
+         :class:`numpy.ndarray`. If *ary* is given, it must have the same
+         shape and dtype.
+         """
+
+         ary, _event1 = self._get(queue=queue, ary=ary)
+
+         return ary
+
+     def get_async(self,
+             queue: cl.CommandQueue | None = None,
+             ary: NDArray[Any] | None = None,
+             ) -> tuple[NDArray[Any], cl.Event]:
+         """
+         Asynchronous version of :meth:`get` which returns a tuple ``(ary, event)``
+         containing the host array ``ary``
+         and the :class:`pyopencl.NannyEvent` ``event`` returned by
+         :meth:`pyopencl.enqueue_copy`.
+
+         .. versionadded:: 2019.1.2
+         """
+
+         return self._get(queue=queue, ary=ary, async_=True)
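
A sketch of the asynchronous round trip get_async enables (editorial illustration; the host buffer is only valid once the returned event completes):

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.arange(8, dtype=np.float32))

host, evt = a.get_async()   # returns immediately
evt.wait()                  # host only holds valid data after this
print(host)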
+
+     def copy(self, queue: cl.CommandQueue | type[_copy_queue] | None = _copy_queue):
+         """
+         :arg queue: The :class:`~pyopencl.CommandQueue` for the returned array.
+
+         .. versionchanged:: 2017.1.2
+
+             Updates the queue of the returned array.
+
+         .. versionadded:: 2013.1
+         """
+
+         if queue is _copy_queue:
+             queue_san = self.queue
+         else:
+             queue_san = cast("cl.CommandQueue | None", queue)
+
+         result = self._new_like_me(queue=queue_san)
+
+         # result.queue won't be the same as queue if queue is None.
+         # We force them to be the same here.
+         if result.queue is not queue_san:
+             result = result.with_queue(queue_san)
+
+         if not self.flags.forc:
+             raise RuntimeError("cannot copy non-contiguous array")
+
+         if self.nbytes:
+             queue_san = queue_san or self.queue
+             assert queue_san is not None
+             event1 = cl.enqueue_copy(queue_san,
+                     result.base_data, self.base_data,
+                     src_offset=self.offset, byte_count=self.nbytes,
+                     wait_for=self.events)
+             result.add_event(event1)
+
+         return result
+
+     @override
+     def __str__(self) -> str:
+         if self.queue is None:
+             return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
+                     "without queue, call with_queue()>")
+
+         return str(self.get())
+
+     @override
+     def __repr__(self) -> str:
+         if self.queue is None:
+             return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
+                     f"at {id(self):x} without queue, call with_queue()>")
+
+         result = repr(self.get())
+         if result[:5] == "array":
+             result = f"cl.{type(self).__name__}" + result[5:]
+         else:
+             warn(
+                 f"{type(result).__name__}.__repr__ was expected to return a "
+                 f"string starting with 'array', got '{result[:10]!r}'",
+                 stacklevel=2)
+
+         return result
+
+     def safely_stringify_for_pudb(self) -> str:
+         return f"cl.{type(self).__name__} {self.dtype} {self.shape}"
+
+     @override
+     def __hash__(self) -> int:
+         raise TypeError("pyopencl arrays are not hashable.")
+
+     # {{{ kernel invocation wrappers
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _axpbyz(out: Array,
+             a: ScalarLike,
+             x: Array,
+             b: ScalarLike,
+             y: Array,
+             queue: cl.CommandQueue | None = None) -> cl.Kernel:
+         """Compute ``out = a*x + b*y``,
+         where *x* and *y* are arrays."""
+         x_shape = x.shape
+         y_shape = y.shape
+         out_shape = out.shape
+
+         assert out.context is not None
+         assert (x_shape == y_shape == out_shape
+                 or (x_shape == () and y_shape == out_shape)
+                 or (y_shape == () and x_shape == out_shape))
+
+         return elementwise.get_axpbyz_kernel(
+             out.context, x.dtype, y.dtype, out.dtype,
+             x_is_scalar=(x_shape == ()),
+             y_is_scalar=(y_shape == ()))
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _axpbz(out: Array,
+             a: ScalarLike,
+             x: Array,
+             b: ScalarLike,
+             queue: cl.CommandQueue | None = None) -> cl.Kernel:
+         """Compute ``out = a*x + b``, where *b* is a scalar."""
+         assert out.shape == x.shape
+         assert out.context is not None
+
+         return elementwise.get_axpbz_kernel(
+             out.context,
+             np.array(a).dtype, x.dtype, np.array(b).dtype, out.dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _elwise_multiply(out: Array,
+             a: Array,
+             b: Array,
+             queue: cl.CommandQueue | None = None) -> cl.Kernel:
+         a_shape = a.shape
+         b_shape = b.shape
+         out_shape = out.shape
+
+         assert (a_shape == b_shape == out_shape
+                 or (a_shape == () and b_shape == out_shape)
+                 or (b_shape == () and a_shape == out_shape))
+         assert a.context is not None
+
+         return elementwise.get_multiply_kernel(
+             a.context, a.dtype, b.dtype, out.dtype,
+             x_is_scalar=(a_shape == ()),
+             y_is_scalar=(b_shape == ())
+             )
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _rdiv_scalar(out: Array,
+             ary: Array,
+             other: ScalarLike,
+             queue: cl.CommandQueue | None = None) -> cl.Kernel:
+         assert out.context is not None
+         assert out.shape == ary.shape
+
+         return elementwise.get_rdivide_elwise_kernel(
+             out.context, ary.dtype, np.array(other).dtype, out.dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _div(out: Array,
+             self: Array,
+             other: Array,
+             queue: cl.CommandQueue | None = None) -> cl.Kernel:
+         """Divides an array by another array."""
+         assert (self.shape == other.shape == out.shape
+                 or (self.shape == () and other.shape == out.shape)
+                 or (other.shape == () and self.shape == out.shape))
+         assert self.context is not None
+
+         return elementwise.get_divide_kernel(self.context,
+                 self.dtype, other.dtype, out.dtype,
+                 x_is_scalar=(self.shape == ()),
+                 y_is_scalar=(other.shape == ()))
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _fill(result: Array, scalar: ScalarLike) -> cl.Kernel:
+         assert result.context is not None
+         return elementwise.get_fill_kernel(result.context, result.dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _abs(result: Array, arg: Array) -> cl.Kernel:
+         assert arg.context is not None
+
+         if arg.dtype.kind == "c":
+             from pyopencl.elementwise import complex_dtype_to_name
+             fname = f"{complex_dtype_to_name(arg.dtype)}_abs"
+         elif arg.dtype.kind == "f":
+             fname = "fabs"
+         elif arg.dtype.kind in ["u", "i"]:
+             fname = "abs"
+         else:
+             raise TypeError("unsupported dtype in _abs()")
+
+         return elementwise.get_unary_func_kernel(
+             arg.context, fname, arg.dtype, out_dtype=result.dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _real(result: Array, arg: Array) -> cl.Kernel:
+         from pyopencl.elementwise import complex_dtype_to_name
+
+         assert arg.context is not None
+         fname = f"{complex_dtype_to_name(arg.dtype)}_real"
+
+         return elementwise.get_unary_func_kernel(
+             arg.context, fname, arg.dtype, out_dtype=result.dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _imag(result: Array, arg: Array) -> cl.Kernel:
+         from pyopencl.elementwise import complex_dtype_to_name
+
+         assert arg.context is not None
+         fname = f"{complex_dtype_to_name(arg.dtype)}_imag"
+
+         return elementwise.get_unary_func_kernel(
+             arg.context, fname, arg.dtype, out_dtype=result.dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _conj(result: Array, arg: Array) -> cl.Kernel:
+         from pyopencl.elementwise import complex_dtype_to_name
+
+         assert arg.context is not None
+         fname = f"{complex_dtype_to_name(arg.dtype)}_conj"
+
+         return elementwise.get_unary_func_kernel(
+             arg.context, fname, arg.dtype, out_dtype=result.dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _pow_scalar(result: Array,
+             ary: Array,
+             exponent: ScalarLike) -> cl.Kernel:
+         assert result.context is not None
+         return elementwise.get_pow_kernel(result.context,
+                 ary.dtype, np.array(exponent).dtype, result.dtype,
+                 is_base_array=True, is_exp_array=False)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _rpow_scalar(result: Array,
+             base: ScalarLike,
+             exponent: Array) -> cl.Kernel:
+         assert result.context is not None
+         return elementwise.get_pow_kernel(result.context,
+                 np.array(base).dtype, exponent.dtype, result.dtype,
+                 is_base_array=False, is_exp_array=True)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _pow_array(result: Array,
+             base: Array,
+             exponent: Array) -> cl.Kernel:
+         assert result.context is not None
+         return elementwise.get_pow_kernel(
+             result.context, base.dtype, exponent.dtype, result.dtype,
+             is_base_array=True, is_exp_array=True)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _reverse(result: Array, ary: Array) -> cl.Kernel:
+         assert result.context is not None
+         return elementwise.get_reverse_kernel(result.context, ary.dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _copy(dest: Array, src: Array) -> cl.Kernel:
+         assert dest.context is not None
+         return elementwise.get_copy_kernel(
+             dest.context, dest.dtype, src.dtype)
+
+     def _new_like_me(self,
+             dtype: DTypeLike | None = None,
+             queue: cl.CommandQueue | None = None) -> Self:
+         if dtype is None:
+             dtype = self.dtype
+             strides = self.strides
+             flags = self.flags
+             fast = True
+         else:
+             strides = None
+             flags = None
+             if dtype == self.dtype:
+                 strides = self.strides
+                 flags = self.flags
+                 fast = True
+             else:
+                 fast = False
+
+         queue = queue or self.queue
+         return self.__class__(None, self.shape, dtype,
+                 allocator=self.allocator, strides=strides, _flags=flags,
+                 _fast=fast,
+                 _size=self.size, _queue=queue, _context=self.context)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _scalar_binop(out: Array, a: Array, b: ScalarLike, op: str,
+             queue: cl.CommandQueue | None = None) -> cl.Kernel:
+         assert out.context is not None
+         return elementwise.get_array_scalar_binop_kernel(
+             out.context, op, out.dtype, a.dtype,
+             np.array(b).dtype)
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _array_binop(out: Array, a: Array, b: Array, op: str,
+             queue: cl.CommandQueue | None = None) -> cl.Kernel:
+         a_shape = a.shape
+         b_shape = b.shape
+         out_shape = out.shape
+
+         assert (a_shape == b_shape == out_shape
+                 or (a_shape == () and b_shape == out_shape)
+                 or (b_shape == () and a_shape == out_shape))
+         assert out.context is not None
+
+         return elementwise.get_array_binop_kernel(
+             out.context, op, out.dtype, a.dtype, b.dtype,
+             a_is_scalar=(a_shape == ()),
+             b_is_scalar=(b_shape == ()))
+
+     @staticmethod
+     @elwise_kernel_runner
+     def _unop(out: Array, a: Array, op: str,
+             queue: cl.CommandQueue | None = None) -> cl.Kernel:
+         assert out.context is not None
+         if out.shape != a.shape:
+             raise ValueError("shapes of arguments do not match")
+
+         return elementwise.get_unop_kernel(
+             out.context, op, a.dtype, out.dtype)
+
+     # }}}
+
+     # {{{ operators
+
+     def mul_add(self, selffac, other, otherfac, queue: cl.CommandQueue | None = None):
+         """Return ``selffac * self + otherfac * other``.
+         """
+         queue = queue or self.queue
+
+         if isinstance(other, Array):
+             result = _get_broadcasted_binary_op_result(self, other, queue)
+             result.add_event(
+                 self._axpbyz(
+                     result, selffac, self, otherfac, other,
+                     queue=queue))
+             return result
+         elif np.isscalar(other):
+             common_dtype = _get_common_dtype(self, other, queue)
+             result = self._new_like_me(common_dtype, queue=queue)
+             result.add_event(
+                 self._axpbz(result, selffac,
+                     self, common_dtype.type(otherfac * other),
+                     queue=queue))
+             return result
+         else:
+             raise NotImplementedError
+
+     def __add__(self, other) -> Self:
+         """Add an array with an array or an array with a scalar."""
+
+         if isinstance(other, Array):
+             result = _get_broadcasted_binary_op_result(self, other, self.queue)
+             result.add_event(
+                 self._axpbyz(result,
+                     self.dtype.type(1), self,
+                     other.dtype.type(1), other))
+
+             return result
+         elif np.isscalar(other):
+             if other == 0:
+                 return self.copy()
+             else:
+                 common_dtype = _get_common_dtype(self, other, self.queue)
+                 result = self._new_like_me(common_dtype)
+                 result.add_event(
+                     self._axpbz(result, self.dtype.type(1),
+                         self, common_dtype.type(other)))
+                 return result
+         else:
+             return NotImplemented
+
+     __radd__ = __add__
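
A short illustration of the operator behavior defined in this block (editorial sketch, not part of the packaged file): elementwise operations between arrays, scalar broadcasting, in-place updates, and true division's dtype promotion.

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.arange(4, dtype=np.int32))
b = cl_array.to_device(queue, np.ones(4, dtype=np.int32))

print((a + b).get())    # elementwise add
print((a + 5).get())    # scalar operand is broadcast
a += b                  # in-place update, enqueued on a's queue
print((a / 2).get())    # true division promotes to a float dtype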
+
+     def __sub__(self, other) -> Self:
+         """Subtract an array from an array or a scalar from an array."""
+
+         if isinstance(other, Array):
+             result = _get_broadcasted_binary_op_result(self, other, self.queue)
+             result.add_event(
+                 self._axpbyz(result,
+                     self.dtype.type(1), self,
+                     result.dtype.type(-1), other))
+
+             return result
+         elif np.isscalar(other):
+             if other == 0:
+                 return self.copy()
+             else:
+                 result = self._new_like_me(
+                     _get_common_dtype(self, other, self.queue))
+                 result.add_event(
+                     self._axpbz(result, self.dtype.type(1), self, -other))
+                 return result
+         else:
+             return NotImplemented
+
+     def __rsub__(self, other) -> Self:
+         """Subtract *self* from a scalar::
+
+            x = n - self
+         """
+         if np.isscalar(other):
+             common_dtype = _get_common_dtype(self, other, self.queue)
+             result = self._new_like_me(common_dtype)
+             result.add_event(
+                 self._axpbz(result, result.dtype.type(-1), self,
+                     common_dtype.type(other)))
+
+             return result
+         else:
+             return NotImplemented
+
+     def __iadd__(self, other) -> Self:
+         if isinstance(other, Array):
+             if other.shape != self.shape and other.shape != ():
+                 raise NotImplementedError("Broadcasting binary op with shapes:"
+                     f" {self.shape}, {other.shape}.")
+             self.add_event(
+                 self._axpbyz(self,
+                     self.dtype.type(1), self,
+                     other.dtype.type(1), other))
+
+             return self
+         elif np.isscalar(other):
+             self.add_event(
+                 self._axpbz(self, self.dtype.type(1), self, other))
+             return self
+         else:
+             return NotImplemented
+
+     def __isub__(self, other) -> Self:
+         if isinstance(other, Array):
+             if other.shape != self.shape and other.shape != ():
+                 raise NotImplementedError("Broadcasting binary op with shapes:"
+                     f" {self.shape}, {other.shape}.")
+             self.add_event(
+                 self._axpbyz(self, self.dtype.type(1), self,
+                     other.dtype.type(-1), other))
+             return self
+         elif np.isscalar(other):
+             self.add_event(
+                 self._axpbz(self, self.dtype.type(1), self, -other))
+             return self
+         else:
+             return NotImplemented
+
+     def __pos__(self) -> Self:
+         return self
+
+     def __neg__(self) -> Self:
+         result = self._new_like_me()
+         result.add_event(self._axpbz(result, -1, self, 0))
+         return result
+
+     def __mul__(self, other) -> Self:
+         if isinstance(other, Array):
+             result = _get_broadcasted_binary_op_result(self, other, self.queue)
+             result.add_event(
+                 self._elwise_multiply(result, self, other))
+             return result
+         elif np.isscalar(other):
+             common_dtype = _get_common_dtype(self, other, self.queue)
+             result = self._new_like_me(common_dtype)
+             result.add_event(
+                 self._axpbz(result,
+                     common_dtype.type(other), self, self.dtype.type(0)))
+             return result
+         else:
+             return NotImplemented
+
+     def __rmul__(self, other) -> Self:
+         if np.isscalar(other):
+             common_dtype = _get_common_dtype(self, other, self.queue)
+             result = self._new_like_me(common_dtype)
+             result.add_event(
+                 self._axpbz(result,
+                     common_dtype.type(other), self, self.dtype.type(0)))
+             return result
+         else:
+             return NotImplemented
+
+     def __imul__(self, other) -> Self:
+         if isinstance(other, Array):
+             if other.shape != self.shape and other.shape != ():
+                 raise NotImplementedError("Broadcasting binary op with shapes:"
+                     f" {self.shape}, {other.shape}.")
+             self.add_event(
+                 self._elwise_multiply(self, self, other))
+             return self
+         elif np.isscalar(other):
+             self.add_event(
+                 self._axpbz(self, other, self, self.dtype.type(0)))
+             return self
+         else:
+             return NotImplemented
+
+     def __div__(self, other) -> Self:
+         """Divides an array by an array or a scalar, i.e. ``self / other``.
+         """
+         if isinstance(other, Array):
+             result = _get_broadcasted_binary_op_result(
+                 self, other, self.queue,
+                 dtype_getter=_get_truedivide_dtype)
+             result.add_event(self._div(result, self, other))
+
+             return result
+         elif np.isscalar(other):
+             if other == 1:
+                 return self.copy()
+             else:
+                 common_dtype = _get_truedivide_dtype(self, other, self.queue)
+                 result = self._new_like_me(common_dtype)
+                 result.add_event(
+                     self._axpbz(result,
+                         np.true_divide(common_dtype.type(1), other),
+                         self, self.dtype.type(0)))
+                 return result
+         else:
+             return NotImplemented
+
+     __truediv__ = __div__
+
+     def __rdiv__(self, other) -> Self:
+         """Divides an array by a scalar or an array, i.e. ``other / self``.
+         """
+         common_dtype = _get_truedivide_dtype(self, other, self.queue)
+
+         if isinstance(other, Array):
+             result = self._new_like_me(common_dtype)
+             result.add_event(self._div(result, other, self))
+             return result
+         elif np.isscalar(other):
+             result = self._new_like_me(common_dtype)
+             result.add_event(
+                 self._rdiv_scalar(result, self, common_dtype.type(other)))
+             return result
+         else:
+             return NotImplemented
+
+     __rtruediv__ = __rdiv__
+
+     def __itruediv__(self, other) -> Self:
+         # raise an error if the result cannot be cast to self
+         common_dtype = _get_truedivide_dtype(self, other, self.queue)
+         if not np.can_cast(common_dtype, self.dtype.type, "same_kind"):
+             raise TypeError(
+                 "Cannot cast {!r} to {!r}".format(self.dtype, common_dtype))
+
+         if isinstance(other, Array):
+             if other.shape != self.shape and other.shape != ():
+                 raise NotImplementedError("Broadcasting binary op with shapes:"
+                     f" {self.shape}, {other.shape}.")
+             self.add_event(
+                 self._div(self, self, other))
+             return self
+         elif np.isscalar(other):
+             if other == 1:
+                 return self
+             else:
+                 self.add_event(
+                     self._axpbz(self, common_dtype.type(np.true_divide(1, other)),
+                         self, self.dtype.type(0)))
+                 return self
+         else:
+             return NotImplemented
+
+     def __and__(self, other) -> Self:
+         common_dtype = _get_common_dtype(self, other, self.queue)
+
+         if not np.issubdtype(common_dtype, np.integer):
+             raise TypeError(f"Integral types only: {common_dtype}")
+
+         if isinstance(other, Array):
+             result = _get_broadcasted_binary_op_result(self, other, self.queue)
+             result.add_event(self._array_binop(result, self, other, op="&"))
+             return result
+         elif np.isscalar(other):
+             result = self._new_like_me(common_dtype)
+             result.add_event(
+                 self._scalar_binop(result, self, other, op="&"))
+             return result
+         else:
+             return NotImplemented
+
+     __rand__ = __and__  # commutes
+
+     def __or__(self, other) -> Self:
+         common_dtype = _get_common_dtype(self, other, self.queue)
+
+         if not np.issubdtype(common_dtype, np.integer):
+             raise TypeError("Integral types only")
+
+         if isinstance(other, Array):
+             result = _get_broadcasted_binary_op_result(self, other,
+                     self.queue)
+             result.add_event(self._array_binop(result, self, other, op="|"))
+             return result
+         elif np.isscalar(other):
+             result = self._new_like_me(common_dtype)
+             result.add_event(
+                 self._scalar_binop(result, self, other, op="|"))
+             return result
+         else:
+             return NotImplemented
+
+     __ror__ = __or__  # commutes
+
+     def __xor__(self, other) -> Self:
+         common_dtype = _get_common_dtype(self, other, self.queue)
+
+         if not np.issubdtype(common_dtype, np.integer):
+             raise TypeError(f"Integral types only: {common_dtype}")
+
+         if isinstance(other, Array):
+             result = _get_broadcasted_binary_op_result(self, other, self.queue)
+             result.add_event(self._array_binop(result, self, other, op="^"))
+             return result
+         elif np.isscalar(other):
+             result = self._new_like_me(common_dtype)
+             result.add_event(
+                 self._scalar_binop(result, self, other, op="^"))
+             return result
+         else:
+             return NotImplemented
+
+     __rxor__ = __xor__  # commutes
+
+     def __iand__(self, other) -> Self:
+         common_dtype = _get_common_dtype(self, other, self.queue)
+
+         if not np.issubdtype(common_dtype, np.integer):
+             raise TypeError(f"Integral types only: {common_dtype}")
+
+         if isinstance(other, Array):
+             if other.shape != self.shape and other.shape != ():
+                 raise NotImplementedError("Broadcasting binary op with shapes:"
+                     f" {self.shape}, {other.shape}.")
+             self.add_event(self._array_binop(self, self, other, op="&"))
+             return self
+         elif np.isscalar(other):
+             self.add_event(
+                 self._scalar_binop(self, self, other, op="&"))
+             return self
+         else:
+             return NotImplemented
+
+     def __ior__(self, other) -> Self:
+         common_dtype = _get_common_dtype(self, other, self.queue)
+
+         if not np.issubdtype(common_dtype, np.integer):
+             raise TypeError(f"Integral types only: {common_dtype}")
+
+         if isinstance(other, Array):
+             if other.shape != self.shape and other.shape != ():
+                 raise NotImplementedError("Broadcasting binary op with shapes:"
+                     f" {self.shape}, {other.shape}.")
+             self.add_event(self._array_binop(self, self, other, op="|"))
+             return self
+         elif np.isscalar(other):
+             self.add_event(
+                 self._scalar_binop(self, self, other, op="|"))
+             return self
+         else:
+             return NotImplemented
+
+     def __ixor__(self, other) -> Self:
+         common_dtype = _get_common_dtype(self, other, self.queue)
+
+         if not np.issubdtype(common_dtype, np.integer):
+             raise TypeError(f"Integral types only: {common_dtype}")
+
+         if isinstance(other, Array):
+             if other.shape != self.shape and other.shape != ():
+                 raise NotImplementedError("Broadcasting binary op with shapes:"
+                     f" {self.shape}, {other.shape}.")
+             self.add_event(self._array_binop(self, self, other, op="^"))
+             return self
+         elif np.isscalar(other):
+             self.add_event(
+                 self._scalar_binop(self, self, other, op="^"))
+             return self
+         else:
+             return NotImplemented
+
+     def _zero_fill(self,
+             queue: cl.CommandQueue | None = None,
+             wait_for: cl.WaitList = None) -> None:
+         queue = queue or self.queue
+
+         if not self.size:
+             return
+
+         cl_version_gtr_1_2 = (
+             queue._get_cl_version() >= (1, 2)
+             and cl.get_cl_header_version() >= (1, 2)
+         )
+         on_nvidia = queue.device.vendor.startswith("NVIDIA")
+
+         # circumvent bug with large buffers on NVIDIA
+         # https://github.com/inducer/pyopencl/issues/395
+         if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
+             self.add_event(
+                 cl.enqueue_fill(queue, self.base_data, np.int8(0),
+                     self.nbytes, offset=self.offset, wait_for=wait_for))
+         else:
+             zero = np.zeros((), self.dtype)
+             self.fill(zero, queue=queue)
+
+     def fill(self,
+             value: object,
+             queue: cl.CommandQueue | None = None,
+             wait_for: cl.WaitList = None) -> Self:
+         """Fill the array with *value*.
+
+         :returns: *self*.
+         """
+
+         self.add_event(
+             self._fill(self, value, queue=queue, wait_for=wait_for))
+
+         return self
+
+     def __len__(self) -> int:
+         """Returns the size of the leading dimension of *self*."""
+         if len(self.shape):
+             return self.shape[0]
+         else:
+             raise TypeError("len() of unsized object")
+
+     def __abs__(self) -> Self:
+         """Return an ``Array`` of the absolute values of the elements
+         of *self*.
+         """
+
+         result = self._new_like_me(self.dtype.type(0).real.dtype)
+         result.add_event(self._abs(result, self))
+         return result
+
+     def __pow__(self, other) -> Self:
+         """Exponentiation by a scalar or elementwise by another
+         :class:`Array`.
+         """
+
+         if isinstance(other, Array):
+             assert self.shape == other.shape
+
+             result = self._new_like_me(
+                 _get_common_dtype(self, other, self.queue))
+             result.add_event(
+                 self._pow_array(result, self, other))
+             return result
+         elif np.isscalar(other):
+             result = self._new_like_me(
+                 _get_common_dtype(self, other, self.queue))
+             result.add_event(self._pow_scalar(result, self, other))
+             return result
+         else:
+             return NotImplemented
+
+     def __rpow__(self, other) -> Self:
+         if np.isscalar(other):
+             common_dtype = _get_common_dtype(self, other, self.queue)
+             result = self._new_like_me(common_dtype)
+             result.add_event(
+                 self._rpow_scalar(result, common_dtype.type(other), self))
+             return result
+         else:
+             return NotImplemented
+
+     def __invert__(self):
+         if not np.issubdtype(self.dtype, np.integer):
+             raise TypeError(f"Integral types only: {self.dtype}")
+
+         result = self._new_like_me()
+         result.add_event(self._unop(result, self, op="~"))
+
+         return result
+
+     # }}}
+
+     def reverse(self, queue: cl.CommandQueue | None = None) -> Self:
+         """Return this array in reversed order. The array is treated
+         as one-dimensional.
+         """
+
+         result = self._new_like_me()
+         result.add_event(self._reverse(result, self))
+         return result
+
+     def astype(self, dtype: DTypeLike, queue: cl.CommandQueue | None = None):
+         """Return a copy of *self*, cast to *dtype*."""
+         if dtype == self.dtype:
+             return self.copy()
+
+         result = self._new_like_me(dtype=dtype)
+         result.add_event(self._copy(result, self, queue=queue))
+         return result
+
+     # {{{ rich comparisons, any, all
+
+     def __bool__(self) -> bool:
+         if self.shape == ():
+             return bool(self.get())
+         else:
+             raise ValueError("The truth value of an array with "
+                     "more than one element is ambiguous. Use a.any() or a.all()")
+
+     def any(self,
+             queue: cl.CommandQueue | None = None,
+             wait_for: cl.WaitList = None
+             ) -> Self:
+         from pyopencl.reduction import get_any_kernel
+         krnl = get_any_kernel(self.context, self.dtype)
+         if wait_for is None:
+             wait_for = []
+         result, event1 = krnl(self, queue=queue,
+                 wait_for=[*wait_for, *self.events], return_event=True)
+         result.add_event(event1)
+         return result
+
+     def all(self,
+             queue: cl.CommandQueue | None = None,
+             wait_for: cl.WaitList = None
+             ) -> Self:
+         from pyopencl.reduction import get_all_kernel
+         krnl = get_all_kernel(self.context, self.dtype)
+         if wait_for is None:
+             wait_for = []
+         result, event1 = krnl(self, queue=queue,
+                 wait_for=[*wait_for, *self.events], return_event=True)
+         result.add_event(event1)
+         return result
1700
+
1701
+ @staticmethod
1702
+ @elwise_kernel_runner
1703
+ def _scalar_comparison(out: Array,
1704
+ a: Array,
1705
+ b: ScalarLike,
1706
+ op: str,
1707
+ queue: cl.CommandQueue | None = None) -> cl.Kernel:
1708
+ assert out.context is not None
1709
+ return elementwise.get_array_scalar_comparison_kernel(
1710
+ out.context, op, a.dtype)
1711
+
1712
+ @staticmethod
1713
+ @elwise_kernel_runner
1714
+ def _array_comparison(out: Array,
1715
+ a: Array,
1716
+ b: Array,
1717
+ op: str,
1718
+ queue: cl.CommandQueue | None = None) -> cl.Kernel:
1719
+ assert out.context is not None
1720
+ if a.shape != b.shape:
1721
+ raise ValueError("shapes of comparison arguments do not match")
1722
+
1723
+ return elementwise.get_array_comparison_kernel(
1724
+ out.context, op, a.dtype, b.dtype)
1725
+
1726
+ @override
1727
+ def __eq__(self, other: object) -> Self: # pyright: ignore[reportIncompatibleMethodOverride]
1728
+ if isinstance(other, Array):
1729
+ result = self._new_like_me(_BOOL_DTYPE)
1730
+ result.add_event(
1731
+ self._array_comparison(result, self, other, op="=="))
1732
+ return result
1733
+ elif np.isscalar(other):
1734
+ result = self._new_like_me(_BOOL_DTYPE)
1735
+ result.add_event(
1736
+ self._scalar_comparison(result, self, other, op="=="))
1737
+ return result
1738
+ else:
1739
+ return NotImplemented
1740
+
1741
+ @override
1742
+ def __ne__(self, other: object) -> Self: # pyright: ignore[reportIncompatibleMethodOverride]
1743
+ if isinstance(other, Array):
1744
+ result = self._new_like_me(_BOOL_DTYPE)
1745
+ result.add_event(
1746
+ self._array_comparison(result, self, other, op="!="))
1747
+ return result
1748
+ elif np.isscalar(other):
1749
+ result = self._new_like_me(_BOOL_DTYPE)
1750
+ result.add_event(
1751
+ self._scalar_comparison(result, self, other, op="!="))
1752
+ return result
1753
+ else:
1754
+ return NotImplemented
1755
+
1756
+ def __le__(self, other) -> Self:
1757
+ if isinstance(other, Array):
1758
+ result = self._new_like_me(_BOOL_DTYPE)
1759
+ result.add_event(
1760
+ self._array_comparison(result, self, other, op="<="))
1761
+ return result
1762
+ elif np.isscalar(other):
1763
+ result = self._new_like_me(_BOOL_DTYPE)
1764
+ result.add_event(
+ self._scalar_comparison(result, self, other, op="<="))
1765
+ return result
1766
+ else:
1767
+ return NotImplemented
1768
+
1769
+ def __ge__(self, other) -> Self:
1770
+ if isinstance(other, Array):
1771
+ result = self._new_like_me(_BOOL_DTYPE)
1772
+ result.add_event(
1773
+ self._array_comparison(result, self, other, op=">="))
1774
+ return result
1775
+ elif np.isscalar(other):
1776
+ result = self._new_like_me(_BOOL_DTYPE)
1777
+ result.add_event(
1778
+ self._scalar_comparison(result, self, other, op=">="))
1779
+ return result
1780
+ else:
1781
+ return NotImplemented
1782
+
1783
+ def __lt__(self, other) -> Self:
1784
+ if isinstance(other, Array):
1785
+ result = self._new_like_me(_BOOL_DTYPE)
1786
+ result.add_event(
1787
+ self._array_comparison(result, self, other, op="<"))
1788
+ return result
1789
+ elif np.isscalar(other):
1790
+ result = self._new_like_me(_BOOL_DTYPE)
1791
+ result.add_event(
1792
+ self._scalar_comparison(result, self, other, op="<"))
1793
+ return result
1794
+ else:
1795
+ return NotImplemented
1796
+
1797
+ def __gt__(self, other) -> Self:
1798
+ if isinstance(other, Array):
1799
+ result = self._new_like_me(_BOOL_DTYPE)
1800
+ result.add_event(
1801
+ self._array_comparison(result, self, other, op=">"))
1802
+ return result
1803
+ elif np.isscalar(other):
1804
+ result = self._new_like_me(_BOOL_DTYPE)
1805
+ result.add_event(
1806
+ self._scalar_comparison(result, self, other, op=">"))
1807
+ return result
1808
+ else:
1809
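+
+ # Usage sketch (assumes a device array ``a``): comparisons are elementwise
+ # and return a new boolean-typed device array, which can then be reduced
+ # with any()/all().
+ #
+ #     mask = a > 0                     # per-element flags, on the device
+ #     all_pos = bool((a > 0).all())    # 0-d result, usable as a host bool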
+ return NotImplemented
1810
+
1811
+ # }}}
1812
+
1813
+ # {{{ complex-valued business
1814
+
1815
+ @property
1816
+ def real(self) -> Self:
1817
+ """
1818
+ .. versionadded:: 2012.1
1819
+ """
1820
+ if self.dtype.kind == "c":
1821
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1822
+ result.add_event(
1823
+ self._real(result, self))
1824
+ return result
1825
+ else:
1826
+ return self
1827
+
1828
+ @property
1829
+ def imag(self) -> Self:
1830
+ """
1831
+ .. versionadded:: 2012.1
1832
+ """
1833
+ if self.dtype.kind == "c":
1834
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1835
+ result.add_event(
1836
+ self._imag(result, self))
1837
+ return result
1838
+ else:
1839
+ return zeros_like(self)
1840
+
1841
+ def conj(self) -> Self:
1842
+ """
1843
+ .. versionadded:: 2012.1
1844
+ """
1845
+ if self.dtype.kind == "c":
1846
+ result = self._new_like_me()
1847
+ result.add_event(self._conj(result, self))
1848
+ return result
1849
+ else:
1850
+ return self
1851
+
1852
+ conjugate = conj
1853
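+
+ # Usage sketch (assumes ``queue``, ``np``, and this module imported as
+ # ``cl_array``):
+ #
+ #     z = cl_array.to_device(queue, np.array([1+2j, 3-4j], dtype=np.complex64))
+ #     z.real, z.imag    # real-valued component arrays
+ #     z.conj()          # elementwise complex conjugate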
+
1854
+ # }}}
1855
+
1856
+ # {{{ event management
1857
+
1858
+ def add_event(self, evt: cl.Event) -> None:
1859
+ """Add *evt* to :attr:`events`. If :attr:`events` is too long, this method
1860
+ may implicitly wait for a subset of :attr:`events` and clear them from the
1861
+ list.
1862
+ """
1863
+ n_wait = 4
1864
+
1865
+ self.events.append(evt)
1866
+
1867
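+ # Retire events in batches: once the list grows past 3*n_wait entries,
+ # wait for the oldest n_wait of them and drop them from the list.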
+ if len(self.events) > 3*n_wait:
1868
+ wait_events = self.events[:n_wait]
1869
+ cl.wait_for_events(wait_events)
1870
+ del self.events[:n_wait]
1871
+
1872
+ def finish(self) -> None:
1873
+ """Wait for the entire contents of :attr:`events`, clear it."""
1874
+
1875
+ if self.events:
1876
+ cl.wait_for_events(self.events)
1877
+ del self.events[:]
1878
+
1879
+ # }}}
1880
+
1881
+ # {{{ views
1882
+
1883
+ def reshape(self, *shape, **kwargs):
1884
+ """Returns an array containing the same data with a new shape."""
1885
+
1886
+ order = kwargs.pop("order", "C")
1887
+ if kwargs:
1888
+ raise TypeError(f"unexpected keyword arguments: {list(kwargs)}")
1889
+
1890
+ if order not in "CF":
1891
+ raise ValueError("order must be either 'C' or 'F'")
1892
+
1893
+ # TODO: add more error-checking, perhaps
1894
+
1895
+ # FIXME: The following is overly conservative. As long as we don't change
1896
+ # our memory footprint, we're good.
1897
+
1898
+ # if not self.flags.forc:
1899
+ # raise RuntimeError("only contiguous arrays may "
1900
+ # "be used as arguments to this operation")
1901
+
1902
+ if isinstance(shape[0], (tuple, list)):
1903
+ shape = tuple(shape[0])
1904
+
1905
+ if -1 in shape:
1906
+ shape = list(shape)
1907
+ idx = shape.index(-1)
1908
+ size = -reduce(lambda x, y: x * y, shape, 1)
1909
+ if size == 0:
1910
+ shape[idx] = 0
1911
+ else:
1912
+ shape[idx] = self.size // size
1913
+ if builtins.any(s < 0 for s in shape):
1914
+ raise ValueError("can only specify one unknown dimension")
1915
+ shape = tuple(shape)
1916
+
1917
+ if shape == self.shape:
1918
+ return self._new_with_changes(
1919
+ data=self.base_data, offset=self.offset, shape=shape,
1920
+ strides=self.strides)
1921
+
1922
+ import operator
1923
+ size = reduce(operator.mul, shape, 1)
1924
+ if size != self.size:
1925
+ raise ValueError("total size of new array must be unchanged")
1926
+
1927
+ if self.size == 0:
1928
+ return self._new_with_changes(
1929
+ data=None, offset=0, shape=shape,
1930
+ strides=(
1931
+ _f_contiguous_strides(self.dtype.itemsize, shape)
1932
+ if order == "F" else
1933
+ _c_contiguous_strides(self.dtype.itemsize, shape)
1934
+ ))
1935
+
1936
+ # {{{ determine reshaped strides
1937
+
1938
+ # copied and translated from
1939
+ # https://github.com/numpy/numpy/blob/4083883228d61a3b571dec640185b5a5d983bf59/numpy/core/src/multiarray/shape.c # noqa: E501
1940
+
1941
+ newdims = shape
1942
+ newnd = len(newdims)
1943
+
1944
+ # Remove axes with dimension 1 from the old array. They have no effect
1945
+ # but would need special cases since their strides do not matter.
1946
+
1947
+ olddims = []
1948
+ oldstrides = []
1949
+ for oi in range(len(self.shape)):
1950
+ s = self.shape[oi]
1951
+ if s != 1:
1952
+ olddims.append(s)
1953
+ oldstrides.append(self.strides[oi])
1954
+
1955
+ oldnd = len(olddims)
1956
+
1957
+ newstrides = [-1]*len(newdims)
1958
+
1959
+ # oi to oj and ni to nj give the axis ranges currently worked with
1960
+ oi = 0
1961
+ oj = 1
1962
+ ni = 0
1963
+ nj = 1
1964
+ while ni < newnd and oi < oldnd:
1965
+ np = newdims[ni]
1966
+ op = olddims[oi]
1967
+
1968
+ while np != op:
1969
+ if np < op:
1970
+ # Misses trailing 1s, these are handled later
1971
+ np *= newdims[nj]
1972
+ nj += 1
1973
+ else:
1974
+ op *= olddims[oj]
1975
+ oj += 1
1976
+
1977
+ # Check whether the original axes can be combined
1978
+ for ok in range(oi, oj-1):
1979
+ if order == "F":
1980
+ if oldstrides[ok+1] != olddims[ok]*oldstrides[ok]:
1981
+ raise ValueError("cannot reshape without copy")
1982
+ else:
1983
+ # C order
1984
+ if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]):
1985
+ raise ValueError("cannot reshape without copy")
1986
+
1987
+ # Calculate new strides for all axes currently worked with
1988
+ if order == "F":
1989
+ newstrides[ni] = oldstrides[oi]
1990
+ for nk in range(ni+1, nj):
1991
+ newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1]
1992
+ else:
1993
+ # C order
1994
+ newstrides[nj - 1] = oldstrides[oj - 1]
1995
+ for nk in range(nj-1, ni, -1):
1996
+ newstrides[nk - 1] = newstrides[nk]*newdims[nk]
1997
+
1998
+ ni = nj
1999
+ nj += 1
2000
+
2001
+ oi = oj
2002
+ oj += 1
2003
+
2004
+ # Set strides corresponding to trailing 1s of the new shape.
2005
+ if ni >= 1:
2006
+ last_stride = newstrides[ni - 1]
2007
+ else:
2008
+ last_stride = self.dtype.itemsize
2009
+
2010
+ if order == "F":
2011
+ last_stride *= newdims[ni - 1]
2012
+
2013
+ for nk in range(ni, len(shape)):
2014
+ newstrides[nk] = last_stride
2015
+
2016
+ # }}}
2017
+
2018
+ return self._new_with_changes(
2019
+ data=self.base_data, offset=self.offset, shape=shape,
2020
+ strides=tuple(newstrides))
2021
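+
+ # Usage sketch (assumes ``queue``, ``np``, ``cl_array``):
+ #
+ #     a = cl_array.arange(queue, 6, dtype=np.float32)
+ #     b = a.reshape(2, 3)      # a view onto the same buffer
+ #     c = a.reshape(3, -1)     # -1 infers the remaining axis (here 2)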
+
2022
+ def ravel(self, order="C"):
2023
+ """Returns flattened array containing the same data."""
2024
+ return self.reshape(self.size, order=order)
2025
+
2026
+ def view(self, dtype=None):
2027
+ """Returns view of array with the same data. If *dtype* is different
2028
+ from current dtype, the actual bytes of memory will be reinterpreted.
2029
+ """
2030
+
2031
+ if dtype is None:
2032
+ dtype = self.dtype
2033
+
2034
+ old_itemsize = self.dtype.itemsize
2035
+ itemsize = np.dtype(dtype).itemsize
2036
+
2037
+ from pytools import argmin2
2038
+ min_stride_axis = argmin2(
2039
+ (axis, abs(stride))
2040
+ for axis, stride in enumerate(self.strides))
2041
+
2042
+ if self.shape[min_stride_axis] * old_itemsize % itemsize != 0:
2043
+ raise ValueError("new type not compatible with array")
2044
+
2045
+ new_shape = (
2046
+ *self.shape[:min_stride_axis],
2047
+ self.shape[min_stride_axis] * old_itemsize // itemsize,
2048
+ *self.shape[min_stride_axis+1:])
2049
+ new_strides = (
2050
+ *self.strides[:min_stride_axis],
2051
+ self.strides[min_stride_axis] * itemsize // old_itemsize,
2052
+ *self.strides[min_stride_axis+1:])
2053
+
2054
+ return self._new_with_changes(
2055
+ self.base_data, self.offset,
2056
+ shape=new_shape, dtype=dtype,
2057
+ strides=new_strides)
2058
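+
+ # Illustrative sketch: reinterpreting the bytes of a C-contiguous array.
+ # The axis with the smallest stride absorbs the change in itemsize.
+ #
+ #     a = cl_array.zeros(queue, (4, 8), np.float32)
+ #     a.view(np.int32)        # same itemsize: shape stays (4, 8)
+ #     a.view(np.complex64)    # doubled itemsize: shape becomes (4, 4)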
+
2059
+ def squeeze(self):
2060
+ """Returns a view of the array with dimensions of
2061
+ length 1 removed.
2062
+
2063
+ .. versionadded:: 2015.2
2064
+ """
2065
+ new_shape = tuple(dim for dim in self.shape if dim > 1)
2066
+ new_strides = tuple(
2067
+ self.strides[i] for i, dim in enumerate(self.shape)
2068
+ if dim > 1)
2069
+
2070
+ return self._new_with_changes(
2071
+ self.base_data, self.offset,
2072
+ shape=new_shape, strides=new_strides)
2073
+
2074
+ def transpose(self, axes=None):
2075
+ """Permute the dimensions of an array.
2076
+
2077
+ :arg axes: list of ints, optional.
2078
+ By default, reverse the dimensions, otherwise permute the axes
2079
+ according to the values given.
2080
+
2081
+ :returns: an :class:`Array` view of the array with its axes permuted.
2082
+
2083
+ .. versionadded:: 2015.2
2084
+ """
2085
+
2086
+ if axes is None:
2087
+ axes = range(self.ndim-1, -1, -1)
2088
+
2089
+ if len(axes) != len(self.shape):
2090
+ raise ValueError("axes don't match array")
2091
+
2092
+ new_shape = [self.shape[axes[i]] for i in range(len(axes))]
2093
+ new_strides = [self.strides[axes[i]] for i in range(len(axes))]
2094
+
2095
+ return self._new_with_changes(
2096
+ self.base_data, self.offset,
2097
+ shape=tuple(new_shape),
2098
+ strides=tuple(new_strides))
2099
+
2100
+ @property
2101
+ def T(self): # noqa: N802
2102
+ """
2103
+ .. versionadded:: 2015.2
2104
+ """
2105
+ return self.transpose()
2106
+
2107
+ # }}}
2108
+
2109
+ def map_to_host(self,
2110
+ queue: cl.CommandQueue | None = None,
2111
+ flags=None,
2112
+ is_blocking: bool = True,
2113
+ wait_for: cl.WaitList = None):
2114
+ """If *is_blocking*, return a :class:`numpy.ndarray` corresponding to the
2115
+ same memory as *self*.
2116
+
2117
+ If *is_blocking* is not true, return a tuple ``(ary, evt)``, where
2118
+ *ary* is the above-mentioned array.
2119
+
2120
+ The host array is obtained using :func:`pyopencl.enqueue_map_buffer`.
2121
+ See there for further details.
2122
+
2123
+ :arg flags: A combination of :class:`pyopencl.map_flags`.
2124
+ Defaults to read-write.
2125
+
2126
+ .. versionadded:: 2013.2
2127
+ """
2128
+
2129
+ if flags is None:
2130
+ flags = cl.map_flags.READ | cl.map_flags.WRITE
2131
+ if wait_for is None:
2132
+ wait_for = []
2133
+
2134
+ ary, evt = cl.enqueue_map_buffer(
2135
+ queue or self.queue, self.base_data, flags, self.offset,
2136
+ self.shape, self.dtype, strides=self.strides,
2137
+ wait_for=[*wait_for, *self.events], is_blocking=is_blocking)
2138
+
2139
+ if is_blocking:
2140
+ return ary
2141
+ else:
2142
+ return ary, evt
2143
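+
+ # Illustrative sketch (assumes a device array ``d``):
+ #
+ #     hv = d.map_to_host(queue)                            # blocking: a numpy view
+ #     hv2, evt = d.map_to_host(queue, is_blocking=False)   # non-blocking
+ #     evt.wait()                                           # hv2 is valid after this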
+
2144
+ # {{{ getitem/setitem
2145
+
2146
+ def __getitem__(self, index):
2147
+ """
2148
+ .. versionadded:: 2013.1
2149
+ """
2150
+
2151
+ if isinstance(index, Array):
2152
+ if index.dtype.kind not in ("i", "u"):
2153
+ raise TypeError(
2154
+ "fancy indexing is only allowed with integers")
2155
+ if len(index.shape) != 1:
2156
+ raise NotImplementedError(
2157
+ "multidimensional fancy indexing is not supported")
2158
+ if len(self.shape) != 1:
2159
+ raise NotImplementedError(
2160
+ "fancy indexing into a multi-d array is not supported")
2161
+
2162
+ return take(self, index)
2163
+
2164
+ if not isinstance(index, tuple):
2165
+ index = (index,)
2166
+
2167
+ new_shape = []
2168
+ new_offset = self.offset
2169
+ new_strides = []
2170
+
2171
+ seen_ellipsis = False
2172
+
2173
+ index_axis = 0
2174
+ array_axis = 0
2175
+ while index_axis < len(index):
2176
+ index_entry = index[index_axis]
2177
+
2178
+ if array_axis > len(self.shape):
2179
+ raise IndexError("too many axes in index")
2180
+
2181
+ if isinstance(index_entry, slice):
2182
+ start, stop, idx_stride = index_entry.indices(
2183
+ self.shape[array_axis])
2184
+
2185
+ array_stride = self.strides[array_axis]
2186
+
2187
+ new_shape.append((abs(stop-start)-1)//abs(idx_stride)+1)
2188
+ new_strides.append(idx_stride*array_stride)
2189
+ new_offset += array_stride*start
2190
+
2191
+ index_axis += 1
2192
+ array_axis += 1
2193
+
2194
+ elif isinstance(index_entry, (int, np.integer)):
2195
+ array_shape = self.shape[array_axis]
2196
+ if index_entry < 0:
2197
+ index_entry += array_shape
2198
+
2199
+ if not (0 <= index_entry < array_shape):
2200
+ raise IndexError(f"subindex in axis {index_axis} out of range")
2201
+
2202
+ new_offset += self.strides[array_axis]*index_entry
2203
+
2204
+ index_axis += 1
2205
+ array_axis += 1
2206
+
2207
+ elif index_entry is Ellipsis:
2208
+ index_axis += 1
2209
+
2210
+ remaining_index_count = len(index) - index_axis
2211
+ new_array_axis = len(self.shape) - remaining_index_count
2212
+ if new_array_axis < array_axis:
2213
+ raise IndexError("invalid use of ellipsis in index")
2214
+ while array_axis < new_array_axis:
2215
+ new_shape.append(self.shape[array_axis])
2216
+ new_strides.append(self.strides[array_axis])
2217
+ array_axis += 1
2218
+
2219
+ if seen_ellipsis:
2220
+ raise IndexError(
2221
+ "more than one ellipsis not allowed in index")
2222
+ seen_ellipsis = True
2223
+
2224
+ elif index_entry is np.newaxis:
2225
+ new_shape.append(1)
2226
+ new_strides.append(0)
2227
+ index_axis += 1
2228
+
2229
+ else:
2230
+ raise IndexError(f"invalid subindex in axis {index_axis}")
2231
+
2232
+ while array_axis < len(self.shape):
2233
+ new_shape.append(self.shape[array_axis])
2234
+ new_strides.append(self.strides[array_axis])
2235
+
2236
+ array_axis += 1
2237
+
2238
+ return self._new_with_changes(
2239
+ self.base_data, offset=new_offset,
2240
+ shape=tuple(new_shape),
2241
+ strides=tuple(new_strides))
2242
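+
+ # Usage sketch: basic indexing yields zero-copy views, while index arrays
+ # dispatch to take() and copy.
+ #
+ #     a = cl_array.arange(queue, 12, dtype=np.float32).reshape(3, 4)
+ #     a[1]          # one row, as a view
+ #     a[:, ::2]     # strided view, no copy
+ #     idx = cl_array.to_device(queue, np.array([0, 2], dtype=np.int32))
+ #     cl_array.arange(queue, 5, dtype=np.float32)[idx]    # fancy indexing, copies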
+
2243
+ def setitem(self,
2244
+ subscript: Array | slice | int,
2245
+ value: object,
2246
+ queue: cl.CommandQueue | None = None,
2247
+ wait_for: cl.WaitList = None
2248
+ ):
2249
+ """Like :meth:`__setitem__`, but with the ability to specify
2250
+ a *queue* and *wait_for*.
2251
+
2252
+ .. versionadded:: 2013.1
2253
+
2254
+ .. versionchanged:: 2013.2
2255
+
2256
+ Added *wait_for*.
2257
+ """
2258
+
2259
+ queue = queue or self.queue
2260
+ assert queue is not None
2261
+ if wait_for is None:
2262
+ wait_for = []
2263
+ wait_for = [*wait_for, *self.events]
2264
+
2265
+ if isinstance(subscript, Array):
2266
+ if subscript.dtype.kind not in ("i", "u"):
2267
+ raise TypeError(
2268
+ "fancy indexing is only allowed with integers")
2269
+ if len(subscript.shape) != 1:
2270
+ raise NotImplementedError(
2271
+ "multidimensional fancy indexing is not supported")
2272
+ if len(self.shape) != 1:
2273
+ raise NotImplementedError(
2274
+ "fancy indexing into a multi-d array is not supported")
2275
+
2276
+ multi_put([value], subscript, out=[self], queue=queue,
2277
+ wait_for=wait_for)
2278
+ return
2279
+
2280
+ subarray = self[subscript]
2281
+
2282
+ if not subarray.size:
2283
+ # This prevents errors about mismatched strides that neither we
2284
+ # nor numpy worry about in the empty case.
2285
+ return
2286
+
2287
+ if isinstance(value, np.ndarray):
2288
+ if subarray.shape == value.shape and subarray.strides == value.strides:
2289
+ assert subarray.base_data is not None
2290
+ self.add_event(
2291
+ cl.enqueue_copy(queue, subarray.base_data,
2292
+ value, dst_offset=subarray.offset, wait_for=wait_for))
2293
+ return
2294
+ else:
2295
+ value = to_device(queue, value, self.allocator)
2296
+
2297
+ if isinstance(value, Array):
2298
+ if len(subarray.shape) != len(value.shape):
2299
+ raise NotImplementedError("broadcasting is not "
2300
+ "supported in __setitem__")
2301
+ if subarray.shape != value.shape:
2302
+ raise ValueError("cannot assign between arrays of "
2303
+ "differing shapes")
2304
+ if subarray.strides != value.strides:
2305
+ raise NotImplementedError("cannot assign between arrays of "
2306
+ "differing strides")
2307
+
2308
+ self.add_event(
2309
+ self._copy(subarray, value, queue=queue, wait_for=wait_for))
2310
+
2311
+ else:
2312
+ # Let's assume it's a scalar
2313
+ subarray.fill(value, queue=queue, wait_for=wait_for)
2314
+
2315
+ def __setitem__(self, subscript, value):
2316
+ """Set the slice of *self* identified *subscript* to *value*.
2317
+
2318
+ *value* is allowed to be:
2319
+
2320
+ * An :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
2321
+ but with potentially different :attr:`dtype`.
2322
+ * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
2323
+ :attr:`strides`, but with potentially different :attr:`dtype`.
2324
+ * A scalar.
2325
+
2326
+ Non-scalar broadcasting is not currently supported.
2327
+
2328
+ .. versionadded:: 2013.1
2329
+ """
2330
+ self.setitem(subscript, value)
2331
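+
+ # Illustrative sketch: a scalar fills the selected slice; a host array with
+ # matching shape and strides is copied in directly.
+ #
+ #     a = cl_array.zeros(queue, 10, np.float32)
+ #     a[2:5] = 7
+ #     a[5:10] = np.arange(5, dtype=np.float32)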
+
2332
+ # }}}
2333
+
2334
+ # }}}
2335
+
2336
+
2337
+ # {{{ creation helpers
2338
+
2339
+ def as_strided(ary, shape=None, strides=None):
2340
+ """Make an :class:`Array` from the given array with the given
2341
+ shape and strides.
2342
+ """
2343
+
2344
+ # undocumented for the moment
2345
+
2346
+ if shape is None:
2347
+ shape = ary.shape
2348
+ if strides is None:
2349
+ strides = ary.strides
2350
+
2351
+ return Array(ary.queue, shape, ary.dtype, allocator=ary.allocator,
2352
+ data=ary.data, strides=strides)
2353
+
2354
+
2355
+ class _same_as_transfer: # noqa: N801
2356
+ pass
2357
+
2358
+
2359
+ def to_device(
2360
+ queue: cl.CommandQueue,
2361
+ ary: NDArray[Any],
2362
+ allocator: Allocator | None = None,
2363
+ async_: bool = False,
2364
+ array_queue=_same_as_transfer,
2365
+ ) -> Array:
2366
+ """Return a :class:`Array` that is an exact copy of the
2367
+ :class:`numpy.ndarray` instance *ary*.
2368
+
2369
+ :arg array_queue: The :class:`~pyopencl.CommandQueue` which will
2370
+ be stored in the resulting array. Useful
2371
+ to make sure there is no implicit queue associated
2372
+ with the array by passing *None*.
2373
+
2374
+ See :class:`Array` for the meaning of *allocator*.
2375
+
2376
+ .. versionchanged:: 2015.2
2377
+ *array_queue* argument was added.
2378
+ """
2379
+
2380
+ if ary.dtype == object:
2381
+ raise RuntimeError("to_device does not work on object arrays.")
2382
+
2383
+ if array_queue is _same_as_transfer:
2384
+ first_arg = queue
2385
+ else:
2386
+ first_arg = queue.context
2387
+
2388
+ result = Array(first_arg, ary.shape, ary.dtype,
2389
+ allocator=allocator, strides=ary.strides)
2390
+ result.set(ary, async_=async_, queue=queue)
2391
+ return result
2392
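+
+ # Usage sketch (assumes ``queue`` and ``np``): round-tripping a host array.
+ #
+ #     host = np.arange(10, dtype=np.float32)
+ #     dev = to_device(queue, host)
+ #     assert (dev.get() == host).all()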
+
2393
+
2394
+ empty = Array
2395
+
2396
+
2397
+ def zeros(
2398
+ queue: cl.CommandQueue,
2399
+ shape: int | tuple[int, ...],
2400
+ dtype: DTypeLike,
2401
+ order: Literal["C"] | Literal["F"] = "C",
2402
+ allocator: Allocator | None = None,
2403
+ ) -> Array:
2404
+ """Same as :func:`empty`, but the :class:`Array` is zero-initialized before
2405
+ being returned.
2406
+
2407
+ .. versionchanged:: 2011.1
2408
+ *context* argument was deprecated.
2409
+ """
2410
+
2411
+ result = Array(None, shape, dtype,
2412
+ order=order, allocator=allocator,
2413
+ _context=queue.context, _queue=queue)
2414
+ result._zero_fill()
2415
+ return result
2416
+
2417
+
2418
+ def empty_like(
2419
+ ary: Array,
2420
+ queue: cl.CommandQueue | type[_copy_queue] | None = _copy_queue,
2421
+ allocator: Allocator | None = None,
2422
+ ):
2423
+ """Make a new, uninitialized :class:`Array` having the same properties
2424
+ as *ary*.
2425
+ """
2426
+
2427
+ return ary._new_with_changes(data=None, offset=0, queue=queue,
2428
+ allocator=allocator)
2429
+
2430
+
2431
+ def zeros_like(ary: ArrayT) -> ArrayT:
2432
+ """Make a new, zero-initialized :class:`Array` having the same properties
2433
+ as *ary*.
2434
+ """
2435
+
2436
+ result = ary._new_like_me()
2437
+ result._zero_fill()
2438
+ return result
2439
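+
+ # Illustrative sketch: the *_like helpers take shape, dtype and strides
+ # from an existing array.
+ #
+ #     a = zeros(queue, (3, 4), np.float32)
+ #     b = empty_like(a)    # uninitialized, same layout
+ #     c = zeros_like(a)    # zero-filled, same layout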
+
2440
+
2441
+ @dataclass
2442
+ class _ArangeInfo:
2443
+ start: int | None = None
2444
+ stop: int | None = None
2445
+ step: int | None = None
2446
+ dtype: np.dtype | None = None
2447
+ allocator: Any | None = None
2448
+
2449
+
2450
+ @elwise_kernel_runner
2451
+ def _arange_knl(result, start, step):
2452
+ return elementwise.get_arange_kernel(
2453
+ result.context, result.dtype)
2454
+
2455
+
2456
+ def arange(queue: cl.CommandQueue, *args: Any, **kwargs: Any) -> Array:
2457
+ """arange(queue, [start, ] stop [, step], **kwargs)
2458
+ Create an :class:`Array` filled with numbers spaced *step* apart,
2459
+ starting from *start* and ending at *stop*. If not given, *start*
2460
+ defaults to 0, *step* defaults to 1.
2461
+
2462
+ For floating point arguments, the length of the result is
2463
+ ``ceil((stop - start)/step)``. This rule may result in the last
2464
+ element of the result being greater than *stop*.
2465
+
2466
+ *dtype* is a required keyword argument.
2467
+
2468
+ .. versionchanged:: 2011.1
2469
+ *context* argument was deprecated.
2470
+
2471
+ .. versionchanged:: 2011.2
2472
+ *allocator* keyword argument was added.
2473
+ """
2474
+
2475
+ # {{{ argument processing
2476
+
2477
+ # Yuck. Thanks, numpy developers. ;)
2478
+
2479
+ explicit_dtype = False
2480
+ inf = _ArangeInfo()
2481
+
2482
+ if args and isinstance(args[-1], np.dtype):
2483
+ inf.dtype = args[-1]
2484
+ args = args[:-1]
2485
+ explicit_dtype = True
2486
+
2487
+ argc = len(args)
2488
+ if argc == 0:
2489
+ raise ValueError("stop argument required")
2490
+ elif argc == 1:
2491
+ inf.stop = args[0]
2492
+ elif argc == 2:
2493
+ inf.start = args[0]
2494
+ inf.stop = args[1]
2495
+ elif argc == 3:
2496
+ inf.start = args[0]
2497
+ inf.stop = args[1]
2498
+ inf.step = args[2]
2499
+ else:
2500
+ raise ValueError("too many arguments")
2501
+
2502
+ admissible_names = ["start", "stop", "step", "dtype", "allocator"]
2503
+ for k, v in kwargs.items():
2504
+ if k in admissible_names:
2505
+ if getattr(inf, k) is None:
2506
+ setattr(inf, k, v)
2507
+ if k == "dtype":
2508
+ explicit_dtype = True
2509
+ else:
2510
+ raise ValueError(f"may not specify '{k}' by position and keyword")
2511
+ else:
2512
+ raise ValueError(f"unexpected keyword argument '{k}'")
2513
+
2514
+ if inf.start is None:
2515
+ inf.start = 0
2516
+ if inf.step is None:
2517
+ inf.step = 1
2518
+ if inf.dtype is None:
2519
+ inf.dtype = np.array([inf.start, inf.stop, inf.step]).dtype
2520
+
2521
+ # }}}
2522
+
2523
+ # {{{ actual functionality
2524
+
2525
+ dtype = np.dtype(inf.dtype)
2526
+ start = dtype.type(inf.start)
2527
+ step = dtype.type(inf.step)
2528
+ stop = dtype.type(inf.stop)
2529
+
2530
+ if not explicit_dtype:
2531
+ raise TypeError("arange requires a dtype argument")
2532
+
2533
+ from math import ceil
2534
+ size = ceil((stop-start)/step)
2535
+
2536
+ result = Array(queue, (size,), dtype, allocator=inf.allocator)
2537
+ result.add_event(_arange_knl(result, start, step, queue=queue))
2538
+
2539
+ # }}}
2540
+
2541
+ return result
2542
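+
+ # Illustrative sketch of the ceil((stop-start)/step) length rule:
+ #
+ #     a = arange(queue, 0, 1, 0.3, dtype=np.float32)
+ #     # len(a) == ceil(1/0.3) == 4, i.e. roughly [0, 0.3, 0.6, 0.9]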
+
2543
+ # }}}
2544
+
2545
+
2546
+ # {{{ take/put/concatenate/diff/(h?stack)
2547
+
2548
+ @elwise_kernel_runner
2549
+ def _take(result, ary, indices):
2550
+ return elementwise.get_take_kernel(
2551
+ result.context, result.dtype, indices.dtype)
2552
+
2553
+
2554
+ def take(
2555
+ a: Array,
2556
+ indices: Array,
2557
+ out: Array | None = None,
2558
+ queue: cl.CommandQueue | None = None,
2559
+ wait_for: cl.WaitList = None
2560
+ ) -> Array:
2561
+ """Return the :class:`Array` ``[a[indices[0]], ..., a[indices[n]]]``.
2562
+ *indices* must be a one-dimensional :class:`Array` of integers.
2563
+ """
2564
+
2565
+ queue = queue or a.queue
2566
+ if out is None:
2567
+ out = type(a)(queue, indices.shape, a.dtype, allocator=a.allocator)
2568
+
2569
+ assert len(indices.shape) == 1
2570
+ out.add_event(
2571
+ _take(out, a, indices, queue=queue, wait_for=wait_for))
2572
+ return out
2573
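+
+ # Usage sketch: gathering elements through a device index array.
+ #
+ #     a = to_device(queue, np.array([10., 20., 30., 40.], dtype=np.float32))
+ #     idx = to_device(queue, np.array([3, 0], dtype=np.int32))
+ #     take(a, idx)    # device array [40., 10.]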
+
2574
+
2575
+ def multi_take(arrays, indices, out=None, queue: cl.CommandQueue | None = None):
2576
+ if not len(arrays):
2577
+ return []
2578
+
2579
+ assert len(indices.shape) == 1
2580
+
2581
+ from pytools import single_valued
2582
+ a_dtype = single_valued(a.dtype for a in arrays)
2583
+ a_allocator = arrays[0].allocator
2584
+ context = indices.context
2585
+ queue = queue or indices.queue
2586
+
2587
+ vec_count = len(arrays)
2588
+
2589
+ if out is None:
2590
+ out = [
2591
+ type(arrays[i])(
2592
+ queue, indices.shape, a_dtype,
2593
+ allocator=a_allocator)
2594
+ for i in range(vec_count)]
2595
+ else:
2596
+ if len(out) != len(arrays):
2597
+ raise ValueError("out and arrays must have the same length")
2598
+
2599
+ chunk_size = builtins.min(vec_count, 10)
2600
+
2601
+ def make_func_for_chunk_size(chunk_size):
2602
+ knl = elementwise.get_take_kernel(
2603
+ indices.context, a_dtype, indices.dtype,
2604
+ vec_count=chunk_size)
2606
+ return knl
2607
+
2608
+ knl = make_func_for_chunk_size(chunk_size)
2609
+
2610
+ for start_i in range(0, len(arrays), chunk_size):
2611
+ chunk_slice = slice(start_i, start_i+chunk_size)
2612
+
2613
+ if start_i + chunk_size > vec_count:
2614
+ knl = make_func_for_chunk_size(vec_count-start_i)
2615
+
2616
+ gs, ls = indices._get_sizes(queue,
2617
+ knl.get_work_group_info(
2618
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2619
+ queue.device))
2620
+
2621
+ wait_for_this = (
2622
+ *indices.events,
2623
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2624
+ *[evt for o in out[chunk_slice] for evt in o.events])
2625
+ evt = knl(queue, gs, ls,
2626
+ indices.data,
2627
+ *[o.data for o in out[chunk_slice]],
2628
+ *[i.data for i in arrays[chunk_slice]],
2629
+ indices.size,
2630
+ wait_for=wait_for_this)
2631
+ for o in out[chunk_slice]:
2632
+ o.add_event(evt)
2633
+
2634
+ return out
2635
+
2636
+
2637
+ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None,
2638
+ out=None, queue: cl.CommandQueue | None = None, src_offsets=None):
2639
+ if not len(arrays):
2640
+ return []
2641
+
2642
+ from pytools import single_valued
2643
+ a_dtype = single_valued(a.dtype for a in arrays)
2644
+ a_allocator = arrays[0].allocator
2645
+ context = src_indices.context
2646
+ queue = queue or src_indices.queue
2647
+
2648
+ vec_count = len(arrays)
2649
+
2650
+ if out is None:
2651
+ out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
2652
+ for i in range(vec_count)]
2653
+ else:
2654
+ if a_dtype != single_valued(o.dtype for o in out):
2655
+ raise TypeError("arrays and out must have the same dtype")
2656
+ if len(out) != vec_count:
2657
+ raise ValueError("out and arrays must have the same length")
2658
+
2659
+ if src_indices.dtype != dest_indices.dtype:
2660
+ raise TypeError(
2661
+ "src_indices and dest_indices must have the same dtype")
2662
+
2663
+ if len(src_indices.shape) != 1:
2664
+ raise ValueError("src_indices must be 1D")
2665
+
2666
+ if src_indices.shape != dest_indices.shape:
2667
+ raise ValueError(
2668
+ "src_indices and dest_indices must have the same shape")
2669
+
2670
+ if src_offsets is None:
2671
+ src_offsets_list = []
2672
+ else:
2673
+ src_offsets_list = src_offsets
2674
+ if len(src_offsets) != vec_count:
2675
+ raise ValueError(
2676
+ "src_indices and src_offsets must have the same length")
2677
+
2678
+ max_chunk_size = 10
2679
+
2680
+ chunk_size = builtins.min(vec_count, max_chunk_size)
2681
+
2682
+ def make_func_for_chunk_size(chunk_size):
2683
+ return elementwise.get_take_put_kernel(context,
2684
+ a_dtype, src_indices.dtype,
2685
+ with_offsets=src_offsets is not None,
2686
+ vec_count=chunk_size)
2687
+
2688
+ knl = make_func_for_chunk_size(chunk_size)
2689
+
2690
+ for start_i in range(0, len(arrays), chunk_size):
2691
+ chunk_slice = slice(start_i, start_i+chunk_size)
2692
+
2693
+ if start_i + chunk_size > vec_count:
2694
+ knl = make_func_for_chunk_size(vec_count-start_i)
2695
+
2696
+ gs, ls = src_indices._get_sizes(queue,
2697
+ knl.get_work_group_info(
2698
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2699
+ queue.device))
2700
+
2701
+ wait_for_this = (
2702
+ *dest_indices.events,
2703
+ *src_indices.events,
2704
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2705
+ *[evt for o in out[chunk_slice] for evt in o.events])
2706
+ evt = knl(queue, gs, ls,
2707
+ *out[chunk_slice],
2708
+ dest_indices,
2709
+ src_indices,
2710
+ *arrays[chunk_slice],
2711
+ *src_offsets_list[chunk_slice],
2712
+ src_indices.size,
2713
+ wait_for=wait_for_this)
2714
+ for o in out[chunk_slice]:
2715
+ o.add_event(evt)
2716
+
2717
+ return out
2718
+
2719
+
2720
+ def multi_put(
2721
+ arrays,
2722
+ dest_indices: Array,
2723
+ dest_shape=None,
2724
+ out=None,
2725
+ queue: cl.CommandQueue | None = None,
2726
+ wait_for: cl.WaitList = None
2727
+ ):
2728
+ if not len(arrays):
2729
+ return []
2730
+
2731
+ from pytools import single_valued
2732
+
2733
+ a_dtype = single_valued(a.dtype for a in arrays)
2734
+ a_allocator = arrays[0].allocator
2735
+
2736
+ context = dest_indices.context
2737
+ queue = queue or dest_indices.queue
2738
+ assert queue is not None
2739
+ assert context is not None
2740
+
2741
+ if wait_for is None:
2742
+ wait_for = []
2743
+ wait_for = [*wait_for, *dest_indices.events]
2744
+
2745
+ vec_count = len(arrays)
2746
+
2747
+ if out is None:
2748
+ out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
2749
+ for i in range(vec_count)]
2750
+ else:
2751
+ if a_dtype != single_valued(o.dtype for o in out):
2752
+ raise TypeError("arrays and out must have the same dtype")
2753
+ if len(out) != vec_count:
2754
+ raise ValueError("out and arrays must have the same length")
2755
+
2756
+ if len(dest_indices.shape) != 1:
2757
+ raise ValueError("dest_indices must be 1D")
2758
+
2759
+ chunk_size = builtins.min(vec_count, 10)
2760
+
2761
+ # array of bools to specify whether the array of same index in this chunk
2762
+ # will be filled with a single value.
2763
+ use_fill = np.ndarray((chunk_size,), dtype=np.uint8)
2764
+ array_lengths = np.ndarray((chunk_size,), dtype=np.int64)
2765
+
2766
+ def make_func_for_chunk_size(chunk_size):
2767
+ knl = elementwise.get_put_kernel(
2768
+ context, a_dtype, dest_indices.dtype,
2769
+ vec_count=chunk_size)
2770
+ return knl
2771
+
2772
+ knl = make_func_for_chunk_size(chunk_size)
2773
+
2774
+ for start_i in range(0, len(arrays), chunk_size):
2775
+ chunk_slice = slice(start_i, start_i+chunk_size)
2776
+ for fill_idx, ary in enumerate(arrays[chunk_slice]):
2777
+ # If there is only one value in the values array for this src array
2778
+ # in the chunk then fill every index in `dest_idx` array with it.
2779
+ use_fill[fill_idx] = 1 if ary.size == 1 else 0
2780
+ array_lengths[fill_idx] = len(ary)
2781
+ # Copy the populated `use_fill` array to a buffer on the device.
2782
+ use_fill_cla = to_device(queue, use_fill)
2783
+ array_lengths_cla = to_device(queue, array_lengths)
2784
+
2785
+ if start_i + chunk_size > vec_count:
2786
+ knl = make_func_for_chunk_size(vec_count-start_i)
2787
+
2788
+ gs, ls = dest_indices._get_sizes(queue,
2789
+ knl.get_work_group_info(
2790
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2791
+ queue.device))
2792
+
2793
+ wait_for_this = (
2794
+ *wait_for,
2795
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2796
+ *[evt for o in out[chunk_slice] for evt in o.events])
2797
+ evt = knl(queue, gs, ls,
2798
+ *out[chunk_slice],
2799
+ dest_indices,
2800
+ *arrays[chunk_slice],
2801
+ use_fill_cla, array_lengths_cla, dest_indices.size,
2802
+ wait_for=wait_for_this)
2803
+
2804
+ for o in out[chunk_slice]:
2805
+ o.add_event(evt)
2806
+
2807
+ return out
2808
+
2809
+
2810
+ def concatenate(arrays, axis=0, queue: cl.CommandQueue | None = None, allocator=None):
2811
+ """
2812
+ .. versionadded:: 2013.1
2813
+
2814
+ .. note::
2815
+
2816
+ The returned array is of the same type as the first array in the list.
2817
+ """
2818
+ if not arrays:
2819
+ raise ValueError("need at least one array to concatenate")
2820
+
2821
+ # {{{ find properties of result array
2822
+
2823
+ shape = None
2824
+
2825
+ for i_ary, ary in enumerate(arrays):
2826
+ queue = queue or ary.queue
2827
+ allocator = allocator or ary.allocator
2828
+
2829
+ if shape is None:
2830
+ # first array
2831
+ shape = list(ary.shape)
2832
+ else:
2833
+ if len(ary.shape) != len(shape):
2834
+ raise ValueError(
2835
+ f"{i_ary}-th array has different number of axes: "
2836
+ f"expected {len(ary.shape)}, got {len(shape)})")
2837
+
2838
+ ary_shape_list = list(ary.shape)
2839
+ if (ary_shape_list[:axis] != shape[:axis]
2840
+ or ary_shape_list[axis+1:] != shape[axis+1:]):
2841
+ raise ValueError(
2842
+ f"{i_ary}-th array has residual not matching other arrays")
2843
+
2844
+ shape[axis] += ary.shape[axis]
2845
+
2846
+ # }}}
2847
+
2848
+ shape = tuple(shape)
2849
+ dtype = np.result_type(*[ary.dtype for ary in arrays])
2850
+
2851
+ if __debug__:
2852
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2853
+ for ary in arrays[1:]):
2854
+ warn("Elements of 'arrays' not of the same type, returning "
2855
+ "an instance of the type of arrays[0]",
2856
+ stacklevel=2)
2857
+
2858
+ result = arrays[0].__class__(queue, shape, dtype, allocator=allocator)
2859
+
2860
+ full_slice = (slice(None),) * len(shape)
2861
+
2862
+ base_idx = 0
2863
+ for ary in arrays:
2864
+ my_len = ary.shape[axis]
2865
+ result.setitem(
2866
+ (*full_slice[:axis],
2867
+ slice(base_idx, base_idx+my_len),
2868
+ *full_slice[axis+1:]),
2869
+ ary)
2870
+
2871
+ base_idx += my_len
2872
+
2873
+ return result
2874
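+
+ # Illustrative sketch: shapes must agree except along the concatenation
+ # axis.
+ #
+ #     a = zeros(queue, (2, 3), np.float32)
+ #     b = zeros(queue, (4, 3), np.float32)
+ #     concatenate((a, b), axis=0).shape    # (6, 3)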
+
2875
+
2876
+ @elwise_kernel_runner
2877
+ def _diff(result, array):
2878
+ return elementwise.get_diff_kernel(array.context, array.dtype)
2879
+
2880
+
2881
+ def diff(array, queue: cl.CommandQueue | None = None, allocator=None):
2882
+ """
2883
+ .. versionadded:: 2013.2
2884
+ """
2885
+
2886
+ if len(array.shape) != 1:
2887
+ raise ValueError("multi-D arrays are not supported")
2888
+
2889
+ n, = array.shape
2890
+
2891
+ queue = queue or array.queue
2892
+ allocator = allocator or array.allocator
2893
+
2894
+ result = array.__class__(queue, (n-1,), array.dtype, allocator=allocator)
2895
+ event1 = _diff(result, array, queue=queue)
2896
+ result.add_event(event1)
2897
+ return result
2898
+
2899
+
2900
+ def hstack(arrays, queue: cl.CommandQueue | None = None):
2901
+ if len(arrays) == 0:
2902
+ raise ValueError("need at least one array to hstack")
2903
+
2904
+ if queue is None:
2905
+ for ary in arrays:
2906
+ if ary.queue is not None:
2907
+ queue = ary.queue
2908
+ break
2909
+
2910
+ from pytools import all_equal, single_valued
2911
+ if not all_equal(len(ary.shape) for ary in arrays):
2912
+ raise ValueError("arguments must all have the same number of axes")
2913
+
2914
+ lead_shape = single_valued(ary.shape[:-1] for ary in arrays)
2915
+
2916
+ w = builtins.sum(ary.shape[-1] for ary in arrays)
2917
+
2918
+ if __debug__:
2919
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2920
+ for ary in arrays[1:]):
2921
+ warn("Elements of 'arrays' not of the same type, returning "
2922
+ "an instance of the type of arrays[0]",
2923
+ stacklevel=2)
2924
+
2925
+ result = arrays[0].__class__(queue, (*lead_shape, w), arrays[0].dtype,
2926
+ allocator=arrays[0].allocator)
2927
+ index = 0
2928
+ for ary in arrays:
2929
+ result[..., index:index+ary.shape[-1]] = ary
2930
+ index += ary.shape[-1]
2931
+
2932
+ return result
2933
+
2934
+
2935
+ def stack(arrays, axis=0, queue: cl.CommandQueue | None = None):
2936
+ """
2937
+ Join a sequence of arrays along a new axis.
2938
+
2939
+ :arg arrays: A sequence of :class:`Array`.
2940
+ :arg axis: Index of the dimension of the new axis in the result array.
2941
+ May be -1, to make the new axis the last dimension.
2942
+
2943
+ :returns: :class:`Array`
2944
+ """
2945
+ if not arrays:
2946
+ raise ValueError("need at least one array to stack")
2947
+
2948
+ input_shape = arrays[0].shape
2949
+ input_ndim = arrays[0].ndim
2950
+ axis = input_ndim if axis == -1 else axis
2951
+
2952
+ if queue is None:
2953
+ for ary in arrays:
2954
+ if ary.queue is not None:
2955
+ queue = ary.queue
2956
+ break
2957
+
2958
+ if not builtins.all(ary.shape == input_shape for ary in arrays[1:]):
2959
+ raise ValueError("arrays must have the same shape")
2960
+
2961
+ if not (0 <= axis <= input_ndim):
2962
+ raise ValueError("invalid axis")
2963
+
2964
+ if (axis == 0 and not builtins.all(
2965
+ ary.flags.c_contiguous for ary in arrays)):
2966
+ # pyopencl.Array.__setitem__ does not support non-contiguous assignments
2967
+ raise NotImplementedError(
+ "stacking along axis 0 requires C-contiguous input arrays")
2968
+
2969
+ if (axis == input_ndim and not builtins.all(
2970
+ ary.flags.f_contiguous for ary in arrays)):
2971
+ # pyopencl.Array.__setitem__ does not support non-contiguous assignments
2972
+ raise NotImplementedError(
+ "stacking along the last axis requires F-contiguous input arrays")
2973
+
2974
+ result_shape = (*input_shape[:axis], len(arrays), *input_shape[axis:])
2975
+
2976
+ if __debug__:
2977
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2978
+ for ary in arrays[1:]):
2979
+ warn("Elements of 'arrays' not of the same type, returning "
2980
+ "an instance of the type of arrays[0]",
2981
+ stacklevel=2)
2982
+
2983
+ result = arrays[0].__class__(queue, result_shape,
2984
+ np.result_type(*(ary.dtype
2985
+ for ary in arrays)),
2986
+ # TODO: reconsider once arrays support
2987
+ # non-contiguous assignments
2988
+ order="C" if axis == 0 else "F",
2989
+ allocator=arrays[0].allocator)
2990
+ for i, ary in enumerate(arrays):
2991
+ idx = (slice(None),)*axis + (i,) + (slice(None),)*(input_ndim-axis)
2992
+ result[idx] = ary
2993
+
2994
+ return result
2995
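+
+ # Usage sketch: stack() inserts a new axis of length len(arrays).
+ #
+ #     a = zeros(queue, (3,), np.float32)
+ #     b = zeros(queue, (3,), np.float32)
+ #     stack((a, b)).shape             # (2, 3)
+ #     stack((a, b), axis=-1).shape    # (3, 2)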
+
2996
+ # }}}
2997
+
2998
+
2999
+ # {{{ shape manipulation
3000
+
3001
+ def transpose(a, axes=None):
3002
+ """Permute the dimensions of an array.
3003
+
3004
+ :arg a: :class:`Array`
3005
+ :arg axes: list of ints, optional.
3006
+ By default, reverse the dimensions, otherwise permute the axes
3007
+ according to the values given.
3008
+
3009
+ :returns: an :class:`Array` view of the array with its axes permuted.
3010
+ """
3011
+ return a.transpose(axes)
3012
+
3013
+
3014
+ def reshape(a, shape):
3015
+ """Gives a new shape to an array without changing its data.
3016
+
3017
+ .. versionadded:: 2015.2
3018
+ """
3019
+
3020
+ return a.reshape(shape)
3021
+
3022
+ # }}}
3023
+
3024
+
3025
+ # {{{ conditionals
3026
+
3027
+ @elwise_kernel_runner
3028
+ def _if_positive(result, criterion, then_, else_):
3029
+ return elementwise.get_if_positive_kernel(
3030
+ result.context, criterion.dtype, then_.dtype,
3031
+ is_then_array=isinstance(then_, Array),
3032
+ is_else_array=isinstance(else_, Array),
3033
+ is_then_scalar=then_.shape == (),
3034
+ is_else_scalar=else_.shape == (),
3035
+ )
3036
+
3037
+
3038
+ def if_positive(
3039
+ criterion: Array | ScalarLike,
3040
+ then_: Array | ScalarLike,
3041
+ else_: Array | ScalarLike,
3042
+ out: Array | None = None,
3043
+ queue: cl.CommandQueue | None = None):
3044
+ """Return an array like *then_*, which, for the element at index *i*,
3045
+ contains *then_[i]* if *criterion[i]>0*, else *else_[i]*.
3046
+ """
3047
+
3048
+ is_then_scalar = _is_scalar(then_)
3049
+ is_else_scalar = _is_scalar(else_)
3050
+ if _is_scalar(criterion) and is_then_scalar and is_else_scalar:
3051
+ result = np.where(criterion, then_, else_)
3052
+
3053
+ if out is not None:
3054
+ out[...] = result
3055
+ return out
3056
+
3057
+ return result
3058
+
3059
+ if is_then_scalar:
3060
+ then_ary = np.array(then_)
3061
+ else:
3062
+ then_ary = then_
3063
+
3064
+ assert not _is_scalar(criterion)
3065
+
3066
+ if is_else_scalar:
3067
+ else_ary = np.array(else_)
3068
+ else:
3069
+ else_ary = else_
3070
+
3071
+ if then_ary.dtype != else_ary.dtype:
3072
+ raise ValueError(
3073
+ f"dtypes do not match: then_ary is '{then_ary.dtype}' and "
3074
+ f"else_ary is '{else_ary.dtype}'")
3075
+
3076
+ if then_ary.shape == () and else_ary.shape == ():
3077
+ pass
3078
+ elif then_ary.shape != () and else_ary.shape != ():
3079
+ if not (criterion.shape == then_ary.shape == else_ary.shape):
3080
+ raise ValueError(
3081
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3082
+ f", 'then_ary' has shape {then_ary.shape} and 'else_ary' has shape "
3083
+ f"{else_ary.shape}")
3084
+ elif then_ary.shape == ():
3085
+ if criterion.shape != else_ary.shape:
3086
+ raise ValueError(
3087
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3088
+ f" and 'else_ary' has shape {else_ary.shape}")
3089
+ elif else_ary.shape == ():
3090
+ if criterion.shape != then_ary.shape:
3091
+ raise ValueError(
3092
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3093
+ f" and 'then_ary' has shape {then_ary.shape}")
3094
+ else:
3095
+ raise AssertionError()
3096
+
3097
+ if out is None:
3098
+ if then_ary.shape != ():
3099
+ assert isinstance(then_ary, Array)
3100
+ out = empty_like(
3101
+ then_ary, criterion.queue, allocator=criterion.allocator)
3102
+ else:
3103
+ # Use same strides as criterion
3104
+ cr_byte_strides = np.array(criterion.strides, dtype=np.int64)
3105
+ cr_item_strides = cr_byte_strides // criterion.dtype.itemsize
3106
+ out_strides = tuple(cr_item_strides*then_ary.dtype.itemsize)
3107
+
3108
+ out = type(criterion)(
3109
+ criterion.queue, criterion.shape, then_ary.dtype,
3110
+ allocator=criterion.allocator,
3111
+ strides=out_strides)
3112
+
3113
+ event1 = _if_positive(out, criterion, then_ary, else_ary, queue=queue)
3114
+ out.add_event(event1)
3115
+
3116
+ return out
3117
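+
+ # Illustrative sketch: a where()-style select on the sign of *criterion*.
+ #
+ #     cond = to_device(queue, np.array([1, -1, 2], dtype=np.int32))
+ #     x = to_device(queue, np.array([10., 20., 30.], dtype=np.float32))
+ #     y = to_device(queue, np.array([-1., -2., -3.], dtype=np.float32))
+ #     if_positive(cond, x, y)    # [10., -2., 30.]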
+
3118
+ # }}}
3119
+
3120
+
3121
+ # {{{ minimum/maximum
3122
+
3123
+ @elwise_kernel_runner
3124
+ def _minimum_maximum_backend(out, a, b, minmax):
3125
+ from pyopencl.elementwise import get_minmaximum_kernel
3126
+ return get_minmaximum_kernel(out.context, minmax,
3127
+ out.dtype,
3128
+ a.dtype if isinstance(a, Array) else np.dtype(type(a)),
3129
+ b.dtype if isinstance(b, Array) else np.dtype(type(b)),
3130
+ elementwise.get_argument_kind(a),
3131
+ elementwise.get_argument_kind(b))
3132
+
3133
+
3134
+ def maximum(a, b, out=None, queue: cl.CommandQueue | None = None):
3135
+ """Return the elementwise maximum of *a* and *b*."""
3136
+
3137
+ a_is_scalar = np.isscalar(a)
3138
+ b_is_scalar = np.isscalar(b)
3139
+ if a_is_scalar and b_is_scalar:
3140
+ result = np.maximum(a, b)
3141
+ if out is not None:
3142
+ out[...] = result
3143
+ return out
3144
+
3145
+ return result
3146
+
3147
+ queue = queue or a.queue or b.queue
3148
+
3149
+ if out is None:
3150
+ out_dtype = _get_common_dtype(a, b, queue)
3151
+ if not a_is_scalar:
3152
+ out = a._new_like_me(out_dtype, queue)
3153
+ elif not b_is_scalar:
3154
+ out = b._new_like_me(out_dtype, queue)
3155
+
3156
+ out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="max"))
3157
+
3158
+ return out
3159
+
3160
+
3161
+ def minimum(a, b, out=None, queue: cl.CommandQueue | None = None):
3162
+ """Return the elementwise minimum of *a* and *b*."""
3163
+ a_is_scalar = np.isscalar(a)
3164
+ b_is_scalar = np.isscalar(b)
3165
+ if a_is_scalar and b_is_scalar:
3166
+ result = np.minimum(a, b)
3167
+ if out is not None:
3168
+ out[...] = result
3169
+ return out
3170
+
3171
+ return result
3172
+
3173
+ queue = queue or a.queue or b.queue
3174
+
3175
+ if out is None:
3176
+ out_dtype = _get_common_dtype(a, b, queue)
3177
+ if not a_is_scalar:
3178
+ out = a._new_like_me(out_dtype, queue)
3179
+ elif not b_is_scalar:
3180
+ out = b._new_like_me(out_dtype, queue)
3181
+
3182
+ out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="min"))
3183
+
3184
+ return out
3185
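+
+ # Usage sketch: scalar arguments broadcast against the array argument.
+ #
+ #     maximum(a, 0.)    # clamp from below with a scalar
+ #     minimum(a, b)     # elementwise min of two same-shaped arrays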
+
3186
+ # }}}
3187
+
3188
+
3189
+ # {{{ logical ops
3190
+
3191
+ def _logical_op(x1: Array | ScalarLike,
3192
+ x2: Array | ScalarLike,
3193
+ out: Array | None,
3194
+ operator: str,
3195
+ queue: cl.CommandQueue | None = None) -> Array:
3196
+ # NOTE: Copied from pycuda.gpuarray
3197
+ assert operator in ["&&", "||"]
3198
+
3199
+ if np.isscalar(x1) and np.isscalar(x2):
3200
+ if out is None:
3201
+ out = empty(queue, shape=(), dtype=np.int8)
3202
+
3203
+ if operator == "&&":
3204
+ out[:] = np.logical_and(x1, x2)
3205
+ else:
3206
+ out[:] = np.logical_or(x1, x2)
3207
+ elif np.isscalar(x1) or np.isscalar(x2):
3208
+ scalar_arg, = (x for x in (x1, x2) if np.isscalar(x))
3209
+ ary_arg, = (x for x in (x1, x2) if not np.isscalar(x))
3210
+ queue = queue or ary_arg.queue
3211
+ allocator = ary_arg.allocator
3212
+
3213
+ if not isinstance(ary_arg, Array):
3214
+ raise ValueError("logical_and can take either scalar or Array"
3215
+ " as inputs")
3216
+
3217
+ out = out or ary_arg._new_like_me(dtype=np.int8)
3218
+
3219
+ assert queue is not None
3220
+ assert out.shape == ary_arg.shape and out.dtype == np.int8
3221
+
3222
+ knl = elementwise.get_array_scalar_binop_kernel(
3223
+ queue.context,
3224
+ operator,
3225
+ out.dtype,
3226
+ ary_arg.dtype,
3227
+ np.dtype(type(scalar_arg))
3228
+ )
3229
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, ary_arg, scalar_arg)
3230
+ else:
3231
+ if not (isinstance(x1, Array) and isinstance(x2, Array)):
3232
+ raise ValueError("logical_or/logical_and can take either scalar"
3233
+ " or Arrays as inputs")
3234
+ if x1.shape != x2.shape:
3235
+ raise NotImplementedError("Broadcasting not supported")
3236
+
3237
+ queue = queue or x1.queue or x2.queue
3238
+ allocator = x1.allocator or x2.allocator
3239
+
3240
+ if out is None:
3241
+ out = empty(queue, allocator=allocator,
3242
+ shape=x1.shape, dtype=np.int8)
3243
+
3244
+ assert queue is not None
3245
+ assert out.shape == x1.shape and out.dtype == np.int8
3246
+
3247
+ knl = elementwise.get_array_binop_kernel(
3248
+ queue.context,
3249
+ operator,
3250
+ out.dtype,
3251
+ x1.dtype, x2.dtype)
3252
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x1, x2)
3253
+
3254
+ return out
3255
+
3256
+
3257
+ def logical_and(x1, x2, /, out=None, queue: cl.CommandQueue | None = None):
3258
+ """
3259
+ Returns the element-wise logical AND of *x1* and *x2*.
3260
+ """
3261
+ return _logical_op(x1, x2, out, "&&", queue=queue)
3262
+
3263
+
3264
+ def logical_or(x1, x2, /, out=None, queue: cl.CommandQueue | None = None):
3265
+ """
3266
+ Returns the element-wise logical OR of *x1* and *x2*.
3267
+ """
3268
+ return _logical_op(x1, x2, out, "||", queue=queue)
3269
+
3270
+
3271
+ def logical_not(x, /, out=None, queue: cl.CommandQueue | None = None):
3272
+ """
3273
+ Returns the element-wise logical NOT of *x*.
3274
+ """
3275
+ if np.isscalar(x):
3276
+ out = out or empty(queue, shape=(), dtype=np.int8)
3277
+ out[:] = np.logical_not(x)
3278
+ else:
3279
+ queue = queue or x.queue
3280
+ out = out or empty(queue, shape=x.shape, dtype=np.int8,
3281
+ allocator=x.allocator)
3282
+ knl = elementwise.get_logical_not_kernel(queue.context,
3283
+ x.dtype)
3284
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x)
3285
+
3286
+ return out
3287
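+
+ # Illustrative sketch: the logical_* helpers return int8 (0/1) arrays.
+ #
+ #     inside = logical_and(a > 0, a < 1)
+ #     outside = logical_not(inside)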
+
3288
+ # }}}
3289
+
3290
+
3291
+ # {{{ reductions
3292
+
3293
+ def sum(
3294
+ a,
3295
+ dtype=None,
3296
+ queue: cl.CommandQueue | None = None,
3297
+ slice=None,
3298
+ initial=_NoValue):
3299
+ """
3300
+ .. versionadded:: 2011.1
3301
+ """
3302
+ if initial is not _NoValue and not isinstance(initial, SCALAR_CLASSES):
3303
+ raise ValueError("'initial' is not a scalar")
3304
+
3305
+ if dtype is not None:
3306
+ dtype = np.dtype(dtype)
3307
+
3308
+ from pyopencl.reduction import get_sum_kernel
3309
+ krnl = get_sum_kernel(a.context, dtype, a.dtype)
3310
+ result, event1 = krnl(a, queue=queue, slice=slice, wait_for=a.events,
3311
+ return_event=True)
3312
+ result.add_event(event1)
3313
+
3314
+ # NOTE: neutral element in `get_sum_kernel` is 0 by default
3315
+ if initial is not _NoValue:
3316
+ result += a.dtype.type(initial)
3317
+
3318
+ return result
3319
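+
+ # Usage sketch: reductions return a 0-d device array; call .get() for a
+ # host scalar.
+ #
+ #     total = sum(a).get()
+ #     shifted = sum(a, initial=100)    # 'initial' is added to the result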
+
3320
+
3321
+ def any(a, queue: cl.CommandQueue | None = None, wait_for: cl.WaitList = None):
3322
+ if len(a) == 0:
3323
+ return _BOOL_DTYPE.type(False)
3324
+
3325
+ return a.any(queue=queue, wait_for=wait_for)
3326
+
3327
+
3328
+ def all(a, queue: cl.CommandQueue | None = None, wait_for: cl.WaitList = None):
3329
+ if len(a) == 0:
3330
+ return _BOOL_DTYPE.type(True)
3331
+
3332
+ return a.all(queue=queue, wait_for=wait_for)
3333
+
3334
+
3335
+ def dot(a, b, dtype=None, queue: cl.CommandQueue | None = None, slice=None):
3336
+ """
3337
+ .. versionadded:: 2011.1
3338
+ """
3339
+ if dtype is not None:
3340
+ dtype = np.dtype(dtype)
3341
+
3342
+ from pyopencl.reduction import get_dot_kernel
3343
+ krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype)
3344
+
3345
+ result, event1 = krnl(a, b, queue=queue, slice=slice,
3346
+ wait_for=a.events + b.events, return_event=True)
3347
+ result.add_event(event1)
3348
+
3349
+ return result
3350
+
3351
+
3352
+ def vdot(a, b, dtype=None, queue: cl.CommandQueue | None = None, slice=None):
3353
+ """Like :func:`numpy.vdot`.
3354
+
3355
+ .. versionadded:: 2013.1
3356
+ """
3357
+ if dtype is not None:
3358
+ dtype = np.dtype(dtype)
3359
+
3360
+ from pyopencl.reduction import get_dot_kernel
3361
+ krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype,
3362
+ conjugate_first=True)
3363
+
3364
+ result, event1 = krnl(a, b, queue=queue, slice=slice,
3365
+ wait_for=a.events + b.events, return_event=True)
3366
+ result.add_event(event1)
3367
+
3368
+ return result
3369
+
3370
+
3371
+ def subset_dot(
3372
+ subset,
3373
+ a,
3374
+ b,
3375
+ dtype=None,
3376
+ queue: cl.CommandQueue | None = None,
3377
+ slice=None):
3378
+ """
3379
+ .. versionadded:: 2011.1
3380
+ """
3381
+ if dtype is not None:
3382
+ dtype = np.dtype(dtype)
3383
+
3384
+ from pyopencl.reduction import get_subset_dot_kernel
3385
+ krnl = get_subset_dot_kernel(
3386
+ a.context, dtype, subset.dtype, a.dtype, b.dtype)
3387
+
3388
+ result, event1 = krnl(subset, a, b, queue=queue, slice=slice,
3389
+ wait_for=subset.events + a.events + b.events, return_event=True)
3390
+ result.add_event(event1)
3391
+
3392
+ return result
3393
+
3394
+
3395
+ def _make_minmax_kernel(what):
3396
+ def f(a, queue: cl.CommandQueue | None = None, initial=_NoValue):
3397
+ if isinstance(a, SCALAR_CLASSES):
3398
+ return np.array(a).dtype.type(a)
3399
+
3400
+ if len(a) == 0:
3401
+ if initial is _NoValue:
3402
+ raise ValueError(
3403
+ f"zero-size array to reduction '{what}' "
3404
+ "which has no identity")
3405
+ else:
3406
+ return initial
3407
+
3408
+ if initial is not _NoValue and not isinstance(initial, SCALAR_CLASSES):
3409
+ raise ValueError("'initial' is not a scalar")
3410
+
3411
+ from pyopencl.reduction import get_minmax_kernel
3412
+ krnl = get_minmax_kernel(a.context, what, a.dtype)
3413
+ result, event1 = krnl(a, queue=queue, wait_for=a.events,
3414
+ return_event=True)
3415
+ result.add_event(event1)
3416
+
3417
+ if initial is not _NoValue:
3418
+ initial = a.dtype.type(initial)
3419
+ if what == "min":
3420
+ result = minimum(result, initial, queue=queue)
3421
+ elif what == "max":
3422
+ result = maximum(result, initial, queue=queue)
3423
+ else:
3424
+ raise ValueError(f"unknown minmax reduction type: '{what}'")
3425
+
3426
+ return result
3427
+
3428
+ return f
3429
+
3430
+
3431
+ min = _make_minmax_kernel("min")
3432
+ min.__name__ = "min"
3433
+ min.__doc__ = """
3434
+ .. versionadded:: 2011.1
3435
+ """
3436
+
3437
+ max = _make_minmax_kernel("max")
3438
+ max.__name__ = "max"
3439
+ max.__doc__ = """
3440
+ .. versionadded:: 2011.1
3441
+ """
3442
+
3443
+
3444
+ def _make_subset_minmax_kernel(what):
3445
+ def f(subset, a, queue: cl.CommandQueue | None = None, slice=None):
3446
+ from pyopencl.reduction import get_subset_minmax_kernel
3447
+ krnl = get_subset_minmax_kernel(a.context, what, a.dtype, subset.dtype)
3448
+ result, event1 = krnl(subset, a, queue=queue, slice=slice,
3449
+ wait_for=a.events + subset.events, return_event=True)
3450
+ result.add_event(event1)
3451
+ return result
3452
+ return f
3453
+
3454
+
3455
+ subset_min = _make_subset_minmax_kernel("min")
3456
+ subset_min.__doc__ = """.. versionadded:: 2011.1"""
3457
+ subset_max = _make_subset_minmax_kernel("max")
3458
+ subset_max.__doc__ = """.. versionadded:: 2011.1"""
3459
+
3460
+ # }}}
3461
+
3462
+
3463
+ # {{{ scans
3464
+
3465
+ def cumsum(a, output_dtype=None, queue: cl.CommandQueue | None = None,
3466
+ wait_for: cl.WaitList = None, return_event=False):
3467
+ # undocumented for now
3468
+
3469
+ """
3470
+ .. versionadded:: 2013.1
3471
+ """
3472
+
3473
+ if output_dtype is None:
3474
+ output_dtype = a.dtype
3475
+ else:
3476
+ output_dtype = np.dtype(output_dtype)
3477
+
3478
+ if wait_for is None:
3479
+ wait_for = []
3480
+
3481
+ result = a._new_like_me(output_dtype)
3482
+
3483
+ from pyopencl.scan import get_cumsum_kernel
3484
+ krnl = get_cumsum_kernel(a.context, a.dtype, output_dtype)
3485
+ evt = krnl(a, result, queue=queue, wait_for=wait_for + a.events)
3486
+ result.add_event(evt)
3487
+
3488
+ if return_event:
3489
+ return evt, result
3490
+ else:
3491
+ return result
3492
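+
+ # Illustrative sketch: an inclusive prefix sum, like numpy.cumsum.
+ #
+ #     a = arange(queue, 1, 5, dtype=np.int32)    # [1, 2, 3, 4]
+ #     cumsum(a)                                  # [1, 3, 6, 10]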
+
3493
+ # }}}
3494
+
3495
+
3496
+ __all__ = [
3497
+ "Allocator",
3498
+ "Array",
3499
+ "all",
3500
+ "any",
3501
+ "arange",
3502
+ "as_strided",
3503
+ "concatenate",
3504
+ "cumsum",
3505
+ "diff",
3506
+ "dot",
3507
+ "empty_like",
3508
+ "hstack",
3509
+ "if_positive",
3510
+ "logical_and",
3511
+ "logical_not",
3512
+ "logical_or",
3513
+ "maximum",
3514
+ "minimum",
3515
+ "multi_put",
3516
+ "multi_take",
3517
+ "multi_take_put",
3518
+ "reshape",
3519
+ "stack",
3520
+ "subset_dot",
3521
+ "sum",
3522
+ "take",
3523
+ "to_device",
3524
+ "transpose",
3525
+ "vdot",
3526
+ "zeros",
3527
+ "zeros_like",
3528
+ ]
3529
+
3530
+ # vim: foldmethod=marker