pyopencl 2025.2.5__cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyopencl might be problematic.

Files changed (47)
  1. pyopencl/.libs/libOpenCL-83a5a7fd.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +1995 -0
  3. pyopencl/_cl.cpython-312-x86_64-linux-gnu.so +0 -0
  4. pyopencl/_cl.pyi +2006 -0
  5. pyopencl/_cluda.py +57 -0
  6. pyopencl/_monkeypatch.py +1069 -0
  7. pyopencl/_mymako.py +17 -0
  8. pyopencl/algorithm.py +1454 -0
  9. pyopencl/array.py +3441 -0
  10. pyopencl/bitonic_sort.py +245 -0
  11. pyopencl/bitonic_sort_templates.py +597 -0
  12. pyopencl/cache.py +535 -0
  13. pyopencl/capture_call.py +200 -0
  14. pyopencl/characterize/__init__.py +463 -0
  15. pyopencl/characterize/performance.py +240 -0
  16. pyopencl/cl/pyopencl-airy.cl +324 -0
  17. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  18. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  19. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  20. pyopencl/cl/pyopencl-complex.h +303 -0
  21. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  22. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  23. pyopencl/cl/pyopencl-random123/array.h +325 -0
  24. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  25. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  26. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  27. pyopencl/clmath.py +282 -0
  28. pyopencl/clrandom.py +412 -0
  29. pyopencl/cltypes.py +202 -0
  30. pyopencl/compyte/.gitignore +21 -0
  31. pyopencl/compyte/__init__.py +0 -0
  32. pyopencl/compyte/array.py +241 -0
  33. pyopencl/compyte/dtypes.py +316 -0
  34. pyopencl/compyte/pyproject.toml +52 -0
  35. pyopencl/elementwise.py +1178 -0
  36. pyopencl/invoker.py +417 -0
  37. pyopencl/ipython_ext.py +70 -0
  38. pyopencl/py.typed +0 -0
  39. pyopencl/reduction.py +815 -0
  40. pyopencl/scan.py +1916 -0
  41. pyopencl/tools.py +1565 -0
  42. pyopencl/typing.py +61 -0
  43. pyopencl/version.py +11 -0
  44. pyopencl-2025.2.5.dist-info/METADATA +109 -0
  45. pyopencl-2025.2.5.dist-info/RECORD +47 -0
  46. pyopencl-2025.2.5.dist-info/WHEEL +6 -0
  47. pyopencl-2025.2.5.dist-info/licenses/LICENSE +104 -0
pyopencl/array.py ADDED
@@ -0,0 +1,3441 @@
1
+ """CL device arrays."""
2
+
3
+ # NOTE: for elwise_kernel_runner which adds keyword arguments
4
+ # pylint:disable=unexpected-keyword-arg
5
+ from __future__ import annotations
6
+
7
+
8
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
9
+
10
+ __license__ = """
11
+ Permission is hereby granted, free of charge, to any person
12
+ obtaining a copy of this software and associated documentation
13
+ files (the "Software"), to deal in the Software without
14
+ restriction, including without limitation the rights to use,
15
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the
17
+ Software is furnished to do so, subject to the following
18
+ conditions:
19
+
20
+ The above copyright notice and this permission notice shall be
21
+ included in all copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
25
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
27
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
28
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
30
+ OTHER DEALINGS IN THE SOFTWARE.
31
+ """
32
+
33
+ import builtins
34
+ from dataclasses import dataclass
35
+ from functools import reduce
36
+ from numbers import Number
37
+ from typing import (
38
+ TYPE_CHECKING,
39
+ Any,
40
+ ClassVar,
41
+ Concatenate,
42
+ Literal,
43
+ ParamSpec,
44
+ cast,
45
+ )
46
+ from warnings import warn
47
+
48
+ import numpy as np
49
+ from typing_extensions import Self, override
50
+
51
+ import pyopencl as cl
52
+ import pyopencl.elementwise as elementwise
53
+ from pyopencl import cltypes
54
+ from pyopencl.characterize import has_double_support
55
+ from pyopencl.compyte.array import (
56
+ ArrayFlags as _ArrayFlags,
57
+ as_strided as _as_strided,
58
+ c_contiguous_strides as _c_contiguous_strides,
59
+ equal_strides as _equal_strides,
60
+ f_contiguous_strides as _f_contiguous_strides,
61
+ )
62
+ from pyopencl.typing import Allocator
63
+
64
+
65
+ if TYPE_CHECKING:
66
+ from collections.abc import Callable, Hashable
67
+
68
+ from numpy.typing import DTypeLike, NDArray
69
+
70
+
71
+ SCALAR_CLASSES = (Number, np.bool_, bool)
72
+
73
+ if cl.get_cl_header_version() >= (2, 0):
74
+ _SVMPointer_or_nothing = cl.SVMPointer
75
+ else:
76
+ _SVMPointer_or_nothing = ()
77
+
78
+
79
+ class _NoValue:
80
+ pass
81
+
82
+
83
+ # {{{ _get_common_dtype
84
+
85
+ class DoubleDowncastWarning(UserWarning):
86
+ pass
87
+
88
+
89
+ _DOUBLE_DOWNCAST_WARNING = (
90
+ "The operation you requested would result in a double-precision "
91
+ "quantity according to numpy semantics. Since your device does not "
92
+ "support double precision, a single-precision quantity is being returned.")
93
+
94
+
95
+ def _get_common_dtype(obj1, obj2, queue):
96
+ if queue is None:
97
+ raise ValueError("PyOpenCL array has no queue; call .with_queue() to "
98
+ "add one in order to be able to perform operations")
99
+
100
+ # Note: We are calling np.result_type with pyopencl arrays here.
101
+ # Luckily, np.result_type only looks at the dtype of input arrays up until
102
+ # at least numpy v2.1.
103
+ result = np.result_type(obj1, obj2)
104
+
105
+ if not has_double_support(queue.device):
106
+ if result == np.float64:
107
+ result = np.dtype(np.float32)
108
+ warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
109
+ elif result == np.complex128:
110
+ result = np.dtype(np.complex64)
111
+ warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
112
+
113
+ return result
114
+
115
+ # }}}
116
+
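A minimal sketch of the downcast behaviour described above, assuming an OpenCL context is reachable via cl.create_some_context() and the selected device lacks double-precision support; on such a device the float64 operand is demoted and a DoubleDowncastWarning is emitted:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.ones(4, dtype=np.float32))
    b = a * np.float64(2.0)   # numpy semantics would promote to float64
    print(b.dtype)            # float32 (plus DoubleDowncastWarning) without cl_khr_fp64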
117
+
118
+ # {{{ _get_truedivide_dtype
119
+
120
+ def _get_truedivide_dtype(obj1, obj2, queue):
121
+ # the dtype of the division result obj1 / obj2
122
+
123
+ allow_double = has_double_support(queue.device)
124
+
125
+ x1 = obj1 if np.isscalar(obj1) else np.ones(1, obj1.dtype)
126
+ x2 = obj2 if np.isscalar(obj2) else np.ones(1, obj2.dtype)
127
+
128
+ result = (x1/x2).dtype
129
+
130
+ if not allow_double:
131
+ if result == np.float64:
132
+ result = np.dtype(np.float32)
133
+ elif result == np.complex128:
134
+ result = np.dtype(np.complex64)
135
+
136
+ return result
137
+
138
+ # }}}
139
+
140
+
141
+ # {{{ _get_broadcasted_binary_op_result
142
+
143
+ def _get_broadcasted_binary_op_result(obj1, obj2, cq,
144
+ dtype_getter=_get_common_dtype):
145
+
146
+ if obj1.shape == obj2.shape:
147
+ return obj1._new_like_me(dtype_getter(obj1, obj2, cq),
148
+ cq)
149
+ elif obj1.shape == ():
150
+ return obj2._new_like_me(dtype_getter(obj1, obj2, cq),
151
+ cq)
152
+ elif obj2.shape == ():
153
+ return obj1._new_like_me(dtype_getter(obj1, obj2, cq),
154
+ cq)
155
+ else:
156
+ raise NotImplementedError("Broadcasting binary operator with shapes:"
157
+ f" {obj1.shape}, {obj2.shape}.")
158
+
159
+ # }}}
160
+
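Only the scalar case of broadcasting (a shape-() operand) is handled here; any other shape mismatch raises NotImplementedError. A short sketch, assuming the same cl.create_some_context() setup as above:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.ones((4, 3), dtype=np.float32))
    b = cl_array.to_device(queue, np.ones((4, 3), dtype=np.float32))
    row = cl_array.to_device(queue, np.ones(3, dtype=np.float32))

    c = a + b        # identical shapes: supported
    d = a + 5        # host scalar: supported
    try:
        a + row      # general numpy-style broadcasting: not implemented
    except NotImplementedError as exc:
        print(exc)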
161
+
162
+ # {{{ VecLookupWarner
163
+
164
+ class VecLookupWarner:
165
+ def __getattr__(self, name):
166
+ warn("pyopencl.array.vec is deprecated. "
167
+ "Please use pyopencl.cltypes for OpenCL vector and scalar types",
168
+ DeprecationWarning, stacklevel=2)
169
+
170
+ if name == "types":
171
+ name = "vec_types"
172
+ elif name == "type_to_scalar_and_count":
173
+ name = "vec_type_to_scalar_and_count"
174
+
175
+ return getattr(cltypes, name)
176
+
177
+
178
+ vec = VecLookupWarner()
179
+
180
+ # }}}
181
+
182
+
183
+ # {{{ helper functionality
184
+
185
+ def _splay(
186
+ device: cl.Device,
187
+ n: int,
188
+ kernel_specific_max_wg_size: int | None = None,
189
+ ):
190
+ max_work_items = builtins.min(128, device.max_work_group_size)
191
+
192
+ if kernel_specific_max_wg_size is not None:
193
+ max_work_items = builtins.min(max_work_items, kernel_specific_max_wg_size)
194
+
195
+ min_work_items = builtins.min(32, max_work_items)
196
+ max_groups = device.max_compute_units * 4 * 8
197
+ # 4 to overfill the device
198
+ # 8 is an Nvidia constant--that's how many
199
+ # groups fit onto one compute unit
200
+
201
+ if n < min_work_items:
202
+ group_count = 1
203
+ work_items_per_group = min_work_items
204
+ elif n < (max_groups * min_work_items):
205
+ group_count = (n + min_work_items - 1) // min_work_items
206
+ work_items_per_group = min_work_items
207
+ elif n < (max_groups * max_work_items):
208
+ group_count = max_groups
209
+ grp = (n + min_work_items - 1) // min_work_items
210
+ work_items_per_group = (
211
+ (grp + max_groups - 1) // max_groups) * min_work_items
212
+ else:
213
+ group_count = max_groups
214
+ work_items_per_group = max_work_items
215
+
216
+ # print("n:%d gc:%d wipg:%d" % (n, group_count, work_items_per_group))
217
+ return (group_count*work_items_per_group,), (work_items_per_group,)
218
+
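A worked instance of the heuristic above, for a hypothetical device with max_work_group_size = 256 and max_compute_units = 8 (assumed values) and n = 10_000 work items; the arithmetic mirrors the third branch of _splay:

    max_work_items = min(128, 256)              # 128
    min_work_items = min(32, max_work_items)    # 32
    max_groups = 8 * 4 * 8                      # 256

    n = 10_000    # 8192 <= n < 32768, so the third branch applies
    group_count = max_groups                                                         # 256
    grp = (n + min_work_items - 1) // min_work_items                                 # 313
    work_items_per_group = ((grp + max_groups - 1) // max_groups) * min_work_items   # 64

    print((group_count * work_items_per_group,), (work_items_per_group,))
    # (16384,) (64,)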
219
+
220
+ # deliberately undocumented for now
221
+ ARRAY_KERNEL_EXEC_HOOK = None
222
+
223
+
224
+ P = ParamSpec("P")
225
+
226
+
227
+ def elwise_kernel_runner(
228
+ kernel_getter: Callable[Concatenate[Array, P], cl.Kernel]
229
+ ) -> Callable[Concatenate[Array, P], cl.Event]:
230
+ """Take a kernel getter of the same signature as the kernel
231
+ and return a function that invokes that kernel.
232
+
233
+ Assumes that the zeroth entry in *args* is an :class:`Array`.
234
+ """
235
+ from functools import wraps
236
+
237
+ @wraps(kernel_getter)
238
+ def kernel_runner(out: Array, *args: P.args, **kwargs: P.kwargs) -> cl.Event:
239
+ assert isinstance(out, Array)
240
+
241
+ wait_for = cast("cl.WaitList", kwargs.pop("wait_for", None))
242
+ queue = cast("cl.CommandQueue | None", kwargs.pop("queue", None))
243
+ if queue is None:
244
+ queue = out.queue
245
+
246
+ assert queue is not None
247
+
248
+ knl = kernel_getter(out, *args, **kwargs)
249
+ work_group_info = cast("int", knl.get_work_group_info(
250
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
251
+ queue.device))
252
+ gs, ls = out._get_sizes(queue, work_group_info)
253
+
254
+ knl_args = (out, *args, out.size)
255
+ if ARRAY_KERNEL_EXEC_HOOK is not None:
256
+ return ARRAY_KERNEL_EXEC_HOOK( # pylint: disable=not-callable
257
+ knl, queue, gs, ls, *knl_args, wait_for=wait_for)
258
+ else:
259
+ return knl(queue, gs, ls, *knl_args, wait_for=wait_for)
260
+
261
+ return kernel_runner
262
+
263
+ # }}}
264
+
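The decorated kernel getters further down (e.g. _fill) therefore behave as launchers: called with the output Array first, they build the kernel, splay the launch size over the output's size, and return the resulting cl.Event. A minimal sketch through the public API, assuming a usable context:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.empty(queue, 1000, dtype=np.float32)
    a.fill(np.float32(42.0))   # internally: evt = a._fill(a, value); a.add_event(evt)
    print(len(a.events) >= 1)  # the enqueued kernel's event is tracked on the array
    print(a.get()[:4])         # [42. 42. 42. 42.]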
265
+
266
+ # {{{ array class
267
+
268
+ class InconsistentOpenCLQueueWarning(UserWarning):
269
+ pass
270
+
271
+
272
+ class ArrayHasOffsetError(ValueError):
273
+ """
274
+ .. versionadded:: 2013.1
275
+ """
276
+
277
+ def __init__(self, val="The operation you are attempting does not yet "
278
+ "support arrays that start at an offset from the beginning "
279
+ "of their buffer."):
280
+ ValueError.__init__(self, val)
281
+
282
+
283
+ class _copy_queue: # noqa: N801
284
+ pass
285
+
286
+
287
+ _ARRAY_GET_SIZES_CACHE: \
288
+ dict[Hashable, tuple[tuple[int, ...], tuple[int, ...]]] = {}
289
+ _BOOL_DTYPE = np.dtype(np.int8)
290
+ _NOT_PRESENT = object()
291
+
292
+
293
+ class Array:
294
+ """A :class:`numpy.ndarray` work-alike that stores its data and performs
295
+ its computations on the compute device. :attr:`shape` and :attr:`dtype` work
296
+ exactly as in :mod:`numpy`. Arithmetic methods in :class:`Array` support the
297
+ broadcasting of scalars (e.g. ``array + 5``).
298
+
299
+ *cq* must be a :class:`~pyopencl.CommandQueue` or a :class:`~pyopencl.Context`.
300
+
301
+ If it is a queue, *cq* specifies the queue in which the array carries out
302
+ its computations by default. If a default queue (and thereby overloaded
303
+ operators and many other niceties) are not desired, pass a
304
+ :class:`~pyopencl.Context`.
305
+
306
+ *allocator* may be *None* or a callable that, upon being called with an
307
+ argument of the number of bytes to be allocated, returns a
308
+ :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool`
309
+ instance is one useful example of an object to pass here.)
310
+
311
+ .. versionchanged:: 2011.1
312
+
313
+ Renamed *context* to *cqa*, made it general-purpose.
314
+
315
+ All arguments beyond *order* should be considered keyword-only.
316
+
317
+ .. versionchanged:: 2015.2
318
+
319
+ Renamed *context* to *cq*, disallowed passing allocators through it.
320
+
321
+ .. attribute :: data
322
+
323
+ The :class:`pyopencl.MemoryObject` instance created for the memory that
324
+ backs this :class:`Array`.
325
+
326
+ .. versionchanged:: 2013.1
327
+
328
+ If a non-zero :attr:`offset` has been specified for this array,
329
+ this will fail with :exc:`ArrayHasOffsetError`.
330
+
331
+ .. attribute :: base_data
332
+
333
+ The :class:`pyopencl.MemoryObject` instance created for the memory that
334
+ backs this :class:`Array`. Unlike :attr:`data`, the base address of
335
+ *base_data* is allowed to be different from the beginning of the array.
336
+ The actual beginning is the base address of *base_data* plus
337
+ :attr:`offset` bytes.
338
+
339
+ Unlike :attr:`data`, retrieving :attr:`base_data` always succeeds.
340
+
341
+ .. versionadded:: 2013.1
342
+
343
+ .. attribute :: offset
344
+
345
+ See :attr:`base_data`.
346
+
347
+ .. versionadded:: 2013.1
348
+
349
+ .. attribute :: shape
350
+
351
+ A tuple of lengths of each dimension in the array.
352
+
353
+ .. attribute :: ndim
354
+
355
+ The number of dimensions in :attr:`shape`.
356
+
357
+ .. attribute :: dtype
358
+
359
+ The :class:`numpy.dtype` of the items in the GPU array.
360
+
361
+ .. attribute :: size
362
+
363
+ The number of meaningful entries in the array. Can also be computed by
364
+ multiplying up the numbers in :attr:`shape`.
365
+
366
+ .. attribute :: nbytes
367
+
368
+ The size of the entire array in bytes. Computed as :attr:`size` times
369
+ ``dtype.itemsize``.
370
+
371
+ .. attribute :: strides
372
+
373
+ A tuple of bytes to step in each dimension when traversing an array.
374
+
375
+ .. attribute :: flags
376
+
377
+ An object with attributes ``c_contiguous``, ``f_contiguous`` and
378
+ ``forc``, which may be used to query contiguity properties in analogy to
379
+ :attr:`numpy.ndarray.flags`.
380
+
381
+ .. rubric:: Methods
382
+
383
+ .. automethod :: with_queue
384
+
385
+ .. automethod :: __len__
386
+ .. automethod :: reshape
387
+ .. automethod :: ravel
388
+ .. automethod :: view
389
+ .. automethod :: squeeze
390
+ .. automethod :: transpose
391
+ .. attribute :: T
392
+ .. automethod :: set
393
+ .. automethod :: get
394
+ .. automethod :: get_async
395
+ .. automethod :: copy
396
+
397
+ .. automethod :: __str__
398
+ .. automethod :: __repr__
399
+
400
+ .. automethod :: mul_add
401
+ .. automethod :: __add__
402
+ .. automethod :: __sub__
403
+ .. automethod :: __iadd__
404
+ .. automethod :: __isub__
405
+ .. automethod :: __pos__
406
+ .. automethod :: __neg__
407
+ .. automethod :: __mul__
408
+ .. automethod :: __div__
409
+ .. automethod :: __rdiv__
410
+ .. automethod :: __pow__
411
+
412
+ .. automethod :: __and__
413
+ .. automethod :: __xor__
414
+ .. automethod :: __or__
415
+ .. automethod :: __iand__
416
+ .. automethod :: __ixor__
417
+ .. automethod :: __ior__
418
+
419
+ .. automethod :: __abs__
420
+ .. automethod :: __invert__
421
+
422
+ .. UNDOC reverse()
423
+
424
+ .. automethod :: fill
425
+
426
+ .. automethod :: astype
427
+
428
+ .. autoattribute :: real
429
+ .. autoattribute :: imag
430
+ .. automethod :: conj
431
+ .. automethod :: conjugate
432
+
433
+ .. automethod :: __getitem__
434
+ .. automethod :: __setitem__
435
+
436
+ .. automethod :: setitem
437
+
438
+ .. automethod :: map_to_host
439
+
440
+ .. rubric:: Comparisons, conditionals, any, all
441
+
442
+ .. versionadded:: 2013.2
443
+
444
+ Boolean arrays are stored as :class:`numpy.int8` because ``bool``
445
+ has an unspecified size in the OpenCL spec.
446
+
447
+ .. automethod :: __bool__
448
+
449
+ Only works for device scalars (i.e. "arrays" with ``shape == ()``).
450
+
451
+ .. automethod :: any
452
+ .. automethod :: all
453
+
454
+ .. automethod :: __eq__
455
+ .. automethod :: __ne__
456
+ .. automethod :: __lt__
457
+ .. automethod :: __le__
458
+ .. automethod :: __gt__
459
+ .. automethod :: __ge__
460
+
461
+ .. rubric:: Event management
462
+
463
+ If an array is used from within an out-of-order queue, it needs to take
464
+ care of its own operation ordering. The facilities in this section make
465
+ this possible.
466
+
467
+ .. versionadded:: 2014.1.1
468
+
469
+ .. attribute:: events
470
+
471
+ A list of :class:`pyopencl.Event` instances that the current content of
472
+ this array depends on. User code may read, but should never modify this
473
+ list directly. To update this list, instead use the following methods.
474
+
475
+ .. automethod:: add_event
476
+ .. automethod:: finish
477
+ """
478
+
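A short sketch of the constructor variants described in the docstring above, assuming an OpenCL platform reachable via cl.create_some_context(); MemoryPool and ImmediateAllocator come from pyopencl.tools:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array
    import pyopencl.tools as cl_tools

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Built against a queue: default queue set, overloaded operators work directly.
    a = cl_array.Array(queue, (1024,), np.float32)
    a.fill(np.float32(1.0))

    # Built against a context only: no default queue; attach one before computing.
    b = cl_array.Array(ctx, (1024,), np.float32).with_queue(queue)

    # An allocator (here a MemoryPool) can be passed to recycle device buffers.
    pool = cl_tools.MemoryPool(cl_tools.ImmediateAllocator(queue))
    c = cl_array.Array(queue, (1024,), np.float32, allocator=pool)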
479
+ __array_priority__: ClassVar[int] = 100
480
+
481
+ queue: cl.CommandQueue | None
482
+ shape: tuple[int, ...]
483
+ dtype: np.dtype[Any]
484
+ strides: tuple[int, ...]
485
+ events: list[cl.Event]
486
+ nbytes: int
487
+ size: int
488
+ allocator: Allocator | None
489
+ base_data: cl.MemoryObjectHolder | cl.SVMPointer | None
490
+
491
+ def __init__(
492
+ self,
493
+ cq: cl.Context | cl.CommandQueue | None,
494
+ shape: tuple[int, ...] | int,
495
+ dtype: DTypeLike,
496
+ order: str = "C",
497
+ allocator: Allocator | None = None,
498
+ data: Any = None,
499
+ offset: int = 0,
500
+ strides: tuple[int, ...] | None = None,
501
+ events: list[cl.Event] | None = None,
502
+
503
+ # NOTE: following args are used for the fast constructor
504
+ _flags: Any = None,
505
+ _fast: bool = False,
506
+ _size: int | None = None,
507
+ _context: cl.Context | None = None,
508
+ _queue: cl.CommandQueue | None = None) -> None:
509
+ if _fast:
510
+ # Assumptions, should be disabled if not testing
511
+ if TYPE_CHECKING:
512
+ assert cq is None
513
+ assert isinstance(_context, cl.Context)
514
+ assert _queue is None or isinstance(_queue, cl.CommandQueue)
515
+ assert isinstance(shape, tuple)
516
+ assert isinstance(strides, tuple)
517
+ assert isinstance(dtype, np.dtype)
518
+ assert _size is not None
519
+
520
+ size = _size
521
+ context = _context
522
+ queue = _queue
523
+ alloc_nbytes = dtype.itemsize * size
524
+
525
+ else:
526
+ # {{{ backward compatibility
527
+
528
+ if cq is None:
529
+ context = _context
530
+ queue = _queue
531
+
532
+ elif isinstance(cq, cl.CommandQueue):
533
+ queue = cq
534
+ context = queue.context
535
+
536
+ elif isinstance(cq, cl.Context):
537
+ context = cq
538
+ queue = None
539
+
540
+ else:
541
+ raise TypeError(
542
+ f"cq may be a queue or a context, not '{type(cq).__name__}'")
543
+
544
+ if allocator is not None:
545
+ # "is" would be wrong because two Python objects are allowed
546
+ # to hold handles to the same context.
547
+
548
+ # FIXME It would be nice to check this. But it would require
549
+ # changing the allocator interface. Trust the user for now.
550
+
551
+ # assert allocator.context == context
552
+ pass
553
+
554
+ # Queue-less arrays do have a purpose in life.
555
+ # They don't do very much, but at least they don't run kernels
556
+ # in random queues.
557
+ #
558
+ # See also :meth:`with_queue`.
559
+
560
+ del cq
561
+
562
+ # }}}
563
+
564
+ # invariant here: allocator, queue set
565
+
566
+ # {{{ determine shape, size, and strides
567
+
568
+ dtype = np.dtype(dtype)
569
+
570
+ try:
571
+ shape = tuple(shape) # type: ignore[arg-type]
572
+ except TypeError as err:
573
+ if not isinstance(shape, (int, np.integer)):
574
+ raise TypeError(
575
+ "shape must either be iterable or castable to an integer: "
576
+ f"got a '{type(shape).__name__}'") from err
577
+
578
+ shape = (shape,)
579
+
580
+ shape_array = np.array(shape)
581
+
582
+ # Previously, the size was computed as
583
+ # "size = 1; size *= dim for dim in shape"
584
+ # However this can fail when using certain data types,
585
+ # e.g. numpy.uint64(1) * 2 returns 2.0!
586
+ if np.any(shape_array < 0):
587
+ raise ValueError(f"negative dimensions are not allowed: {shape}")
588
+ if np.any([np.array([s]).dtype.kind not in ["u", "i"] for s in shape]):
589
+ raise ValueError(
590
+ "Invalid shape %s ; dimensions, must be integer" % (str(shape)))
591
+ size = np.prod(shape_array, dtype=np.uint64).item()
592
+
593
+ if strides is None:
594
+ if order in "cC":
595
+ # inlined from compyte.array.c_contiguous_strides
596
+ if shape:
597
+ strides_tmp = [dtype.itemsize]
598
+ for s in shape[:0:-1]:
599
+ # NOTE: https://github.com/inducer/compyte/pull/36
600
+ strides_tmp.append(strides_tmp[-1]*builtins.max(1, s))
601
+ strides = tuple(strides_tmp[::-1])
602
+ else:
603
+ strides = ()
604
+ elif order in "fF":
605
+ strides = _f_contiguous_strides(dtype.itemsize, shape)
606
+ else:
607
+ raise ValueError(f"invalid order: {order}")
608
+
609
+ else:
610
+ # FIXME: We should possibly perform some plausibility
611
+ # checking on 'strides' here.
612
+
613
+ strides = tuple(strides)
614
+
615
+ # }}}
616
+
617
+ assert dtype != object, \
618
+ "object arrays on the compute device are not allowed" # noqa: E721
619
+ assert isinstance(shape, tuple)
620
+ assert isinstance(strides, tuple)
621
+
622
+ alloc_nbytes = dtype.itemsize * size
623
+
624
+ if alloc_nbytes < 0:
625
+ raise ValueError("cannot allocate CL buffer with negative size")
626
+
627
+ self.queue = queue
628
+ self.shape = shape
629
+ self.dtype = dtype
630
+ self.strides = strides
631
+ self.events = [] if events is None else events
632
+ self.nbytes = alloc_nbytes
633
+ self.size = size
634
+ self.allocator = allocator
635
+
636
+ if data is None:
637
+ if alloc_nbytes == 0:
638
+ self.base_data = None
639
+
640
+ else:
641
+ if self.allocator is None:
642
+ if context is None and queue is not None:
643
+ context = queue.context
644
+
645
+ self.base_data = cl.Buffer(
646
+ context, cl.mem_flags.READ_WRITE, alloc_nbytes)
647
+ else:
648
+ self.base_data = self.allocator(alloc_nbytes)
649
+ else:
650
+ self.base_data = data
651
+
652
+ self.offset = offset
653
+ self.context = context
654
+ self._flags = _flags
655
+
656
+ if __debug__:
657
+ if queue is not None and isinstance(
658
+ self.base_data, _SVMPointer_or_nothing):
659
+ mem_queue = getattr(self.base_data, "_queue", _NOT_PRESENT)
660
+ if mem_queue is not _NOT_PRESENT and mem_queue != queue:
661
+ warn("Array has different queue from backing SVM memory. "
662
+ "This may lead to the array getting deallocated sooner "
663
+ "than expected, potentially leading to crashes.",
664
+ InconsistentOpenCLQueueWarning, stacklevel=2)
665
+
666
+ @property
667
+ def ndim(self):
668
+ return len(self.shape)
669
+
670
+ @property
671
+ def data(self):
672
+ if self.offset:
673
+ raise ArrayHasOffsetError()
674
+ else:
675
+ return self.base_data
676
+
677
+ @property
678
+ def flags(self):
679
+ f = self._flags
680
+ if f is None:
681
+ self._flags = f = _ArrayFlags(self)
682
+ return f
683
+
684
+ def _new_with_changes(self,
685
+ data: cl.MemoryObjectHolder | cl.SVMPointer | None,
686
+ offset: int | None,
687
+ shape: tuple[int, ...] | None = None,
688
+ dtype: np.dtype[Any] | None = None,
689
+ strides: tuple[int, ...] | None = None,
690
+ queue: cl.CommandQueue | type[_copy_queue] | None = _copy_queue,
691
+ allocator: Allocator | None = None,
692
+ ) -> Self:
693
+ """
694
+ :arg data: *None* means allocate a new array.
695
+ """
696
+ fast = True
697
+ size = self.size
698
+ if shape is None:
699
+ shape = self.shape
700
+ else:
701
+ fast = False
702
+ size = None
703
+
704
+ if dtype is None:
705
+ dtype = self.dtype
706
+ if strides is None:
707
+ strides = self.strides
708
+ if queue is _copy_queue:
709
+ queue = self.queue
710
+ if allocator is None:
711
+ allocator = self.allocator
712
+ if offset is None:
713
+ offset = self.offset
714
+
715
+ # If we're allocating new data, then there's not likely to be
716
+ # a data dependency. Otherwise, the two arrays should probably
717
+ # share the same events list.
718
+
719
+ if data is None:
720
+ events = None
721
+ else:
722
+ events = self.events
723
+
724
+ return self.__class__(None, shape, dtype, allocator=allocator,
725
+ strides=strides, data=data, offset=offset,
726
+ events=events,
727
+ _fast=fast, _context=self.context, _queue=queue, _size=size)
728
+
729
+ def with_queue(self, queue: cl.CommandQueue | None):
730
+ """Return a copy of *self* with the default queue set to *queue*.
731
+
732
+ *None* is allowed as a value for *queue*.
733
+
734
+ .. versionadded:: 2013.1
735
+ """
736
+
737
+ if queue is not None:
738
+ assert queue.context == self.context
739
+
740
+ return self._new_with_changes(self.base_data, self.offset,
741
+ queue=queue)
742
+
743
+ def _get_sizes(self,
744
+ queue: cl.CommandQueue,
745
+ kernel_specific_max_wg_size: int | None = None
746
+ ) -> tuple[tuple[int, ...], tuple[int, ...]]:
747
+ if not self.flags.forc:
748
+ raise NotImplementedError("cannot operate on non-contiguous array")
749
+ cache_key = (queue.device.int_ptr, self.size, kernel_specific_max_wg_size)
750
+ try:
751
+ return _ARRAY_GET_SIZES_CACHE[cache_key]
752
+ except KeyError:
753
+ sizes = _splay(queue.device, self.size,
754
+ kernel_specific_max_wg_size=kernel_specific_max_wg_size)
755
+ _ARRAY_GET_SIZES_CACHE[cache_key] = sizes
756
+ return sizes
757
+
758
+ def set(self,
759
+ ary: NDArray[Any],
760
+ queue: cl.CommandQueue | None = None,
761
+ async_: bool = False,
762
+ ):
763
+ """Transfer the contents the :class:`numpy.ndarray` object *ary*
764
+ onto the device.
765
+
766
+ *ary* must have the same dtype and size (not necessarily shape) as
767
+ *self*.
768
+
769
+ *async_* is a Boolean indicating whether the function is allowed
770
+ to return before the transfer completes. To avoid synchronization
771
+ bugs, this defaults to *False*.
772
+ """
773
+
774
+ assert ary.size == self.size
775
+ assert ary.dtype == self.dtype
776
+
777
+ if not ary.flags.forc:
778
+ raise RuntimeError("cannot set from non-contiguous array")
779
+
780
+ if not _equal_strides(ary.strides, self.strides, self.shape):
781
+ raise RuntimeError("Setting array from one with different "
782
+ "strides/storage order.")
783
+
784
+ if self.size:
785
+ queue = queue or self.queue
786
+ assert queue is not None
787
+ event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
788
+ dst_offset=self.offset,
789
+ is_blocking=not async_)
790
+
791
+ self.add_event(event1)
792
+
793
+ def _get(self,
794
+ queue: cl.CommandQueue | None = None,
795
+ ary: NDArray[Any] | None = None,
796
+ async_: bool = False,
797
+ ):
798
+ if ary is None:
799
+ ary = np.empty(self.shape, self.dtype)
800
+
801
+ if self.strides != ary.strides:
802
+ ary = _as_strided(ary, strides=self.strides)
803
+ else:
804
+ if ary.size != self.size:
805
+ raise TypeError("'ary' has non-matching size")
806
+ if ary.dtype != self.dtype:
807
+ raise TypeError("'ary' has non-matching type")
808
+
809
+ if self.shape != ary.shape:
810
+ warn("get() between arrays of different shape is deprecated "
811
+ "and will be removed in PyCUDA 2017.x",
812
+ DeprecationWarning, stacklevel=2)
813
+
814
+ assert self.flags.forc, "Array in get() must be contiguous"
815
+
816
+ queue = queue or self.queue
817
+ if queue is None:
818
+ raise ValueError("Cannot copy array to host. "
819
+ "Array has no queue. Use "
820
+ "'new_array = array.with_queue(queue)' "
821
+ "to associate one.")
822
+
823
+ if self.size:
824
+ assert self.base_data is not None
825
+ event1 = cast("cl.Event", cl.enqueue_copy(queue, ary, self.base_data,
826
+ src_offset=self.offset,
827
+ wait_for=self.events, is_blocking=not async_))
828
+
829
+ self.add_event(event1)
830
+ else:
831
+ event1 = cl.enqueue_marker(queue, wait_for=self.events)
832
+ if not async_:
833
+ event1.wait()
834
+
835
+ return ary, event1
836
+
837
+ def get(self,
838
+ queue: cl.CommandQueue | None = None,
839
+ ary: NDArray[Any] | None = None,
840
+ ) -> NDArray[Any]:
841
+ """Transfer the contents of *self* into *ary* or a newly allocated
842
+ :class:`numpy.ndarray`. If *ary* is given, it must have the same
843
+ shape and dtype.
844
+ """
845
+
846
+ ary, _event1 = self._get(queue=queue, ary=ary)
847
+
848
+ return ary
849
+
850
+ def get_async(self,
851
+ queue: cl.CommandQueue | None = None,
852
+ ary: NDArray[Any] | None = None,
853
+ ) -> tuple[NDArray[Any], cl.Event]:
854
+ """
855
+ Asynchronous version of :meth:`get` which returns a tuple ``(ary, event)``
856
+ containing the host array ``ary``
857
+ and the :class:`pyopencl.NannyEvent` ``event`` returned by
858
+ :meth:`pyopencl.enqueue_copy`.
859
+
860
+ .. versionadded:: 2019.1.2
861
+ """
862
+
863
+ return self._get(queue=queue, ary=ary, async_=True)
864
+
865
+ def copy(self, queue: cl.CommandQueue | type[_copy_queue] | None = _copy_queue):
866
+ """
867
+ :arg queue: The :class:`~pyopencl.CommandQueue` for the returned array.
868
+
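A round-trip sketch of set(), get() and get_async(), assuming the usual context/queue setup; get_async() hands back the host array together with the pyopencl.NannyEvent to wait on:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    host = np.arange(16, dtype=np.float32)
    dev = cl_array.to_device(queue, host)

    dev.set(2 * host)            # host -> device, blocking by default (async_=False)
    out = dev.get()              # device -> host, blocking

    out2, evt = dev.get_async()  # returns immediately
    evt.wait()                   # wait before touching out2
    assert np.array_equal(out, out2)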
869
+ .. versionchanged:: 2017.1.2
870
+
871
+ Updates the queue of the returned array.
872
+
873
+ .. versionadded:: 2013.1
874
+ """
875
+
876
+ if queue is _copy_queue:
877
+ queue_san = self.queue
878
+ else:
879
+ queue_san = cast("cl.CommandQueue | None", queue)
880
+
881
+ result = self._new_like_me(queue=queue_san)
882
+
883
+ # result.queue won't be the same as queue if queue is None.
884
+ # We force them to be the same here.
885
+ if result.queue is not queue:
886
+ result = result.with_queue(queue_san)
887
+
888
+ if not self.flags.forc:
889
+ raise RuntimeError("cannot copy non-contiguous array")
890
+
891
+ if self.nbytes:
892
+ queue_san = queue_san or self.queue
893
+ assert queue_san is not None
894
+ event1 = cl.enqueue_copy(queue_san,
895
+ result.base_data, self.base_data,
896
+ src_offset=self.offset, byte_count=self.nbytes,
897
+ wait_for=self.events)
898
+ result.add_event(event1)
899
+
900
+ return result
901
+
902
+ def __str__(self):
903
+ if self.queue is None:
904
+ return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
905
+ "without queue, call with_queue()>")
906
+
907
+ return str(self.get())
908
+
909
+ def __repr__(self):
910
+ if self.queue is None:
911
+ return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
912
+ f"at {id(self):x} without queue, call with_queue()>")
913
+
914
+ result = repr(self.get())
915
+ if result[:5] == "array":
916
+ result = f"cl.{type(self).__name__}" + result[5:]
917
+ else:
918
+ warn(
919
+ f"{type(result).__name__}.__repr__ was expected to return a "
920
+ f"string starting with 'array', got '{result[:10]!r}'",
921
+ stacklevel=2)
922
+
923
+ return result
924
+
925
+ def safely_stringify_for_pudb(self):
926
+ return f"cl.{type(self).__name__} {self.dtype} {self.shape}"
927
+
928
+ def __hash__(self):
929
+ raise TypeError("pyopencl arrays are not hashable.")
930
+
931
+ # {{{ kernel invocation wrappers
932
+
933
+ @staticmethod
934
+ @elwise_kernel_runner
935
+ def _axpbyz(out, afac, a, bfac, b, queue: cl.CommandQueue | None = None):
936
+ """Compute ``out = selffac * self + otherfac*other``,
937
+ where *other* is an array."""
938
+ a_shape = a.shape
939
+ b_shape = b.shape
940
+ out_shape = out.shape
941
+ assert (a_shape == b_shape == out_shape
942
+ or (a_shape == () and b_shape == out_shape)
943
+ or (b_shape == () and a_shape == out_shape))
944
+ return elementwise.get_axpbyz_kernel(
945
+ out.context, a.dtype, b.dtype, out.dtype,
946
+ x_is_scalar=(a_shape == ()),
947
+ y_is_scalar=(b_shape == ()))
948
+
949
+ @staticmethod
950
+ @elwise_kernel_runner
951
+ def _axpbz(out, a, x, b, queue: cl.CommandQueue | None = None):
952
+ """Compute ``z = a * x + b``, where *b* is a scalar."""
953
+ a = np.array(a)
954
+ b = np.array(b)
955
+ assert out.shape == x.shape
956
+ return elementwise.get_axpbz_kernel(out.context,
957
+ a.dtype, x.dtype, b.dtype, out.dtype)
958
+
959
+ @staticmethod
960
+ @elwise_kernel_runner
961
+ def _elwise_multiply(out, a, b, queue: cl.CommandQueue | None = None):
962
+ a_shape = a.shape
963
+ b_shape = b.shape
964
+ out_shape = out.shape
965
+ assert (a_shape == b_shape == out_shape
966
+ or (a_shape == () and b_shape == out_shape)
967
+ or (b_shape == () and a_shape == out_shape))
968
+ return elementwise.get_multiply_kernel(
969
+ a.context, a.dtype, b.dtype, out.dtype,
970
+ x_is_scalar=(a_shape == ()),
971
+ y_is_scalar=(b_shape == ())
972
+ )
973
+
974
+ @staticmethod
975
+ @elwise_kernel_runner
976
+ def _rdiv_scalar(out, ary, other, queue: cl.CommandQueue | None = None):
977
+ other = np.array(other)
978
+ assert out.shape == ary.shape
979
+ return elementwise.get_rdivide_elwise_kernel(
980
+ out.context, ary.dtype, other.dtype, out.dtype)
981
+
982
+ @staticmethod
983
+ @elwise_kernel_runner
984
+ def _div(out, self, other, queue: cl.CommandQueue | None = None):
985
+ """Divides an array by another array."""
986
+ assert (self.shape == other.shape == out.shape
987
+ or (self.shape == () and other.shape == out.shape)
988
+ or (other.shape == () and self.shape == out.shape))
989
+
990
+ return elementwise.get_divide_kernel(self.context,
991
+ self.dtype, other.dtype, out.dtype,
992
+ x_is_scalar=(self.shape == ()),
993
+ y_is_scalar=(other.shape == ()))
994
+
995
+ @staticmethod
996
+ @elwise_kernel_runner
997
+ def _fill(result, scalar):
998
+ return elementwise.get_fill_kernel(result.context, result.dtype)
999
+
1000
+ @staticmethod
1001
+ @elwise_kernel_runner
1002
+ def _abs(result, arg):
1003
+ if arg.dtype.kind == "c":
1004
+ from pyopencl.elementwise import complex_dtype_to_name
1005
+ fname = "%s_abs" % complex_dtype_to_name(arg.dtype)
1006
+ elif arg.dtype.kind == "f":
1007
+ fname = "fabs"
1008
+ elif arg.dtype.kind in ["u", "i"]:
1009
+ fname = "abs"
1010
+ else:
1011
+ raise TypeError("unsupported dtype in _abs()")
1012
+
1013
+ return elementwise.get_unary_func_kernel(
1014
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1015
+
1016
+ @staticmethod
1017
+ @elwise_kernel_runner
1018
+ def _real(result, arg):
1019
+ from pyopencl.elementwise import complex_dtype_to_name
1020
+ fname = "%s_real" % complex_dtype_to_name(arg.dtype)
1021
+ return elementwise.get_unary_func_kernel(
1022
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1023
+
1024
+ @staticmethod
1025
+ @elwise_kernel_runner
1026
+ def _imag(result, arg):
1027
+ from pyopencl.elementwise import complex_dtype_to_name
1028
+ fname = "%s_imag" % complex_dtype_to_name(arg.dtype)
1029
+ return elementwise.get_unary_func_kernel(
1030
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1031
+
1032
+ @staticmethod
1033
+ @elwise_kernel_runner
1034
+ def _conj(result, arg):
1035
+ from pyopencl.elementwise import complex_dtype_to_name
1036
+ fname = "%s_conj" % complex_dtype_to_name(arg.dtype)
1037
+ return elementwise.get_unary_func_kernel(
1038
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1039
+
1040
+ @staticmethod
1041
+ @elwise_kernel_runner
1042
+ def _pow_scalar(result, ary, exponent):
1043
+ exponent = np.array(exponent)
1044
+ return elementwise.get_pow_kernel(result.context,
1045
+ ary.dtype, exponent.dtype, result.dtype,
1046
+ is_base_array=True, is_exp_array=False)
1047
+
1048
+ @staticmethod
1049
+ @elwise_kernel_runner
1050
+ def _rpow_scalar(result, base, exponent):
1051
+ base = np.array(base)
1052
+ return elementwise.get_pow_kernel(result.context,
1053
+ base.dtype, exponent.dtype, result.dtype,
1054
+ is_base_array=False, is_exp_array=True)
1055
+
1056
+ @staticmethod
1057
+ @elwise_kernel_runner
1058
+ def _pow_array(result, base, exponent):
1059
+ return elementwise.get_pow_kernel(
1060
+ result.context, base.dtype, exponent.dtype, result.dtype,
1061
+ is_base_array=True, is_exp_array=True)
1062
+
1063
+ @staticmethod
1064
+ @elwise_kernel_runner
1065
+ def _reverse(result, ary):
1066
+ return elementwise.get_reverse_kernel(result.context, ary.dtype)
1067
+
1068
+ @staticmethod
1069
+ @elwise_kernel_runner
1070
+ def _copy(dest, src):
1071
+ return elementwise.get_copy_kernel(
1072
+ dest.context, dest.dtype, src.dtype)
1073
+
1074
+ def _new_like_me(self, dtype=None, queue: cl.CommandQueue | None = None):
1075
+ if dtype is None:
1076
+ dtype = self.dtype
1077
+ strides = self.strides
1078
+ flags = self.flags
1079
+ fast = True
1080
+ else:
1081
+ strides = None
1082
+ flags = None
1083
+ if dtype == self.dtype:
1084
+ strides = self.strides
1085
+ flags = self.flags
1086
+ fast = True
1087
+ else:
1088
+ fast = False
1089
+
1090
+ queue = queue or self.queue
1091
+ return self.__class__(None, self.shape, dtype,
1092
+ allocator=self.allocator, strides=strides, _flags=flags,
1093
+ _fast=fast,
1094
+ _size=self.size, _queue=queue, _context=self.context)
1095
+
1096
+ @staticmethod
1097
+ @elwise_kernel_runner
1098
+ def _scalar_binop(out, a, b, queue: cl.CommandQueue | None = None, op=None):
1099
+ return elementwise.get_array_scalar_binop_kernel(
1100
+ out.context, op, out.dtype, a.dtype,
1101
+ np.array(b).dtype)
1102
+
1103
+ @staticmethod
1104
+ @elwise_kernel_runner
1105
+ def _array_binop(out, a, b, queue: cl.CommandQueue | None = None, op=None):
1106
+ a_shape = a.shape
1107
+ b_shape = b.shape
1108
+ out_shape = out.shape
1109
+ assert (a_shape == b_shape == out_shape
1110
+ or (a_shape == () and b_shape == out_shape)
1111
+ or (b_shape == () and a_shape == out_shape))
1112
+ return elementwise.get_array_binop_kernel(
1113
+ out.context, op, out.dtype, a.dtype, b.dtype,
1114
+ a_is_scalar=(a_shape == ()),
1115
+ b_is_scalar=(b_shape == ()))
1116
+
1117
+ @staticmethod
1118
+ @elwise_kernel_runner
1119
+ def _unop(out, a, queue: cl.CommandQueue | None = None, op=None):
1120
+ if out.shape != a.shape:
1121
+ raise ValueError("shapes of arguments do not match")
1122
+ return elementwise.get_unop_kernel(
1123
+ out.context, op, a.dtype, out.dtype)
1124
+
1125
+ # }}}
1126
+
1127
+ # {{{ operators
1128
+
1129
+ def mul_add(self, selffac, other, otherfac, queue: cl.CommandQueue | None = None):
1130
+ """Return ``selffac * self + otherfac * other``.
1131
+ """
1132
+ queue = queue or self.queue
1133
+
1134
+ if isinstance(other, Array):
1135
+ result = _get_broadcasted_binary_op_result(self, other, queue)
1136
+ result.add_event(
1137
+ self._axpbyz(
1138
+ result, selffac, self, otherfac, other,
1139
+ queue=queue))
1140
+ return result
1141
+ elif np.isscalar(other):
1142
+ common_dtype = _get_common_dtype(self, other, queue)
1143
+ result = self._new_like_me(common_dtype, queue=queue)
1144
+ result.add_event(
1145
+ self._axpbz(result, selffac,
1146
+ self, common_dtype.type(otherfac * other),
1147
+ queue=queue))
1148
+ return result
1149
+ else:
1150
+ raise NotImplementedError
1151
+
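For instance (assuming arrays a and b of equal shape on the same queue), mul_add fuses the scaling and addition into a single elementwise kernel rather than building temporaries:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.arange(8, dtype=np.float32))
    b = cl_array.to_device(queue, np.ones(8, dtype=np.float32))

    c = a.mul_add(2.0, b, 3.0)   # 2*a + 3*b in one kernel launch
    print(c.get())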
1152
+ def __add__(self, other) -> Self:
1153
+ """Add an array with an array or an array with a scalar."""
1154
+
1155
+ if isinstance(other, Array):
1156
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1157
+ result.add_event(
1158
+ self._axpbyz(result,
1159
+ self.dtype.type(1), self,
1160
+ other.dtype.type(1), other))
1161
+
1162
+ return result
1163
+ elif np.isscalar(other):
1164
+ if other == 0:
1165
+ return self.copy()
1166
+ else:
1167
+ common_dtype = _get_common_dtype(self, other, self.queue)
1168
+ result = self._new_like_me(common_dtype)
1169
+ result.add_event(
1170
+ self._axpbz(result, self.dtype.type(1),
1171
+ self, common_dtype.type(other)))
1172
+ return result
1173
+ else:
1174
+ return NotImplemented
1175
+
1176
+ __radd__ = __add__
1177
+
1178
+ def __sub__(self, other) -> Self:
1179
+ """Subtract an array from an array or a scalar from an array."""
1180
+
1181
+ if isinstance(other, Array):
1182
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1183
+ result.add_event(
1184
+ self._axpbyz(result,
1185
+ self.dtype.type(1), self,
1186
+ result.dtype.type(-1), other))
1187
+
1188
+ return result
1189
+ elif np.isscalar(other):
1190
+ if other == 0:
1191
+ return self.copy()
1192
+ else:
1193
+ result = self._new_like_me(
1194
+ _get_common_dtype(self, other, self.queue))
1195
+ result.add_event(
1196
+ self._axpbz(result, self.dtype.type(1), self, -other))
1197
+ return result
1198
+ else:
1199
+ return NotImplemented
1200
+
1201
+ def __rsub__(self, other) -> Self:
1202
+ """Subtracts an array by a scalar or an array::
1203
+
1204
+ x = n - self
1205
+ """
1206
+ if np.isscalar(other):
1207
+ common_dtype = _get_common_dtype(self, other, self.queue)
1208
+ result = self._new_like_me(common_dtype)
1209
+ result.add_event(
1210
+ self._axpbz(result, result.dtype.type(-1), self,
1211
+ common_dtype.type(other)))
1212
+
1213
+ return result
1214
+ else:
1215
+ return NotImplemented
1216
+
1217
+ def __iadd__(self, other) -> Self:
1218
+ if isinstance(other, Array):
1219
+ if other.shape != self.shape and other.shape != ():
1220
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1221
+ f" {self.shape}, {other.shape}.")
1222
+ self.add_event(
1223
+ self._axpbyz(self,
1224
+ self.dtype.type(1), self,
1225
+ other.dtype.type(1), other))
1226
+
1227
+ return self
1228
+ elif np.isscalar(other):
1229
+ self.add_event(
1230
+ self._axpbz(self, self.dtype.type(1), self, other))
1231
+ return self
1232
+ else:
1233
+ return NotImplemented
1234
+
1235
+ def __isub__(self, other) -> Self:
1236
+ if isinstance(other, Array):
1237
+ if other.shape != self.shape and other.shape != ():
1238
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1239
+ f" {self.shape}, {other.shape}.")
1240
+ self.add_event(
1241
+ self._axpbyz(self, self.dtype.type(1), self,
1242
+ other.dtype.type(-1), other))
1243
+ return self
1244
+ elif np.isscalar(other):
1245
+ self.add_event(self._axpbz(self, self.dtype.type(1), self, -other))
1246
+ return self
1247
+ else:
1248
+ return NotImplemented
1249
+
1250
+ def __pos__(self) -> Self:
1251
+ return self
1252
+
1253
+ def __neg__(self) -> Self:
1254
+ result = self._new_like_me()
1255
+ result.add_event(self._axpbz(result, -1, self, 0))
1256
+ return result
1257
+
1258
+ def __mul__(self, other) -> Self:
1259
+ if isinstance(other, Array):
1260
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1261
+ result.add_event(
1262
+ self._elwise_multiply(result, self, other))
1263
+ return result
1264
+ elif np.isscalar(other):
1265
+ common_dtype = _get_common_dtype(self, other, self.queue)
1266
+ result = self._new_like_me(common_dtype)
1267
+ result.add_event(
1268
+ self._axpbz(result,
1269
+ common_dtype.type(other), self, self.dtype.type(0)))
1270
+ return result
1271
+ else:
1272
+ return NotImplemented
1273
+
1274
+ def __rmul__(self, other) -> Self:
1275
+ if np.isscalar(other):
1276
+ common_dtype = _get_common_dtype(self, other, self.queue)
1277
+ result = self._new_like_me(common_dtype)
1278
+ result.add_event(
1279
+ self._axpbz(result,
1280
+ common_dtype.type(other), self, self.dtype.type(0)))
1281
+ return result
1282
+ else:
1283
+ return NotImplemented
1284
+
1285
+ def __imul__(self, other) -> Self:
1286
+ if isinstance(other, Array):
1287
+ if other.shape != self.shape and other.shape != ():
1288
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1289
+ f" {self.shape}, {other.shape}.")
1290
+ self.add_event(
1291
+ self._elwise_multiply(self, self, other))
1292
+ return self
1293
+ elif np.isscalar(other):
1294
+ self.add_event(
1295
+ self._axpbz(self, other, self, self.dtype.type(0)))
1296
+ return self
1297
+ else:
1298
+ return NotImplemented
1299
+
1300
+ def __div__(self, other) -> Self:
1301
+ """Divides an array by an array or a scalar, i.e. ``self / other``.
1302
+ """
1303
+ if isinstance(other, Array):
1304
+ result = _get_broadcasted_binary_op_result(
1305
+ self, other, self.queue,
1306
+ dtype_getter=_get_truedivide_dtype)
1307
+ result.add_event(self._div(result, self, other))
1308
+
1309
+ return result
1310
+ elif np.isscalar(other):
1311
+ if other == 1:
1312
+ return self.copy()
1313
+ else:
1314
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1315
+ result = self._new_like_me(common_dtype)
1316
+ result.add_event(
1317
+ self._axpbz(result,
1318
+ np.true_divide(common_dtype.type(1), other),
1319
+ self, self.dtype.type(0)))
1320
+ return result
1321
+ else:
1322
+ return NotImplemented
1323
+
1324
+ __truediv__ = __div__
1325
+
1326
+ def __rdiv__(self, other) -> Self:
1327
+ """Divides an array by a scalar or an array, i.e. ``other / self``.
1328
+ """
1329
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1330
+
1331
+ if isinstance(other, Array):
1332
+ result = self._new_like_me(common_dtype)
1333
+ result.add_event(other._div(result, self))
1334
+ return result
1335
+ elif np.isscalar(other):
1336
+ result = self._new_like_me(common_dtype)
1337
+ result.add_event(
1338
+ self._rdiv_scalar(result, self, common_dtype.type(other)))
1339
+ return result
1340
+ else:
1341
+ return NotImplemented
1342
+
1343
+ __rtruediv__ = __rdiv__
1344
+
1345
+ def __itruediv__(self, other) -> Self:
1346
+ # raise an error if the result cannot be cast to self
1347
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1348
+ if not np.can_cast(common_dtype, self.dtype.type, "same_kind"):
1349
+ raise TypeError(
1350
+ "Cannot cast {!r} to {!r}".format(self.dtype, common_dtype))
1351
+
1352
+ if isinstance(other, Array):
1353
+ if other.shape != self.shape and other.shape != ():
1354
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1355
+ f" {self.shape}, {other.shape}.")
1356
+ self.add_event(
1357
+ self._div(self, self, other))
1358
+ return self
1359
+ elif np.isscalar(other):
1360
+ if other == 1:
1361
+ return self
1362
+ else:
1363
+ self.add_event(
1364
+ self._axpbz(self, common_dtype.type(np.true_divide(1, other)),
1365
+ self, self.dtype.type(0)))
1366
+ return self
1367
+ else:
1368
+ return NotImplemented
1369
+
1370
+ def __and__(self, other) -> Self:
1371
+ common_dtype = _get_common_dtype(self, other, self.queue)
1372
+
1373
+ if not np.issubdtype(common_dtype, np.integer):
1374
+ raise TypeError(f"Integral types only: {common_dtype}")
1375
+
1376
+ if isinstance(other, Array):
1377
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1378
+ result.add_event(self._array_binop(result, self, other, op="&"))
1379
+ return result
1380
+ elif np.isscalar(other):
1381
+ result = self._new_like_me(common_dtype)
1382
+ result.add_event(
1383
+ self._scalar_binop(result, self, other, op="&"))
1384
+ return result
1385
+ else:
1386
+ return NotImplemented
1387
+
1388
+ __rand__ = __and__ # commutes
1389
+
1390
+ def __or__(self, other) -> Self:
1391
+ common_dtype = _get_common_dtype(self, other, self.queue)
1392
+
1393
+ if not np.issubdtype(common_dtype, np.integer):
1394
+ raise TypeError("Integral types only")
1395
+
1396
+ if isinstance(other, Array):
1397
+ result = _get_broadcasted_binary_op_result(self, other,
1398
+ self.queue)
1399
+ result.add_event(self._array_binop(result, self, other, op="|"))
1400
+ return result
1401
+ elif np.isscalar(other):
1402
+ result = self._new_like_me(common_dtype)
1403
+ result.add_event(
1404
+ self._scalar_binop(result, self, other, op="|"))
1405
+ return result
1406
+ else:
1407
+ return NotImplemented
1408
+
1409
+ __ror__ = __or__ # commutes
1410
+
1411
+ def __xor__(self, other) -> Self:
1412
+ common_dtype = _get_common_dtype(self, other, self.queue)
1413
+
1414
+ if not np.issubdtype(common_dtype, np.integer):
1415
+ raise TypeError(f"Integral types only: {common_dtype}")
1416
+
1417
+ if isinstance(other, Array):
1418
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1419
+ result.add_event(self._array_binop(result, self, other, op="^"))
1420
+ return result
1421
+ elif np.isscalar(other):
1422
+ result = self._new_like_me(common_dtype)
1423
+ result.add_event(
1424
+ self._scalar_binop(result, self, other, op="^"))
1425
+ return result
1426
+ else:
1427
+ return NotImplemented
1428
+
1429
+ __rxor__ = __xor__ # commutes
1430
+
1431
+ def __iand__(self, other) -> Self:
1432
+ common_dtype = _get_common_dtype(self, other, self.queue)
1433
+
1434
+ if not np.issubdtype(common_dtype, np.integer):
1435
+ raise TypeError(f"Integral types only: {common_dtype}")
1436
+
1437
+ if isinstance(other, Array):
1438
+ if other.shape != self.shape and other.shape != ():
1439
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1440
+ f" {self.shape}, {other.shape}.")
1441
+ self.add_event(self._array_binop(self, self, other, op="&"))
1442
+ return self
1443
+ elif np.isscalar(other):
1444
+ self.add_event(
1445
+ self._scalar_binop(self, self, other, op="&"))
1446
+ return self
1447
+ else:
1448
+ return NotImplemented
1449
+
1450
+ def __ior__(self, other) -> Self:
1451
+ common_dtype = _get_common_dtype(self, other, self.queue)
1452
+
1453
+ if not np.issubdtype(common_dtype, np.integer):
1454
+ raise TypeError(f"Integral types only: {common_dtype}")
1455
+
1456
+ if isinstance(other, Array):
1457
+ if other.shape != self.shape and other.shape != ():
1458
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1459
+ f" {self.shape}, {other.shape}.")
1460
+ self.add_event(self._array_binop(self, self, other, op="|"))
1461
+ return self
1462
+ elif np.isscalar(other):
1463
+ self.add_event(
1464
+ self._scalar_binop(self, self, other, op="|"))
1465
+ return self
1466
+ else:
1467
+ return NotImplemented
1468
+
1469
+ def __ixor__(self, other) -> Self:
1470
+ common_dtype = _get_common_dtype(self, other, self.queue)
1471
+
1472
+ if not np.issubdtype(common_dtype, np.integer):
1473
+ raise TypeError(f"Integral types only: {common_dtype}")
1474
+
1475
+ if isinstance(other, Array):
1476
+ if other.shape != self.shape and other.shape != ():
1477
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1478
+ f" {self.shape}, {other.shape}.")
1479
+ self.add_event(self._array_binop(self, self, other, op="^"))
1480
+ return self
1481
+ elif np.isscalar(other):
1482
+ self.add_event(
1483
+ self._scalar_binop(self, self, other, op="^"))
1484
+ return self
1485
+ else:
1486
+ return NotImplemented
1487
+
1488
+ def _zero_fill(self,
1489
+ queue: cl.CommandQueue | None = None,
1490
+ wait_for: cl.WaitList = None) -> None:
1491
+ queue = queue or self.queue
1492
+
1493
+ if not self.size:
1494
+ return
1495
+
1496
+ cl_version_gtr_1_2 = (
1497
+ queue._get_cl_version() >= (1, 2)
1498
+ and cl.get_cl_header_version() >= (1, 2)
1499
+ )
1500
+ on_nvidia = queue.device.vendor.startswith("NVIDIA")
1501
+
1502
+ # circumvent bug with large buffers on NVIDIA
1503
+ # https://github.com/inducer/pyopencl/issues/395
1504
+ if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
1505
+ self.add_event(
1506
+ cl.enqueue_fill(queue, self.base_data, np.int8(0),
1507
+ self.nbytes, offset=self.offset, wait_for=wait_for))
1508
+ else:
1509
+ zero = np.zeros((), self.dtype)
1510
+ self.fill(zero, queue=queue)
1511
+
1512
+ def fill(self,
1513
+ value: object,
1514
+ queue: cl.CommandQueue | None = None,
1515
+ wait_for: cl.WaitList = None) -> Self:
1516
+ """Fill the array with *scalar*.
1517
+
1518
+ :returns: *self*.
1519
+ """
1520
+
1521
+ self.add_event(
1522
+ self._fill(self, value, queue=queue, wait_for=wait_for))
1523
+
1524
+ return self
1525
+
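A small usage sketch (same assumed cl.create_some_context() setup); note that _zero_fill above falls back to this elementwise fill on pre-OpenCL-1.2 platforms or for very large NVIDIA buffers:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.empty(queue, 10**6, dtype=np.float32)
    a.fill(np.float32(7.0))      # returns a itself, so calls can be chained
    print(a.get()[:3])           # [7. 7. 7.]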
1526
+ def __len__(self) -> int:
1527
+ """Returns the size of the leading dimension of *self*."""
1528
+ if len(self.shape):
1529
+ return self.shape[0]
1530
+ else:
1531
+ raise TypeError("len() of unsized object")
1532
+
1533
+ def __abs__(self) -> Self:
1534
+ """Return an ``Array`` of the absolute values of the elements
1535
+ of *self*.
1536
+ """
1537
+
1538
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1539
+ result.add_event(self._abs(result, self))
1540
+ return result
1541
+
1542
+ def __pow__(self, other) -> Self:
1543
+ """Exponentiation by a scalar or elementwise by another
1544
+ :class:`Array`.
1545
+ """
1546
+
1547
+ if isinstance(other, Array):
1548
+ assert self.shape == other.shape
1549
+
1550
+ result = self._new_like_me(
1551
+ _get_common_dtype(self, other, self.queue))
1552
+ result.add_event(
1553
+ self._pow_array(result, self, other))
1554
+ return result
1555
+ elif np.isscalar(other):
1556
+ result = self._new_like_me(
1557
+ _get_common_dtype(self, other, self.queue))
1558
+ result.add_event(self._pow_scalar(result, self, other))
1559
+ return result
1560
+ else:
1561
+ return NotImplemented
1562
+
1563
+ def __rpow__(self, other) -> Self:
1564
+ if np.isscalar(other):
1565
+ common_dtype = _get_common_dtype(self, other, self.queue)
1566
+ result = self._new_like_me(common_dtype)
1567
+ result.add_event(
1568
+ self._rpow_scalar(result, common_dtype.type(other), self))
1569
+ return result
1570
+ else:
1571
+ return NotImplemented
1572
+
1573
+ def __invert__(self):
1574
+ if not np.issubdtype(self.dtype, np.integer):
1575
+ raise TypeError(f"Integral types only: {self.dtype}")
1576
+
1577
+ result = self._new_like_me()
1578
+ result.add_event(self._unop(result, self, op="~"))
1579
+
1580
+ return result
1581
+
1582
+ # }}}
1583
+
1584
+ def reverse(self, queue: cl.CommandQueue | None = None) -> Self:
1585
+ """Return this array in reversed order. The array is treated
1586
+ as one-dimensional.
1587
+ """
1588
+
1589
+ result = self._new_like_me()
1590
+ result.add_event(self._reverse(result, self))
1591
+ return result
1592
+
1593
+ def astype(self, dtype, queue: cl.CommandQueue | None = None):
1594
+ """Return a copy of *self*, cast to *dtype*."""
1595
+ if dtype == self.dtype:
1596
+ return self.copy()
1597
+
1598
+ result = self._new_like_me(dtype=dtype)
1599
+ result.add_event(self._copy(result, self, queue=queue))
1600
+ return result
1601
+
1602
+ # {{{ rich comparisons, any, all
1603
+
1604
+ def __bool__(self) -> bool:
1605
+ if self.shape == ():
1606
+ return bool(self.get())
1607
+ else:
1608
+ raise ValueError("The truth value of an array with "
1609
+ "more than one element is ambiguous. Use a.any() or a.all()")
1610
+
1611
+ def any(self,
1612
+ queue: cl.CommandQueue | None = None,
1613
+ wait_for: cl.WaitList = None
1614
+ ) -> Self:
1615
+ from pyopencl.reduction import get_any_kernel
1616
+ krnl = get_any_kernel(self.context, self.dtype)
1617
+ if wait_for is None:
1618
+ wait_for = []
1619
+ result, event1 = krnl(self, queue=queue,
1620
+ wait_for=[*wait_for, *self.events], return_event=True)
1621
+ result.add_event(event1)
1622
+ return result
1623
+
1624
+ def all(self,
1625
+ queue: cl.CommandQueue | None = None,
1626
+ wait_for: cl.WaitList = None
1627
+ ) -> Self:
1628
+ from pyopencl.reduction import get_all_kernel
1629
+ krnl = get_all_kernel(self.context, self.dtype)
1630
+ if wait_for is None:
1631
+ wait_for = []
1632
+ result, event1 = krnl(self, queue=queue,
1633
+ wait_for=[*wait_for, *self.events], return_event=True)
1634
+ result.add_event(event1)
1635
+ return result
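Comparisons therefore produce int8-valued device arrays (OpenCL leaves the size of bool unspecified), and any()/all() reduce them to device scalars that __bool__ can pull to the host. A brief sketch under the same assumed setup:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.array([1, -2, 3], dtype=np.float32))

    flags = a > 0                # elementwise comparison, stored as int8
    print(flags.dtype)           # int8
    print(bool(flags.any()))     # True  (device-scalar reduction, then __bool__)
    print(bool(flags.all()))     # False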
1636
+
1637
+ @staticmethod
1638
+ @elwise_kernel_runner
1639
+ def _scalar_comparison(out, a, b, queue: cl.CommandQueue | None = None, op=None):
1640
+ return elementwise.get_array_scalar_comparison_kernel(
1641
+ out.context, op, a.dtype)
1642
+
1643
+ @staticmethod
1644
+ @elwise_kernel_runner
1645
+ def _array_comparison(out, a, b, queue: cl.CommandQueue | None = None, op=None):
1646
+ if a.shape != b.shape:
1647
+ raise ValueError("shapes of comparison arguments do not match")
1648
+ return elementwise.get_array_comparison_kernel(
1649
+ out.context, op, a.dtype, b.dtype)
1650
+
1651
+ @override
1652
+ def __eq__(self, other: object) -> Self: # pyright: ignore[reportIncompatibleMethodOverride]
1653
+ if isinstance(other, Array):
1654
+ result = self._new_like_me(_BOOL_DTYPE)
1655
+ result.add_event(
1656
+ self._array_comparison(result, self, other, op="=="))
1657
+ return result
1658
+ elif np.isscalar(other):
1659
+ result = self._new_like_me(_BOOL_DTYPE)
1660
+ result.add_event(
1661
+ self._scalar_comparison(result, self, other, op="=="))
1662
+ return result
1663
+ else:
1664
+ return NotImplemented
1665
+
1666
+ @override
1667
+ def __ne__(self, other: object) -> Self: # pyright: ignore[reportIncompatibleMethodOverride]
1668
+ if isinstance(other, Array):
1669
+ result = self._new_like_me(_BOOL_DTYPE)
1670
+ result.add_event(
1671
+ self._array_comparison(result, self, other, op="!="))
1672
+ return result
1673
+ elif np.isscalar(other):
1674
+ result = self._new_like_me(_BOOL_DTYPE)
1675
+ result.add_event(
1676
+ self._scalar_comparison(result, self, other, op="!="))
1677
+ return result
1678
+ else:
1679
+ return NotImplemented
1680
+
1681
+ def __le__(self, other) -> Self:
1682
+ if isinstance(other, Array):
1683
+ result = self._new_like_me(_BOOL_DTYPE)
1684
+ result.add_event(
1685
+ self._array_comparison(result, self, other, op="<="))
1686
+ return result
1687
+ elif np.isscalar(other):
1688
+ result = self._new_like_me(_BOOL_DTYPE)
1689
+            result.add_event(self._scalar_comparison(result, self, other, op="<="))
1690
+ return result
1691
+ else:
1692
+ return NotImplemented
1693
+
1694
+ def __ge__(self, other) -> Self:
1695
+ if isinstance(other, Array):
1696
+ result = self._new_like_me(_BOOL_DTYPE)
1697
+ result.add_event(
1698
+ self._array_comparison(result, self, other, op=">="))
1699
+ return result
1700
+ elif np.isscalar(other):
1701
+ result = self._new_like_me(_BOOL_DTYPE)
1702
+ result.add_event(
1703
+ self._scalar_comparison(result, self, other, op=">="))
1704
+ return result
1705
+ else:
1706
+ return NotImplemented
1707
+
1708
+ def __lt__(self, other) -> Self:
1709
+ if isinstance(other, Array):
1710
+ result = self._new_like_me(_BOOL_DTYPE)
1711
+ result.add_event(
1712
+ self._array_comparison(result, self, other, op="<"))
1713
+ return result
1714
+ elif np.isscalar(other):
1715
+ result = self._new_like_me(_BOOL_DTYPE)
1716
+ result.add_event(
1717
+ self._scalar_comparison(result, self, other, op="<"))
1718
+ return result
1719
+ else:
1720
+ return NotImplemented
1721
+
1722
+ def __gt__(self, other) -> Self:
1723
+ if isinstance(other, Array):
1724
+ result = self._new_like_me(_BOOL_DTYPE)
1725
+ result.add_event(
1726
+ self._array_comparison(result, self, other, op=">"))
1727
+ return result
1728
+ elif np.isscalar(other):
1729
+ result = self._new_like_me(_BOOL_DTYPE)
1730
+ result.add_event(
1731
+ self._scalar_comparison(result, self, other, op=">"))
1732
+ return result
1733
+ else:
1734
+ return NotImplemented
1735
+
1736
+ # }}}
1737
+
1738
+ # {{{ complex-valued business
1739
+
1740
+ @property
1741
+ def real(self) -> Self:
1742
+ """
1743
+ .. versionadded:: 2012.1
1744
+ """
1745
+ if self.dtype.kind == "c":
1746
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1747
+ result.add_event(
1748
+ self._real(result, self))
1749
+ return result
1750
+ else:
1751
+ return self
1752
+
1753
+ @property
1754
+ def imag(self) -> Self:
1755
+ """
1756
+ .. versionadded:: 2012.1
1757
+ """
1758
+ if self.dtype.kind == "c":
1759
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1760
+ result.add_event(
1761
+ self._imag(result, self))
1762
+ return result
1763
+ else:
1764
+ return zeros_like(self)
1765
+
1766
+ def conj(self) -> Self:
1767
+ """
1768
+ .. versionadded:: 2012.1
1769
+ """
1770
+ if self.dtype.kind == "c":
1771
+ result = self._new_like_me()
1772
+ result.add_event(self._conj(result, self))
1773
+ return result
1774
+ else:
1775
+ return self
1776
+
1777
+ conjugate = conj
1778
+
1779
+ # }}}
1780
+
1781
+ # {{{ event management
1782
+
1783
+ def add_event(self, evt: cl.Event) -> None:
1784
+ """Add *evt* to :attr:`events`. If :attr:`events` is too long, this method
1785
+ may implicitly wait for a subset of :attr:`events` and clear them from the
1786
+ list.
1787
+ """
1788
+ n_wait = 4
1789
+
1790
+ self.events.append(evt)
1791
+
1792
+ if len(self.events) > 3*n_wait:
1793
+ wait_events = self.events[:n_wait]
1794
+ cl.wait_for_events(wait_events)
1795
+ del self.events[:n_wait]
1796
+
1797
+ def finish(self) -> None:
1798
+ """Wait for the entire contents of :attr:`events`, clear it."""
1799
+
1800
+ if self.events:
1801
+ cl.wait_for_events(self.events)
1802
+ del self.events[:]
1803
+
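# Illustrative sketch of typical event bookkeeping (assumes ``ary`` is an
# existing Array and ``evt`` an event returned by a kernel that writes to its
# buffer):
#
#     ary.add_event(evt)   # record the pending computation
#     ary.finish()         # block until everything recorded so far is done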
1804
+ # }}}
1805
+
1806
+ # {{{ views
1807
+
1808
+ def reshape(self, *shape, **kwargs):
1809
+ """Returns an array containing the same data with a new shape."""
1810
+
1811
+ order = kwargs.pop("order", "C")
1812
+ if kwargs:
1813
+ raise TypeError("unexpected keyword arguments: %s"
1814
+ % list(kwargs.keys()))
1815
+
1816
+ if order not in "CF":
1817
+ raise ValueError("order must be either 'C' or 'F'")
1818
+
1819
+ # TODO: add more error-checking, perhaps
1820
+
1821
+ # FIXME: The following is overly conservative. As long as we don't change
1822
+ # our memory footprint, we're good.
1823
+
1824
+ # if not self.flags.forc:
1825
+ # raise RuntimeError("only contiguous arrays may "
1826
+ # "be used as arguments to this operation")
1827
+
1828
+ if isinstance(shape[0], tuple) or isinstance(shape[0], list):
1829
+ shape = tuple(shape[0])
1830
+
1831
+ if -1 in shape:
1832
+ shape = list(shape)
1833
+ idx = shape.index(-1)
1834
+ size = -reduce(lambda x, y: x * y, shape, 1)
1835
+ if size == 0:
1836
+ shape[idx] = 0
1837
+ else:
1838
+ shape[idx] = self.size // size
1839
+ if builtins.any(s < 0 for s in shape):
1840
+ raise ValueError("can only specify one unknown dimension")
1841
+ shape = tuple(shape)
1842
+
1843
+ if shape == self.shape:
1844
+ return self._new_with_changes(
1845
+ data=self.base_data, offset=self.offset, shape=shape,
1846
+ strides=self.strides)
1847
+
1848
+ import operator
1849
+ size = reduce(operator.mul, shape, 1)
1850
+ if size != self.size:
1851
+ raise ValueError("total size of new array must be unchanged")
1852
+
1853
+ if self.size == 0:
1854
+ return self._new_with_changes(
1855
+ data=None, offset=0, shape=shape,
1856
+ strides=(
1857
+ _f_contiguous_strides(self.dtype.itemsize, shape)
1858
+ if order == "F" else
1859
+ _c_contiguous_strides(self.dtype.itemsize, shape)
1860
+ ))
1861
+
1862
+ # {{{ determine reshaped strides
1863
+
1864
+ # copied and translated from
1865
+ # https://github.com/numpy/numpy/blob/4083883228d61a3b571dec640185b5a5d983bf59/numpy/core/src/multiarray/shape.c # noqa: E501
1866
+
1867
+ newdims = shape
1868
+ newnd = len(newdims)
1869
+
1870
+ # Remove axes with dimension 1 from the old array. They have no effect
1871
+ # but would need special cases since their strides do not matter.
1872
+
1873
+ olddims = []
1874
+ oldstrides = []
1875
+ for oi in range(len(self.shape)):
1876
+ s = self.shape[oi]
1877
+ if s != 1:
1878
+ olddims.append(s)
1879
+ oldstrides.append(self.strides[oi])
1880
+
1881
+ oldnd = len(olddims)
1882
+
1883
+ newstrides = [-1]*len(newdims)
1884
+
1885
+ # oi to oj and ni to nj give the axis ranges currently worked with
1886
+ oi = 0
1887
+ oj = 1
1888
+ ni = 0
1889
+ nj = 1
1890
+ while ni < newnd and oi < oldnd:
1891
+ np = newdims[ni]
1892
+ op = olddims[oi]
1893
+
1894
+ while np != op:
1895
+ if np < op:
1896
+ # Misses trailing 1s, these are handled later
1897
+ np *= newdims[nj]
1898
+ nj += 1
1899
+ else:
1900
+ op *= olddims[oj]
1901
+ oj += 1
1902
+
1903
+ # Check whether the original axes can be combined
1904
+ for ok in range(oi, oj-1):
1905
+ if order == "F":
1906
+ if oldstrides[ok+1] != olddims[ok]*oldstrides[ok]:
1907
+ raise ValueError("cannot reshape without copy")
1908
+ else:
1909
+ # C order
1910
+ if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]):
1911
+ raise ValueError("cannot reshape without copy")
1912
+
1913
+ # Calculate new strides for all axes currently worked with
1914
+ if order == "F":
1915
+ newstrides[ni] = oldstrides[oi]
1916
+ for nk in range(ni+1, nj):
1917
+ newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1]
1918
+ else:
1919
+ # C order
1920
+ newstrides[nj - 1] = oldstrides[oj - 1]
1921
+ for nk in range(nj-1, ni, -1):
1922
+ newstrides[nk - 1] = newstrides[nk]*newdims[nk]
1923
+
1924
+ ni = nj
1925
+ nj += 1
1926
+
1927
+ oi = oj
1928
+ oj += 1
1929
+
1930
+ # Set strides corresponding to trailing 1s of the new shape.
1931
+ if ni >= 1:
1932
+ last_stride = newstrides[ni - 1]
1933
+ else:
1934
+ last_stride = self.dtype.itemsize
1935
+
1936
+ if order == "F":
1937
+ last_stride *= newdims[ni - 1]
1938
+
1939
+ for nk in range(ni, len(shape)):
1940
+ newstrides[nk] = last_stride
1941
+
1942
+ # }}}
1943
+
1944
+ return self._new_with_changes(
1945
+ data=self.base_data, offset=self.offset, shape=shape,
1946
+ strides=tuple(newstrides))
1947
+
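# Usage sketch for reshape (illustrative; assumes a live CommandQueue
# ``queue`` and numpy imported as ``np``, as elsewhere in this module):
#
#     a = to_device(queue, np.arange(12, dtype=np.float32))
#     b = a.reshape(3, 4)      # a view onto the same buffer, no copy
#     c = a.reshape(2, -1)     # the -1 entry is inferred as 6 from a.size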
1948
+ def ravel(self, order="C"):
1949
+        """Returns a flattened array containing the same data."""
1950
+ return self.reshape(self.size, order=order)
1951
+
1952
+ def view(self, dtype=None):
1953
+        """Returns a view of the array with the same data. If *dtype* is different
1954
+ from current dtype, the actual bytes of memory will be reinterpreted.
1955
+ """
1956
+
1957
+ if dtype is None:
1958
+ dtype = self.dtype
1959
+
1960
+ old_itemsize = self.dtype.itemsize
1961
+ itemsize = np.dtype(dtype).itemsize
1962
+
1963
+ from pytools import argmin2
1964
+ min_stride_axis = argmin2(
1965
+ (axis, abs(stride))
1966
+ for axis, stride in enumerate(self.strides))
1967
+
1968
+ if self.shape[min_stride_axis] * old_itemsize % itemsize != 0:
1969
+ raise ValueError("new type not compatible with array")
1970
+
1971
+ new_shape = (
1972
+ *self.shape[:min_stride_axis],
1973
+ self.shape[min_stride_axis] * old_itemsize // itemsize,
1974
+ *self.shape[min_stride_axis+1:])
1975
+ new_strides = (
1976
+ *self.strides[:min_stride_axis],
1977
+ self.strides[min_stride_axis] * itemsize // old_itemsize,
1978
+ *self.strides[min_stride_axis+1:])
1979
+
1980
+ return self._new_with_changes(
1981
+ self.base_data, self.offset,
1982
+ shape=new_shape, dtype=dtype,
1983
+ strides=new_strides)
1984
+
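# Usage sketch for view (illustrative): reinterpreting the bytes of a float32
# Array ``a`` as unsigned integers of the same width.
#
#     bits = a.view(np.uint32)     # same buffer and shape, new dtype
#     assert bits.dtype == np.uint32 and bits.shape == a.shape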
1985
+ def squeeze(self):
1986
+ """Returns a view of the array with dimensions of
1987
+ length 1 removed.
1988
+
1989
+ .. versionadded:: 2015.2
1990
+ """
1991
+ new_shape = tuple(dim for dim in self.shape if dim > 1)
1992
+ new_strides = tuple(
1993
+ self.strides[i] for i, dim in enumerate(self.shape)
1994
+ if dim > 1)
1995
+
1996
+ return self._new_with_changes(
1997
+ self.base_data, self.offset,
1998
+ shape=new_shape, strides=new_strides)
1999
+
2000
+ def transpose(self, axes=None):
2001
+ """Permute the dimensions of an array.
2002
+
2003
+ :arg axes: list of ints, optional.
2004
+ By default, reverse the dimensions, otherwise permute the axes
2005
+ according to the values given.
2006
+
2007
+        :returns: :class:`Array`: A view of the array with its axes permuted.
2008
+
2009
+ .. versionadded:: 2015.2
2010
+ """
2011
+
2012
+ if axes is None:
2013
+ axes = range(self.ndim-1, -1, -1)
2014
+
2015
+ if len(axes) != len(self.shape):
2016
+ raise ValueError("axes don't match array")
2017
+
2018
+ new_shape = [self.shape[axes[i]] for i in range(len(axes))]
2019
+ new_strides = [self.strides[axes[i]] for i in range(len(axes))]
2020
+
2021
+ return self._new_with_changes(
2022
+ self.base_data, self.offset,
2023
+ shape=tuple(new_shape),
2024
+ strides=tuple(new_strides))
2025
+
2026
+ @property
2027
+ def T(self): # noqa: N802
2028
+ """
2029
+ .. versionadded:: 2015.2
2030
+ """
2031
+ return self.transpose()
2032
+
2033
+ # }}}
2034
+
2035
+ def map_to_host(self,
2036
+ queue: cl.CommandQueue | None = None,
2037
+ flags=None,
2038
+ is_blocking: bool = True,
2039
+ wait_for: cl.WaitList = None):
2040
+ """If *is_blocking*, return a :class:`numpy.ndarray` corresponding to the
2041
+ same memory as *self*.
2042
+
2043
+ If *is_blocking* is not true, return a tuple ``(ary, evt)``, where
2044
+ *ary* is the above-mentioned array.
2045
+
2046
+ The host array is obtained using :func:`pyopencl.enqueue_map_buffer`.
2047
+ See there for further details.
2048
+
2049
+ :arg flags: A combination of :class:`pyopencl.map_flags`.
2050
+ Defaults to read-write.
2051
+
2052
+        .. versionadded:: 2013.2
2053
+ """
2054
+
2055
+ if flags is None:
2056
+ flags = cl.map_flags.READ | cl.map_flags.WRITE
2057
+ if wait_for is None:
2058
+ wait_for = []
2059
+
2060
+ ary, evt = cl.enqueue_map_buffer(
2061
+ queue or self.queue, self.base_data, flags, self.offset,
2062
+ self.shape, self.dtype, strides=self.strides,
2063
+ wait_for=[*wait_for, *self.events], is_blocking=is_blocking)
2064
+
2065
+ if is_blocking:
2066
+ return ary
2067
+ else:
2068
+ return ary, evt
2069
+
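# Usage sketch for map_to_host (illustrative; ``a`` is an existing Array):
#
#     mapped = a.map_to_host()     # blocking map, read/write by default
#     mapped[...] = 0              # modify through the host-side view
#
# Roughly speaking, the mapping is released (and the writes become visible to
# later kernels) once the returned array and its ``base`` memory map go away;
# see :func:`pyopencl.enqueue_map_buffer` for the details.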
2070
+ # {{{ getitem/setitem
2071
+
2072
+ def __getitem__(self, index):
2073
+ """
2074
+ .. versionadded:: 2013.1
2075
+ """
2076
+
2077
+ if isinstance(index, Array):
2078
+ if index.dtype.kind not in ("i", "u"):
2079
+ raise TypeError(
2080
+ "fancy indexing is only allowed with integers")
2081
+ if len(index.shape) != 1:
2082
+ raise NotImplementedError(
2083
+ "multidimensional fancy indexing is not supported")
2084
+ if len(self.shape) != 1:
2085
+ raise NotImplementedError(
2086
+ "fancy indexing into a multi-d array is not supported")
2087
+
2088
+ return take(self, index)
2089
+
2090
+ if not isinstance(index, tuple):
2091
+ index = (index,)
2092
+
2093
+ new_shape = []
2094
+ new_offset = self.offset
2095
+ new_strides = []
2096
+
2097
+ seen_ellipsis = False
2098
+
2099
+ index_axis = 0
2100
+ array_axis = 0
2101
+ while index_axis < len(index):
2102
+ index_entry = index[index_axis]
2103
+
2104
+ if array_axis > len(self.shape):
2105
+ raise IndexError("too many axes in index")
2106
+
2107
+ if isinstance(index_entry, slice):
2108
+ start, stop, idx_stride = index_entry.indices(
2109
+ self.shape[array_axis])
2110
+
2111
+ array_stride = self.strides[array_axis]
2112
+
2113
+ new_shape.append((abs(stop-start)-1)//abs(idx_stride)+1)
2114
+ new_strides.append(idx_stride*array_stride)
2115
+ new_offset += array_stride*start
2116
+
2117
+ index_axis += 1
2118
+ array_axis += 1
2119
+
2120
+ elif isinstance(index_entry, (int, np.integer)):
2121
+ array_shape = self.shape[array_axis]
2122
+ if index_entry < 0:
2123
+ index_entry += array_shape
2124
+
2125
+ if not (0 <= index_entry < array_shape):
2126
+ raise IndexError(
2127
+ "subindex in axis %d out of range" % index_axis)
2128
+
2129
+ new_offset += self.strides[array_axis]*index_entry
2130
+
2131
+ index_axis += 1
2132
+ array_axis += 1
2133
+
2134
+ elif index_entry is Ellipsis:
2135
+ index_axis += 1
2136
+
2137
+ remaining_index_count = len(index) - index_axis
2138
+ new_array_axis = len(self.shape) - remaining_index_count
2139
+ if new_array_axis < array_axis:
2140
+ raise IndexError("invalid use of ellipsis in index")
2141
+ while array_axis < new_array_axis:
2142
+ new_shape.append(self.shape[array_axis])
2143
+ new_strides.append(self.strides[array_axis])
2144
+ array_axis += 1
2145
+
2146
+ if seen_ellipsis:
2147
+ raise IndexError(
2148
+ "more than one ellipsis not allowed in index")
2149
+ seen_ellipsis = True
2150
+
2151
+ elif index_entry is np.newaxis:
2152
+ new_shape.append(1)
2153
+ new_strides.append(0)
2154
+ index_axis += 1
2155
+
2156
+ else:
2157
+ raise IndexError("invalid subindex in axis %d" % index_axis)
2158
+
2159
+ while array_axis < len(self.shape):
2160
+ new_shape.append(self.shape[array_axis])
2161
+ new_strides.append(self.strides[array_axis])
2162
+
2163
+ array_axis += 1
2164
+
2165
+ return self._new_with_changes(
2166
+ self.base_data, offset=new_offset,
2167
+ shape=tuple(new_shape),
2168
+ strides=tuple(new_strides))
2169
+
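# Indexing sketch (illustrative): slices produce views sharing the buffer,
# while an integer-typed Array index gathers, same as take().
#
#     a = to_device(queue, np.arange(10, dtype=np.float32))
#     even = a[::2]                                        # strided view
#     idx = to_device(queue, np.array([1, 3, 4], dtype=np.int32))
#     picked = a[idx]                                      # gather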
2170
+ def setitem(self,
2171
+ subscript: Array | slice | int,
2172
+ value: object,
2173
+ queue: cl.CommandQueue | None = None,
2174
+ wait_for: cl.WaitList = None
2175
+ ):
2176
+ """Like :meth:`__setitem__`, but with the ability to specify
2177
+ a *queue* and *wait_for*.
2178
+
2179
+ .. versionadded:: 2013.1
2180
+
2181
+ .. versionchanged:: 2013.2
2182
+
2183
+ Added *wait_for*.
2184
+ """
2185
+
2186
+ queue = queue or self.queue
2187
+ assert queue is not None
2188
+ if wait_for is None:
2189
+ wait_for = []
2190
+ wait_for = [*wait_for, *self.events]
2191
+
2192
+ if isinstance(subscript, Array):
2193
+ if subscript.dtype.kind not in ("i", "u"):
2194
+ raise TypeError(
2195
+ "fancy indexing is only allowed with integers")
2196
+ if len(subscript.shape) != 1:
2197
+ raise NotImplementedError(
2198
+ "multidimensional fancy indexing is not supported")
2199
+ if len(self.shape) != 1:
2200
+ raise NotImplementedError(
2201
+ "fancy indexing into a multi-d array is not supported")
2202
+
2203
+ multi_put([value], subscript, out=[self], queue=queue,
2204
+ wait_for=wait_for)
2205
+ return
2206
+
2207
+ subarray = self[subscript]
2208
+
2209
+ if not subarray.size:
2210
+ # This prevents errors about mismatched strides that neither we
2211
+ # nor numpy worry about in the empty case.
2212
+ return
2213
+
2214
+ if isinstance(value, np.ndarray):
2215
+ if subarray.shape == value.shape and subarray.strides == value.strides:
2216
+ assert subarray.base_data is not None
2217
+ self.add_event(
2218
+ cl.enqueue_copy(queue, subarray.base_data,
2219
+ value, dst_offset=subarray.offset, wait_for=wait_for))
2220
+ return
2221
+ else:
2222
+ value = to_device(queue, value, self.allocator)
2223
+
2224
+ if isinstance(value, Array):
2225
+ if len(subarray.shape) != len(value.shape):
2226
+ raise NotImplementedError("broadcasting is not "
2227
+ "supported in __setitem__")
2228
+ if subarray.shape != value.shape:
2229
+ raise ValueError("cannot assign between arrays of "
2230
+ "differing shapes")
2231
+ if subarray.strides != value.strides:
2232
+ raise NotImplementedError("cannot assign between arrays of "
2233
+ "differing strides")
2234
+
2235
+ self.add_event(
2236
+ self._copy(subarray, value, queue=queue, wait_for=wait_for))
2237
+
2238
+ else:
2239
+ # Let's assume it's a scalar
2240
+ subarray.fill(value, queue=queue, wait_for=wait_for)
2241
+
2242
+ def __setitem__(self, subscript, value):
2243
+        """Set the slice of *self* identified by *subscript* to *value*.
2244
+
2245
+ *value* is allowed to be:
2246
+
2247
+        * An :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
2248
+ but with potentially different :attr:`dtype`.
2249
+ * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
2250
+ :attr:`strides`, but with potentially different :attr:`dtype`.
2251
+ * A scalar.
2252
+
2253
+ Non-scalar broadcasting is not currently supported.
2254
+
2255
+ .. versionadded:: 2013.1
2256
+ """
2257
+ self.setitem(subscript, value)
2258
+
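# Assignment sketch (illustrative; ``a`` is a 1D float32 Array as above):
# scalars broadcast, Arrays and ndarrays must match in shape and strides.
#
#     a[2:5] = 7.0                                        # fill a sub-slice
#     a[:3] = np.array([1., 2., 3.], dtype=np.float32)    # copy from the host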
2259
+ # }}}
2260
+
2261
+ # }}}
2262
+
2263
+
2264
+ # {{{ creation helpers
2265
+
2266
+ def as_strided(ary, shape=None, strides=None):
2267
+ """Make an :class:`Array` from the given array with the given
2268
+ shape and strides.
2269
+ """
2270
+
2271
+ # undocumented for the moment
2272
+
2273
+ if shape is None:
2274
+ shape = ary.shape
2275
+ if strides is None:
2276
+ strides = ary.strides
2277
+
2278
+ return Array(ary.queue, shape, ary.dtype, allocator=ary.allocator,
2279
+ data=ary.data, strides=strides)
2280
+
2281
+
2282
+ class _same_as_transfer: # noqa: N801
2283
+ pass
2284
+
2285
+
2286
+ def to_device(
2287
+ queue: cl.CommandQueue,
2288
+ ary: NDArray[Any],
2289
+ allocator: Allocator | None = None,
2290
+ async_: bool = False,
2291
+ array_queue=_same_as_transfer,
2292
+ ) -> Array:
2293
+    """Return an :class:`Array` that is an exact copy of the
2294
+ :class:`numpy.ndarray` instance *ary*.
2295
+
2296
+ :arg array_queue: The :class:`~pyopencl.CommandQueue` which will
2297
+ be stored in the resulting array. Useful
2298
+ to make sure there is no implicit queue associated
2299
+ with the array by passing *None*.
2300
+
2301
+ See :class:`Array` for the meaning of *allocator*.
2302
+
2303
+ .. versionchanged:: 2015.2
2304
+ *array_queue* argument was added.
2305
+ """
2306
+
2307
+ if ary.dtype == object:
2308
+ raise RuntimeError("to_device does not work on object arrays.")
2309
+
2310
+ if array_queue is _same_as_transfer:
2311
+ first_arg = queue
2312
+ else:
2313
+ first_arg = queue.context
2314
+
2315
+ result = Array(first_arg, ary.shape, ary.dtype,
2316
+ allocator=allocator, strides=ary.strides)
2317
+ result.set(ary, async_=async_, queue=queue)
2318
+ return result
2319
+
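# Round-trip sketch for to_device (illustrative; assumes a context and queue
# created via cl.create_some_context() / cl.CommandQueue):
#
#     host = np.linspace(0, 1, 8, dtype=np.float32)
#     dev = to_device(queue, host)
#     assert np.allclose(dev.get(), host)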
2320
+
2321
+ empty = Array
2322
+
2323
+
2324
+ def zeros(
2325
+ queue: cl.CommandQueue,
2326
+ shape: int | tuple[int, ...],
2327
+ dtype: DTypeLike,
2328
+ order: Literal["C"] | Literal["F"] = "C",
2329
+ allocator: Allocator | None = None,
2330
+ ) -> Array:
2331
+ """Same as :func:`empty`, but the :class:`Array` is zero-initialized before
2332
+ being returned.
2333
+
2334
+ .. versionchanged:: 2011.1
2335
+ *context* argument was deprecated.
2336
+ """
2337
+
2338
+ result = Array(None, shape, dtype,
2339
+ order=order, allocator=allocator,
2340
+ _context=queue.context, _queue=queue)
2341
+ result._zero_fill()
2342
+ return result
2343
+
2344
+
2345
+ def empty_like(
2346
+ ary: Array,
2347
+ queue: cl.CommandQueue | type[_copy_queue] | None = _copy_queue,
2348
+ allocator: Allocator | None = None,
2349
+ ):
2350
+ """Make a new, uninitialized :class:`Array` having the same properties
2351
+    as *ary*.
2352
+ """
2353
+
2354
+ return ary._new_with_changes(data=None, offset=0, queue=queue,
2355
+ allocator=allocator)
2356
+
2357
+
2358
+ def zeros_like(ary):
2359
+ """Make a new, zero-initialized :class:`Array` having the same properties
2360
+    as *ary*.
2361
+ """
2362
+
2363
+ result = ary._new_like_me()
2364
+ result._zero_fill()
2365
+ return result
2366
+
2367
+
2368
+ @dataclass
2369
+ class _ArangeInfo:
2370
+ start: int | None = None
2371
+ stop: int | None = None
2372
+ step: int | None = None
2373
+ dtype: np.dtype | None = None
2374
+ allocator: Any | None = None
2375
+
2376
+
2377
+ @elwise_kernel_runner
2378
+ def _arange_knl(result, start, step):
2379
+ return elementwise.get_arange_kernel(
2380
+ result.context, result.dtype)
2381
+
2382
+
2383
+ def arange(queue: cl.CommandQueue, *args: Any, **kwargs: Any) -> Array:
2384
+ """arange(queue, [start, ] stop [, step], **kwargs)
2385
+    Create an :class:`Array` filled with numbers spaced *step* apart,
2386
+ starting from *start* and ending at *stop*. If not given, *start*
2387
+ defaults to 0, *step* defaults to 1.
2388
+
2389
+ For floating point arguments, the length of the result is
2390
+ ``ceil((stop - start)/step)``. This rule may result in the last
2391
+ element of the result being greater than *stop*.
2392
+
2393
+ *dtype* is a required keyword argument.
2394
+
2395
+ .. versionchanged:: 2011.1
2396
+ *context* argument was deprecated.
2397
+
2398
+ .. versionchanged:: 2011.2
2399
+ *allocator* keyword argument was added.
2400
+ """
2401
+
2402
+ # {{{ argument processing
2403
+
2404
+ # Yuck. Thanks, numpy developers. ;)
2405
+
2406
+ explicit_dtype = False
2407
+ inf = _ArangeInfo()
2408
+
2409
+ if isinstance(args[-1], np.dtype):
2410
+ inf.dtype = args[-1]
2411
+ args = args[:-1]
2412
+ explicit_dtype = True
2413
+
2414
+ argc = len(args)
2415
+ if argc == 0:
2416
+ raise ValueError("stop argument required")
2417
+ elif argc == 1:
2418
+ inf.stop = args[0]
2419
+ elif argc == 2:
2420
+ inf.start = args[0]
2421
+ inf.stop = args[1]
2422
+ elif argc == 3:
2423
+ inf.start = args[0]
2424
+ inf.stop = args[1]
2425
+ inf.step = args[2]
2426
+ else:
2427
+ raise ValueError("too many arguments")
2428
+
2429
+ admissible_names = ["start", "stop", "step", "dtype", "allocator"]
2430
+ for k, v in kwargs.items():
2431
+ if k in admissible_names:
2432
+ if getattr(inf, k) is None:
2433
+ setattr(inf, k, v)
2434
+ if k == "dtype":
2435
+ explicit_dtype = True
2436
+ else:
2437
+ raise ValueError(f"may not specify '{k}' by position and keyword")
2438
+ else:
2439
+ raise ValueError(f"unexpected keyword argument '{k}'")
2440
+
2441
+ if inf.start is None:
2442
+ inf.start = 0
2443
+ if inf.step is None:
2444
+ inf.step = 1
2445
+ if inf.dtype is None:
2446
+ inf.dtype = np.array([inf.start, inf.stop, inf.step]).dtype
2447
+
2448
+ # }}}
2449
+
2450
+ # {{{ actual functionality
2451
+
2452
+ dtype = np.dtype(inf.dtype)
2453
+ start = dtype.type(inf.start)
2454
+ step = dtype.type(inf.step)
2455
+ stop = dtype.type(inf.stop)
2456
+
2457
+ if not explicit_dtype:
2458
+ raise TypeError("arange requires a dtype argument")
2459
+
2460
+ from math import ceil
2461
+ size = ceil((stop-start)/step)
2462
+
2463
+ result = Array(queue, (size,), dtype, allocator=inf.allocator)
2464
+ result.add_event(_arange_knl(result, start, step, queue=queue))
2465
+
2466
+ # }}}
2467
+
2468
+ return result
2469
+
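# Usage sketch for arange (illustrative): dtype is mandatory, and the length
# follows the ceil((stop - start)/step) rule described above.
#
#     r = arange(queue, 0, 10, 3, dtype=np.int32)
#     assert r.get().tolist() == [0, 3, 6, 9]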
2470
+ # }}}
2471
+
2472
+
2473
+ # {{{ take/put/concatenate/diff/(h?stack)
2474
+
2475
+ @elwise_kernel_runner
2476
+ def _take(result, ary, indices):
2477
+ return elementwise.get_take_kernel(
2478
+ result.context, result.dtype, indices.dtype)
2479
+
2480
+
2481
+ def take(
2482
+ a: Array,
2483
+ indices: Array,
2484
+ out: Array | None = None,
2485
+ queue: cl.CommandQueue | None = None,
2486
+ wait_for: cl.WaitList = None
2487
+ ) -> Array:
2488
+ """Return the :class:`Array` ``[a[indices[0]], ..., a[indices[n]]]``.
2489
+ For the moment, *a* must be a type that can be bound to a texture.
2490
+ """
2491
+
2492
+ queue = queue or a.queue
2493
+ if out is None:
2494
+ out = type(a)(queue, indices.shape, a.dtype, allocator=a.allocator)
2495
+
2496
+ assert len(indices.shape) == 1
2497
+ out.add_event(
2498
+ _take(out, a, indices, queue=queue, wait_for=wait_for))
2499
+ return out
2500
+
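# Gather sketch for take (illustrative):
#
#     src = to_device(queue, np.array([10., 20., 30., 40.], dtype=np.float32))
#     idx = to_device(queue, np.array([3, 0, 0], dtype=np.int32))
#     assert take(src, idx).get().tolist() == [40.0, 10.0, 10.0]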
2501
+
2502
+ def multi_take(arrays, indices, out=None, queue: cl.CommandQueue | None = None):
2503
+ if not len(arrays):
2504
+ return []
2505
+
2506
+ assert len(indices.shape) == 1
2507
+
2508
+ from pytools import single_valued
2509
+ a_dtype = single_valued(a.dtype for a in arrays)
2510
+    a_allocator = arrays[0].allocator
2511
+ context = indices.context
2512
+ queue = queue or indices.queue
2513
+
2514
+ vec_count = len(arrays)
2515
+
2516
+ if out is None:
2517
+ out = [
2518
+ type(arrays[i])(
2519
+                queue, indices.shape, a_dtype,
2520
+ allocator=a_allocator)
2521
+ for i in range(vec_count)]
2522
+ else:
2523
+ if len(out) != len(arrays):
2524
+ raise ValueError("out and arrays must have the same length")
2525
+
2526
+ chunk_size = builtins.min(vec_count, 10)
2527
+
2528
+ def make_func_for_chunk_size(chunk_size):
2529
+ knl = elementwise.get_take_kernel(
2530
+ indices.context, a_dtype, indices.dtype,
2531
+ vec_count=chunk_size)
2532
+ knl.set_block_shape(*indices._block)
2533
+ return knl
2534
+
2535
+ knl = make_func_for_chunk_size(chunk_size)
2536
+
2537
+ for start_i in range(0, len(arrays), chunk_size):
2538
+ chunk_slice = slice(start_i, start_i+chunk_size)
2539
+
2540
+ if start_i + chunk_size > vec_count:
2541
+ knl = make_func_for_chunk_size(vec_count-start_i)
2542
+
2543
+ gs, ls = indices._get_sizes(queue,
2544
+ knl.get_work_group_info(
2545
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2546
+ queue.device))
2547
+
2548
+ wait_for_this = (
2549
+ *indices.events,
2550
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2551
+ *[evt for o in out[chunk_slice] for evt in o.events])
2552
+ evt = knl(queue, gs, ls,
2553
+ indices.data,
2554
+ *[o.data for o in out[chunk_slice]],
2555
+ *[i.data for i in arrays[chunk_slice]],
2556
+ *[indices.size],
2557
+ wait_for=wait_for_this)
2558
+ for o in out[chunk_slice]:
2559
+ o.add_event(evt)
2560
+
2561
+ return out
2562
+
2563
+
2564
+ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None,
2565
+ out=None, queue: cl.CommandQueue | None = None, src_offsets=None):
2566
+ if not len(arrays):
2567
+ return []
2568
+
2569
+ from pytools import single_valued
2570
+ a_dtype = single_valued(a.dtype for a in arrays)
2571
+ a_allocator = arrays[0].allocator
2572
+ context = src_indices.context
2573
+ queue = queue or src_indices.queue
2574
+
2575
+ vec_count = len(arrays)
2576
+
2577
+ if out is None:
2578
+ out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
2579
+ for i in range(vec_count)]
2580
+ else:
2581
+ if a_dtype != single_valued(o.dtype for o in out):
2582
+ raise TypeError("arrays and out must have the same dtype")
2583
+ if len(out) != vec_count:
2584
+ raise ValueError("out and arrays must have the same length")
2585
+
2586
+ if src_indices.dtype != dest_indices.dtype:
2587
+ raise TypeError(
2588
+ "src_indices and dest_indices must have the same dtype")
2589
+
2590
+ if len(src_indices.shape) != 1:
2591
+ raise ValueError("src_indices must be 1D")
2592
+
2593
+ if src_indices.shape != dest_indices.shape:
2594
+ raise ValueError(
2595
+ "src_indices and dest_indices must have the same shape")
2596
+
2597
+ if src_offsets is None:
2598
+ src_offsets_list = []
2599
+ else:
2600
+ src_offsets_list = src_offsets
2601
+ if len(src_offsets) != vec_count:
2602
+ raise ValueError(
2603
+ "src_indices and src_offsets must have the same length")
2604
+
2605
+ max_chunk_size = 10
2606
+
2607
+ chunk_size = builtins.min(vec_count, max_chunk_size)
2608
+
2609
+ def make_func_for_chunk_size(chunk_size):
2610
+ return elementwise.get_take_put_kernel(context,
2611
+ a_dtype, src_indices.dtype,
2612
+ with_offsets=src_offsets is not None,
2613
+ vec_count=chunk_size)
2614
+
2615
+ knl = make_func_for_chunk_size(chunk_size)
2616
+
2617
+ for start_i in range(0, len(arrays), chunk_size):
2618
+ chunk_slice = slice(start_i, start_i+chunk_size)
2619
+
2620
+ if start_i + chunk_size > vec_count:
2621
+ knl = make_func_for_chunk_size(vec_count-start_i)
2622
+
2623
+ gs, ls = src_indices._get_sizes(queue,
2624
+ knl.get_work_group_info(
2625
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2626
+ queue.device))
2627
+
2628
+ wait_for_this = (
2629
+ *dest_indices.events,
2630
+ *src_indices.events,
2631
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2632
+ *[evt for o in out[chunk_slice] for evt in o.events])
2633
+ evt = knl(queue, gs, ls,
2634
+ *out[chunk_slice],
2635
+ dest_indices,
2636
+ src_indices,
2637
+ *arrays[chunk_slice],
2638
+ *src_offsets_list[chunk_slice],
2639
+ src_indices.size,
2640
+ wait_for=wait_for_this)
2641
+ for o in out[chunk_slice]:
2642
+ o.add_event(evt)
2643
+
2644
+ return out
2645
+
2646
+
2647
+ def multi_put(
2648
+ arrays,
2649
+ dest_indices: Array,
2650
+ dest_shape=None,
2651
+ out=None,
2652
+ queue: cl.CommandQueue | None = None,
2653
+ wait_for: cl.WaitList = None
2654
+ ):
2655
+ if not len(arrays):
2656
+ return []
2657
+
2658
+ from pytools import single_valued
2659
+ a_dtype = single_valued(a.dtype for a in arrays)
2660
+ a_allocator = arrays[0].allocator
2661
+ context = dest_indices.context
2662
+ queue = queue or dest_indices.queue
2663
+ assert queue is not None
2664
+ if wait_for is None:
2665
+ wait_for = []
2666
+ wait_for = [*wait_for, *dest_indices.events]
2667
+
2668
+ vec_count = len(arrays)
2669
+
2670
+ if out is None:
2671
+ out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
2672
+ for i in range(vec_count)]
2673
+ else:
2674
+ if a_dtype != single_valued(o.dtype for o in out):
2675
+ raise TypeError("arrays and out must have the same dtype")
2676
+ if len(out) != vec_count:
2677
+ raise ValueError("out and arrays must have the same length")
2678
+
2679
+ if len(dest_indices.shape) != 1:
2680
+ raise ValueError("dest_indices must be 1D")
2681
+
2682
+ chunk_size = builtins.min(vec_count, 10)
2683
+
2684
+ # array of bools to specify whether the array of same index in this chunk
2685
+ # will be filled with a single value.
2686
+ use_fill = np.ndarray((chunk_size,), dtype=np.uint8)
2687
+ array_lengths = np.ndarray((chunk_size,), dtype=np.int64)
2688
+
2689
+ def make_func_for_chunk_size(chunk_size):
2690
+ knl = elementwise.get_put_kernel(
2691
+ context, a_dtype, dest_indices.dtype,
2692
+ vec_count=chunk_size)
2693
+ return knl
2694
+
2695
+ knl = make_func_for_chunk_size(chunk_size)
2696
+
2697
+ for start_i in range(0, len(arrays), chunk_size):
2698
+ chunk_slice = slice(start_i, start_i+chunk_size)
2699
+ for fill_idx, ary in enumerate(arrays[chunk_slice]):
2700
+ # If there is only one value in the values array for this src array
2701
+ # in the chunk then fill every index in `dest_idx` array with it.
2702
+ use_fill[fill_idx] = 1 if ary.size == 1 else 0
2703
+ array_lengths[fill_idx] = len(ary)
2704
+ # Copy the populated `use_fill` array to a buffer on the device.
2705
+ use_fill_cla = to_device(queue, use_fill)
2706
+ array_lengths_cla = to_device(queue, array_lengths)
2707
+
2708
+ if start_i + chunk_size > vec_count:
2709
+ knl = make_func_for_chunk_size(vec_count-start_i)
2710
+
2711
+ gs, ls = dest_indices._get_sizes(queue,
2712
+ knl.get_work_group_info(
2713
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2714
+ queue.device))
2715
+
2716
+ wait_for_this = (
2717
+ *wait_for,
2718
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2719
+ *[evt for o in out[chunk_slice] for evt in o.events])
2720
+ evt = knl(queue, gs, ls,
2721
+ *out[chunk_slice],
2722
+ dest_indices,
2723
+ *arrays[chunk_slice],
2724
+ use_fill_cla, array_lengths_cla, dest_indices.size,
2725
+ wait_for=wait_for_this)
2726
+
2727
+ for o in out[chunk_slice]:
2728
+ o.add_event(evt)
2729
+
2730
+ return out
2731
+
2732
+
2733
+ def concatenate(arrays, axis=0, queue: cl.CommandQueue | None = None, allocator=None):
2734
+ """
2735
+ .. versionadded:: 2013.1
2736
+
2737
+ .. note::
2738
+
2739
+ The returned array is of the same type as the first array in the list.
2740
+ """
2741
+ if not arrays:
2742
+ raise ValueError("need at least one array to concatenate")
2743
+
2744
+ # {{{ find properties of result array
2745
+
2746
+ shape = None
2747
+
2748
+ for i_ary, ary in enumerate(arrays):
2749
+ queue = queue or ary.queue
2750
+ allocator = allocator or ary.allocator
2751
+
2752
+ if shape is None:
2753
+ # first array
2754
+ shape = list(ary.shape)
2755
+ else:
2756
+ if len(ary.shape) != len(shape):
2757
+ raise ValueError(
2758
+ f"{i_ary}-th array has different number of axes: "
2759
+                        f"expected {len(shape)}, got {len(ary.shape)}")
2760
+
2761
+ ary_shape_list = list(ary.shape)
2762
+ if (ary_shape_list[:axis] != shape[:axis]
2763
+ or ary_shape_list[axis+1:] != shape[axis+1:]):
2764
+ raise ValueError(
2765
+                    f"{i_ary}-th array's shape outside of axis {axis} does not match the other arrays")
2766
+
2767
+ # pylint: disable=unsupported-assignment-operation
2768
+ shape[axis] += ary.shape[axis]
2769
+
2770
+ # }}}
2771
+
2772
+ shape = tuple(shape)
2773
+ dtype = np.result_type(*[ary.dtype for ary in arrays])
2774
+
2775
+ if __debug__:
2776
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2777
+ for ary in arrays[1:]):
2778
+ warn("Elements of 'arrays' not of the same type, returning "
2779
+ "an instance of the type of arrays[0]",
2780
+ stacklevel=2)
2781
+
2782
+ result = arrays[0].__class__(queue, shape, dtype, allocator=allocator)
2783
+
2784
+ full_slice = (slice(None),) * len(shape)
2785
+
2786
+ base_idx = 0
2787
+ for ary in arrays:
2788
+ my_len = ary.shape[axis]
2789
+ result.setitem(
2790
+ (*full_slice[:axis],
2791
+ slice(base_idx, base_idx+my_len),
2792
+ *full_slice[axis+1:]),
2793
+ ary)
2794
+
2795
+ base_idx += my_len
2796
+
2797
+ return result
2798
+
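# Usage sketch for concatenate (illustrative): shapes must agree on every axis
# except the concatenation axis.
#
#     a = to_device(queue, np.ones((2, 3), dtype=np.float32))
#     b = to_device(queue, np.zeros((4, 3), dtype=np.float32))
#     assert concatenate((a, b), axis=0).shape == (6, 3)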
2799
+
2800
+ @elwise_kernel_runner
2801
+ def _diff(result, array):
2802
+ return elementwise.get_diff_kernel(array.context, array.dtype)
2803
+
2804
+
2805
+ def diff(array, queue: cl.CommandQueue | None = None, allocator=None):
2806
+ """
2807
+ .. versionadded:: 2013.2
2808
+ """
2809
+
2810
+ if len(array.shape) != 1:
2811
+ raise ValueError("multi-D arrays are not supported")
2812
+
2813
+ n, = array.shape
2814
+
2815
+ queue = queue or array.queue
2816
+ allocator = allocator or array.allocator
2817
+
2818
+ result = array.__class__(queue, (n-1,), array.dtype, allocator=allocator)
2819
+ event1 = _diff(result, array, queue=queue)
2820
+ result.add_event(event1)
2821
+ return result
2822
+
2823
+
2824
+ def hstack(arrays, queue: cl.CommandQueue | None = None):
2825
+ if len(arrays) == 0:
2826
+ raise ValueError("need at least one array to hstack")
2827
+
2828
+ if queue is None:
2829
+ for ary in arrays:
2830
+ if ary.queue is not None:
2831
+ queue = ary.queue
2832
+ break
2833
+
2834
+ from pytools import all_equal, single_valued
2835
+ if not all_equal(len(ary.shape) for ary in arrays):
2836
+ raise ValueError("arguments must all have the same number of axes")
2837
+
2838
+ lead_shape = single_valued(ary.shape[:-1] for ary in arrays)
2839
+
2840
+ w = builtins.sum(ary.shape[-1] for ary in arrays)
2841
+
2842
+ if __debug__:
2843
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2844
+ for ary in arrays[1:]):
2845
+ warn("Elements of 'arrays' not of the same type, returning "
2846
+ "an instance of the type of arrays[0]",
2847
+ stacklevel=2)
2848
+
2849
+ result = arrays[0].__class__(queue, (*lead_shape, w), arrays[0].dtype,
2850
+ allocator=arrays[0].allocator)
2851
+ index = 0
2852
+ for ary in arrays:
2853
+ result[..., index:index+ary.shape[-1]] = ary
2854
+ index += ary.shape[-1]
2855
+
2856
+ return result
2857
+
2858
+
2859
+ def stack(arrays, axis=0, queue: cl.CommandQueue | None = None):
2860
+ """
2861
+ Join a sequence of arrays along a new axis.
2862
+
2863
+ :arg arrays: A sequence of :class:`Array`.
2864
+ :arg axis: Index of the dimension of the new axis in the result array.
2865
+ Can be -1, for the new axis to be last dimension.
2866
+
2867
+ :returns: :class:`Array`
2868
+ """
2869
+ if not arrays:
2870
+ raise ValueError("need at least one array to stack")
2871
+
2872
+ input_shape = arrays[0].shape
2873
+ input_ndim = arrays[0].ndim
2874
+ axis = input_ndim if axis == -1 else axis
2875
+
2876
+ if queue is None:
2877
+ for ary in arrays:
2878
+ if ary.queue is not None:
2879
+ queue = ary.queue
2880
+ break
2881
+
2882
+ if not builtins.all(ary.shape == input_shape for ary in arrays[1:]):
2883
+ raise ValueError("arrays must have the same shape")
2884
+
2885
+ if not (0 <= axis <= input_ndim):
2886
+ raise ValueError("invalid axis")
2887
+
2888
+ if (axis == 0 and not builtins.all(
2889
+ ary.flags.c_contiguous for ary in arrays)):
2890
+ # pyopencl.Array.__setitem__ does not support non-contiguous assignments
2891
+ raise NotImplementedError
2892
+
2893
+ if (axis == input_ndim and not builtins.all(
2894
+ ary.flags.f_contiguous for ary in arrays)):
2895
+ # pyopencl.Array.__setitem__ does not support non-contiguous assignments
2896
+ raise NotImplementedError
2897
+
2898
+ result_shape = (*input_shape[:axis], len(arrays), *input_shape[axis:])
2899
+
2900
+ if __debug__:
2901
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2902
+ for ary in arrays[1:]):
2903
+ warn("Elements of 'arrays' not of the same type, returning "
2904
+ "an instance of the type of arrays[0]",
2905
+ stacklevel=2)
2906
+
2907
+ result = arrays[0].__class__(queue, result_shape,
2908
+ np.result_type(*(ary.dtype
2909
+ for ary in arrays)),
2910
+ # TODO: reconsider once arrays support
2911
+ # non-contiguous assignments
2912
+ order="C" if axis == 0 else "F",
2913
+ allocator=arrays[0].allocator)
2914
+ for i, ary in enumerate(arrays):
2915
+ idx = (slice(None),)*axis + (i,) + (slice(None),)*(input_ndim-axis)
2916
+ result[idx] = ary
2917
+
2918
+ return result
2919
+
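# Usage sketch for stack (illustrative): a new axis of length len(arrays) is
# inserted at position ``axis``.
#
#     a = to_device(queue, np.zeros((2, 3), dtype=np.float32))
#     b = to_device(queue, np.ones((2, 3), dtype=np.float32))
#     assert stack((a, b), axis=0).shape == (2, 2, 3)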
2920
+ # }}}
2921
+
2922
+
2923
+ # {{{ shape manipulation
2924
+
2925
+ def transpose(a, axes=None):
2926
+ """Permute the dimensions of an array.
2927
+
2928
+ :arg a: :class:`Array`
2929
+ :arg axes: list of ints, optional.
2930
+ By default, reverse the dimensions, otherwise permute the axes
2931
+ according to the values given.
2932
+
2933
+    :returns: :class:`Array`: A view of the array with its axes permuted.
2934
+ """
2935
+ return a.transpose(axes)
2936
+
2937
+
2938
+ def reshape(a, shape):
2939
+ """Gives a new shape to an array without changing its data.
2940
+
2941
+ .. versionadded:: 2015.2
2942
+ """
2943
+
2944
+ return a.reshape(shape)
2945
+
2946
+ # }}}
2947
+
2948
+
2949
+ # {{{ conditionals
2950
+
2951
+ @elwise_kernel_runner
2952
+ def _if_positive(result, criterion, then_, else_):
2953
+ return elementwise.get_if_positive_kernel(
2954
+ result.context, criterion.dtype, then_.dtype,
2955
+ is_then_array=isinstance(then_, Array),
2956
+ is_else_array=isinstance(else_, Array),
2957
+ is_then_scalar=then_.shape == (),
2958
+ is_else_scalar=else_.shape == (),
2959
+ )
2960
+
2961
+
2962
+ def if_positive(
2963
+ criterion,
2964
+ then_,
2965
+ else_,
2966
+ out=None,
2967
+ queue: cl.CommandQueue | None = None):
2968
+ """Return an array like *then_*, which, for the element at index *i*,
2969
+ contains *then_[i]* if *criterion[i]>0*, else *else_[i]*.
2970
+ """
2971
+
2972
+ is_then_scalar = isinstance(then_, SCALAR_CLASSES)
2973
+ is_else_scalar = isinstance(else_, SCALAR_CLASSES)
2974
+ if isinstance(criterion, SCALAR_CLASSES) and is_then_scalar and is_else_scalar:
2975
+ result = np.where(criterion, then_, else_)
2976
+
2977
+ if out is not None:
2978
+ out[...] = result
2979
+ return out
2980
+
2981
+ return result
2982
+
2983
+ if is_then_scalar:
2984
+ then_ = np.array(then_)
2985
+
2986
+ if is_else_scalar:
2987
+ else_ = np.array(else_)
2988
+
2989
+ if then_.dtype != else_.dtype:
2990
+ raise ValueError(
2991
+ f"dtypes do not match: then_ is '{then_.dtype}' and "
2992
+ f"else_ is '{else_.dtype}'")
2993
+
2994
+ if then_.shape == () and else_.shape == ():
2995
+ pass
2996
+ elif then_.shape != () and else_.shape != ():
2997
+ if not (criterion.shape == then_.shape == else_.shape):
2998
+ raise ValueError(
2999
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3000
+ f", 'then_' has shape {then_.shape} and 'else_' has shape "
3001
+ f"{else_.shape}")
3002
+ elif then_.shape == ():
3003
+ if criterion.shape != else_.shape:
3004
+ raise ValueError(
3005
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3006
+ f" and 'else_' has shape {else_.shape}")
3007
+ elif else_.shape == ():
3008
+ if criterion.shape != then_.shape:
3009
+ raise ValueError(
3010
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3011
+ f" and 'then_' has shape {then_.shape}")
3012
+ else:
3013
+ raise AssertionError()
3014
+
3015
+ if out is None:
3016
+ if then_.shape != ():
3017
+ out = empty_like(
3018
+ then_, criterion.queue, allocator=criterion.allocator)
3019
+ else:
3020
+ # Use same strides as criterion
3021
+ cr_byte_strides = np.array(criterion.strides, dtype=np.int64)
3022
+ cr_item_strides = cr_byte_strides // criterion.dtype.itemsize
3023
+ out_strides = tuple(cr_item_strides*then_.dtype.itemsize)
3024
+
3025
+ out = type(criterion)(
3026
+ criterion.queue, criterion.shape, then_.dtype,
3027
+ allocator=criterion.allocator,
3028
+ strides=out_strides)
3029
+
3030
+ event1 = _if_positive(out, criterion, then_, else_, queue=queue)
3031
+ out.add_event(event1)
3032
+
3033
+ return out
3034
+
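# Usage sketch for if_positive (illustrative): elementwise selection driven by
# the sign of ``criterion``.
#
#     crit = to_device(queue, np.array([1., -1., 2.], dtype=np.float32))
#     yes = to_device(queue, np.array([10., 20., 30.], dtype=np.float32))
#     no = to_device(queue, np.array([-1., -2., -3.], dtype=np.float32))
#     assert if_positive(crit, yes, no).get().tolist() == [10.0, -2.0, 30.0]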
3035
+ # }}}
3036
+
3037
+
3038
+ # {{{ minimum/maximum
3039
+
3040
+ @elwise_kernel_runner
3041
+ def _minimum_maximum_backend(out, a, b, minmax):
3042
+ from pyopencl.elementwise import get_minmaximum_kernel
3043
+ return get_minmaximum_kernel(out.context, minmax,
3044
+ out.dtype,
3045
+ a.dtype if isinstance(a, Array) else np.dtype(type(a)),
3046
+ b.dtype if isinstance(b, Array) else np.dtype(type(b)),
3047
+ elementwise.get_argument_kind(a),
3048
+ elementwise.get_argument_kind(b))
3049
+
3050
+
3051
+ def maximum(a, b, out=None, queue: cl.CommandQueue | None = None):
3052
+ """Return the elementwise maximum of *a* and *b*."""
3053
+
3054
+ a_is_scalar = np.isscalar(a)
3055
+ b_is_scalar = np.isscalar(b)
3056
+ if a_is_scalar and b_is_scalar:
3057
+ result = np.maximum(a, b)
3058
+ if out is not None:
3059
+ out[...] = result
3060
+ return out
3061
+
3062
+ return result
3063
+
3064
+ queue = queue or a.queue or b.queue
3065
+
3066
+ if out is None:
3067
+ out_dtype = _get_common_dtype(a, b, queue)
3068
+ if not a_is_scalar:
3069
+ out = a._new_like_me(out_dtype, queue)
3070
+ elif not b_is_scalar:
3071
+ out = b._new_like_me(out_dtype, queue)
3072
+
3073
+ out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="max"))
3074
+
3075
+ return out
3076
+
3077
+
3078
+ def minimum(a, b, out=None, queue: cl.CommandQueue | None = None):
3079
+ """Return the elementwise minimum of *a* and *b*."""
3080
+ a_is_scalar = np.isscalar(a)
3081
+ b_is_scalar = np.isscalar(b)
3082
+ if a_is_scalar and b_is_scalar:
3083
+ result = np.minimum(a, b)
3084
+ if out is not None:
3085
+ out[...] = result
3086
+ return out
3087
+
3088
+ return result
3089
+
3090
+ queue = queue or a.queue or b.queue
3091
+
3092
+ if out is None:
3093
+ out_dtype = _get_common_dtype(a, b, queue)
3094
+ if not a_is_scalar:
3095
+ out = a._new_like_me(out_dtype, queue)
3096
+ elif not b_is_scalar:
3097
+ out = b._new_like_me(out_dtype, queue)
3098
+
3099
+ out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="min"))
3100
+
3101
+ return out
3102
+
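# Usage sketch for maximum/minimum (illustrative): either argument may be a
# scalar or an Array.
#
#     x = to_device(queue, np.array([1., 5., 3.], dtype=np.float32))
#     assert maximum(x, 2.0).get().tolist() == [2.0, 5.0, 3.0]
#     assert minimum(x, 2.0).get().tolist() == [1.0, 2.0, 2.0]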
3103
+ # }}}
3104
+
3105
+
3106
+ # {{{ logical ops
3107
+
3108
+ def _logical_op(x1, x2, out, operator, queue: cl.CommandQueue | None = None):
3109
+ # NOTE: Copied from pycuda.gpuarray
3110
+ assert operator in ["&&", "||"]
3111
+
3112
+ if np.isscalar(x1) and np.isscalar(x2):
3113
+ if out is None:
3114
+ out = empty(queue, shape=(), dtype=np.int8)
3115
+
3116
+ if operator == "&&":
3117
+ out[:] = np.logical_and(x1, x2)
3118
+ else:
3119
+ out[:] = np.logical_or(x1, x2)
3120
+ elif np.isscalar(x1) or np.isscalar(x2):
3121
+ scalar_arg, = (x for x in (x1, x2) if np.isscalar(x))
3122
+ ary_arg, = (x for x in (x1, x2) if not np.isscalar(x))
3123
+ queue = queue or ary_arg.queue
3124
+ allocator = ary_arg.allocator
3125
+
3126
+ if not isinstance(ary_arg, Array):
3127
+            raise ValueError("logical_and/logical_or can take either scalar or Array"
3128
+ " as inputs")
3129
+
3130
+ out = out or ary_arg._new_like_me(dtype=np.int8)
3131
+
3132
+ assert out.shape == ary_arg.shape and out.dtype == np.int8
3133
+
3134
+ knl = elementwise.get_array_scalar_binop_kernel(
3135
+ queue.context,
3136
+ operator,
3137
+ out.dtype,
3138
+ ary_arg.dtype,
3139
+ np.dtype(type(scalar_arg))
3140
+ )
3141
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, ary_arg, scalar_arg)
3142
+ else:
3143
+ if not (isinstance(x1, Array) and isinstance(x2, Array)):
3144
+ raise ValueError("logical_or/logical_and can take either scalar"
3145
+ " or Arrays as inputs")
3146
+ if x1.shape != x2.shape:
3147
+ raise NotImplementedError("Broadcasting not supported")
3148
+
3149
+ queue = queue or x1.queue or x2.queue
3150
+ allocator = x1.allocator or x2.allocator
3151
+
3152
+ if out is None:
3153
+ out = empty(queue, allocator=allocator,
3154
+ shape=x1.shape, dtype=np.int8)
3155
+
3156
+ assert out.shape == x1.shape and out.dtype == np.int8
3157
+
3158
+ knl = elementwise.get_array_binop_kernel(
3159
+ queue.context,
3160
+ operator,
3161
+ out.dtype,
3162
+ x1.dtype, x2.dtype)
3163
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x1, x2)
3164
+
3165
+ return out
3166
+
3167
+
3168
+ def logical_and(x1, x2, /, out=None, queue: cl.CommandQueue | None = None):
3169
+ """
3170
+ Returns the element-wise logical AND of *x1* and *x2*.
3171
+ """
3172
+ return _logical_op(x1, x2, out, "&&", queue=queue)
3173
+
3174
+
3175
+ def logical_or(x1, x2, /, out=None, queue: cl.CommandQueue | None = None):
3176
+ """
3177
+ Returns the element-wise logical OR of *x1* and *x2*.
3178
+ """
3179
+ return _logical_op(x1, x2, out, "||", queue=queue)
3180
+
3181
+
3182
+ def logical_not(x, /, out=None, queue: cl.CommandQueue | None = None):
3183
+ """
3184
+ Returns the element-wise logical NOT of *x*.
3185
+ """
3186
+ if np.isscalar(x):
3187
+ out = out or empty(queue, shape=(), dtype=np.int8)
3188
+ out[:] = np.logical_not(x)
3189
+ else:
3190
+ queue = queue or x.queue
3191
+ out = out or empty(queue, shape=x.shape, dtype=np.int8,
3192
+ allocator=x.allocator)
3193
+ knl = elementwise.get_logical_not_kernel(queue.context,
3194
+ x.dtype)
3195
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x)
3196
+
3197
+ return out
3198
+
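# Usage sketch for the logical functions (illustrative): results are int8
# Arrays holding 0 or 1.
#
#     x = to_device(queue, np.array([0., 1., 2.], dtype=np.float32))
#     y = to_device(queue, np.array([1., 0., 2.], dtype=np.float32))
#     assert logical_and(x, y).get().tolist() == [0, 0, 1]
#     assert logical_not(x).get().tolist() == [1, 0, 0]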
3199
+ # }}}
3200
+
3201
+
3202
+ # {{{ reductions
3203
+
3204
+ def sum(
3205
+ a,
3206
+ dtype=None,
3207
+ queue: cl.CommandQueue | None = None,
3208
+ slice=None,
3209
+ initial=_NoValue):
3210
+ """
3211
+ .. versionadded:: 2011.1
3212
+ """
3213
+ if initial is not _NoValue and not isinstance(initial, SCALAR_CLASSES):
3214
+ raise ValueError("'initial' is not a scalar")
3215
+
3216
+ if dtype is not None:
3217
+ dtype = np.dtype(dtype)
3218
+
3219
+ from pyopencl.reduction import get_sum_kernel
3220
+ krnl = get_sum_kernel(a.context, dtype, a.dtype)
3221
+ result, event1 = krnl(a, queue=queue, slice=slice, wait_for=a.events,
3222
+ return_event=True)
3223
+ result.add_event(event1)
3224
+
3225
+ # NOTE: neutral element in `get_sum_kernel` is 0 by default
3226
+ if initial is not _NoValue:
3227
+ result += a.dtype.type(initial)
3228
+
3229
+ return result
3230
+
3231
+
3232
+ def any(a, queue: cl.CommandQueue | None = None, wait_for: cl.WaitList = None):
3233
+ if len(a) == 0:
3234
+ return _BOOL_DTYPE.type(False)
3235
+
3236
+ return a.any(queue=queue, wait_for=wait_for)
3237
+
3238
+
3239
+ def all(a, queue: cl.CommandQueue | None = None, wait_for: cl.WaitList = None):
3240
+ if len(a) == 0:
3241
+ return _BOOL_DTYPE.type(True)
3242
+
3243
+ return a.all(queue=queue, wait_for=wait_for)
3244
+
3245
+
3246
+ def dot(a, b, dtype=None, queue: cl.CommandQueue | None = None, slice=None):
3247
+ """
3248
+ .. versionadded:: 2011.1
3249
+ """
3250
+ if dtype is not None:
3251
+ dtype = np.dtype(dtype)
3252
+
3253
+ from pyopencl.reduction import get_dot_kernel
3254
+ krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype)
3255
+
3256
+ result, event1 = krnl(a, b, queue=queue, slice=slice,
3257
+ wait_for=a.events + b.events, return_event=True)
3258
+ result.add_event(event1)
3259
+
3260
+ return result
3261
+
3262
+
3263
+ def vdot(a, b, dtype=None, queue: cl.CommandQueue | None = None, slice=None):
3264
+ """Like :func:`numpy.vdot`.
3265
+
3266
+ .. versionadded:: 2013.1
3267
+ """
3268
+ if dtype is not None:
3269
+ dtype = np.dtype(dtype)
3270
+
3271
+ from pyopencl.reduction import get_dot_kernel
3272
+ krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype,
3273
+ conjugate_first=True)
3274
+
3275
+ result, event1 = krnl(a, b, queue=queue, slice=slice,
3276
+ wait_for=a.events + b.events, return_event=True)
3277
+ result.add_event(event1)
3278
+
3279
+ return result
3280
+
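# Usage sketch for dot/vdot (illustrative): both reduce to a single-element
# Array; vdot conjugates its first argument, which matters for complex dtypes.
#
#     u = to_device(queue, np.array([1., 2., 3.], dtype=np.float32))
#     v = to_device(queue, np.array([4., 5., 6.], dtype=np.float32))
#     assert float(dot(u, v).get()) == 32.0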
3281
+
3282
+ def subset_dot(
3283
+ subset,
3284
+ a,
3285
+ b,
3286
+ dtype=None,
3287
+ queue: cl.CommandQueue | None = None,
3288
+ slice=None):
3289
+ """
3290
+ .. versionadded:: 2011.1
3291
+ """
3292
+ if dtype is not None:
3293
+ dtype = np.dtype(dtype)
3294
+
3295
+ from pyopencl.reduction import get_subset_dot_kernel
3296
+ krnl = get_subset_dot_kernel(
3297
+ a.context, dtype, subset.dtype, a.dtype, b.dtype)
3298
+
3299
+ result, event1 = krnl(subset, a, b, queue=queue, slice=slice,
3300
+ wait_for=subset.events + a.events + b.events, return_event=True)
3301
+ result.add_event(event1)
3302
+
3303
+ return result
3304
+
3305
+
3306
+ def _make_minmax_kernel(what):
3307
+ def f(a, queue: cl.CommandQueue | None = None, initial=_NoValue):
3308
+ if isinstance(a, SCALAR_CLASSES):
3309
+ return np.array(a).dtype.type(a)
3310
+
3311
+ if len(a) == 0:
3312
+ if initial is _NoValue:
3313
+ raise ValueError(
3314
+ f"zero-size array to reduction '{what}' "
3315
+ "which has no identity")
3316
+ else:
3317
+ return initial
3318
+
3319
+ if initial is not _NoValue and not isinstance(initial, SCALAR_CLASSES):
3320
+ raise ValueError("'initial' is not a scalar")
3321
+
3322
+ from pyopencl.reduction import get_minmax_kernel
3323
+ krnl = get_minmax_kernel(a.context, what, a.dtype)
3324
+ result, event1 = krnl(a, queue=queue, wait_for=a.events,
3325
+ return_event=True)
3326
+ result.add_event(event1)
3327
+
3328
+ if initial is not _NoValue:
3329
+ initial = a.dtype.type(initial)
3330
+ if what == "min":
3331
+ result = minimum(result, initial, queue=queue)
3332
+ elif what == "max":
3333
+ result = maximum(result, initial, queue=queue)
3334
+ else:
3335
+ raise ValueError(f"unknown minmax reduction type: '{what}'")
3336
+
3337
+ return result
3338
+
3339
+ return f
3340
+
3341
+
3342
+ min = _make_minmax_kernel("min")
3343
+ min.__name__ = "min"
3344
+ min.__doc__ = """
3345
+ .. versionadded:: 2011.1
3346
+ """
3347
+
3348
+ max = _make_minmax_kernel("max")
3349
+ max.__name__ = "max"
3350
+ max.__doc__ = """
3351
+ .. versionadded:: 2011.1
3352
+ """
3353
+
3354
+
3355
+ def _make_subset_minmax_kernel(what):
3356
+ def f(subset, a, queue: cl.CommandQueue | None = None, slice=None):
3357
+ from pyopencl.reduction import get_subset_minmax_kernel
3358
+ krnl = get_subset_minmax_kernel(a.context, what, a.dtype, subset.dtype)
3359
+ result, event1 = krnl(subset, a, queue=queue, slice=slice,
3360
+ wait_for=a.events + subset.events, return_event=True)
3361
+ result.add_event(event1)
3362
+ return result
3363
+ return f
3364
+
3365
+
3366
+ subset_min = _make_subset_minmax_kernel("min")
3367
+ subset_min.__doc__ = """.. versionadded:: 2011.1"""
3368
+ subset_max = _make_subset_minmax_kernel("max")
3369
+ subset_max.__doc__ = """.. versionadded:: 2011.1"""
3370
+
3371
+ # }}}
3372
+
3373
+
3374
+ # {{{ scans
3375
+
3376
+ def cumsum(a, output_dtype=None, queue: cl.CommandQueue | None = None,
3377
+ wait_for: cl.WaitList = None, return_event=False):
3378
+ # undocumented for now
3379
+
3380
+ """
3381
+ .. versionadded:: 2013.1
3382
+ """
3383
+
3384
+ if output_dtype is None:
3385
+ output_dtype = a.dtype
3386
+ else:
3387
+ output_dtype = np.dtype(output_dtype)
3388
+
3389
+ if wait_for is None:
3390
+ wait_for = []
3391
+
3392
+ result = a._new_like_me(output_dtype)
3393
+
3394
+ from pyopencl.scan import get_cumsum_kernel
3395
+ krnl = get_cumsum_kernel(a.context, a.dtype, output_dtype)
3396
+ evt = krnl(a, result, queue=queue, wait_for=wait_for + a.events)
3397
+ result.add_event(evt)
3398
+
3399
+ if return_event:
3400
+ return evt, result
3401
+ else:
3402
+ return result
3403
+
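# Usage sketch for cumsum (illustrative): an inclusive prefix sum, matching
# numpy.cumsum.
#
#     x = to_device(queue, np.array([1, 2, 3, 4], dtype=np.int32))
#     assert cumsum(x).get().tolist() == [1, 3, 6, 10]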
3404
+ # }}}
3405
+
3406
+
3407
+ __all__ = [
3408
+ "Allocator",
3409
+ "Array",
3410
+ "all",
3411
+ "any",
3412
+ "arange",
3413
+ "as_strided",
3414
+ "concatenate",
3415
+ "cumsum",
3416
+ "diff",
3417
+ "dot",
3418
+ "empty_like",
3419
+ "hstack",
3420
+ "if_positive",
3421
+ "logical_and",
3422
+ "logical_not",
3423
+ "logical_or",
3424
+ "maximum",
3425
+ "minimum",
3426
+ "multi_put",
3427
+ "multi_take",
3428
+ "multi_take_put",
3429
+ "reshape",
3430
+ "stack",
3431
+ "subset_dot",
3432
+ "sum",
3433
+ "take",
3434
+ "to_device",
3435
+ "transpose",
3436
+ "vdot",
3437
+ "zeros",
3438
+ "zeros_like",
3439
+ ]
3440
+
3441
+ # vim: foldmethod=marker