pyopencl 2025.1__cp313-cp313-macosx_10_14_x86_64.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic.

Files changed (42)
  1. pyopencl/__init__.py +2410 -0
  2. pyopencl/_cl.cpython-313-darwin.so +0 -0
  3. pyopencl/_cluda.py +54 -0
  4. pyopencl/_mymako.py +14 -0
  5. pyopencl/algorithm.py +1449 -0
  6. pyopencl/array.py +3362 -0
  7. pyopencl/bitonic_sort.py +242 -0
  8. pyopencl/bitonic_sort_templates.py +594 -0
  9. pyopencl/cache.py +535 -0
  10. pyopencl/capture_call.py +177 -0
  11. pyopencl/characterize/__init__.py +456 -0
  12. pyopencl/characterize/performance.py +237 -0
  13. pyopencl/cl/pyopencl-airy.cl +324 -0
  14. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  15. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  16. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  17. pyopencl/cl/pyopencl-complex.h +303 -0
  18. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  19. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  20. pyopencl/cl/pyopencl-random123/array.h +325 -0
  21. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  22. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  23. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  24. pyopencl/clmath.py +280 -0
  25. pyopencl/clrandom.py +409 -0
  26. pyopencl/cltypes.py +137 -0
  27. pyopencl/compyte/.gitignore +21 -0
  28. pyopencl/compyte/__init__.py +0 -0
  29. pyopencl/compyte/array.py +214 -0
  30. pyopencl/compyte/dtypes.py +290 -0
  31. pyopencl/compyte/pyproject.toml +54 -0
  32. pyopencl/elementwise.py +1171 -0
  33. pyopencl/invoker.py +421 -0
  34. pyopencl/ipython_ext.py +68 -0
  35. pyopencl/reduction.py +786 -0
  36. pyopencl/scan.py +1915 -0
  37. pyopencl/tools.py +1527 -0
  38. pyopencl/version.py +9 -0
  39. pyopencl-2025.1.dist-info/METADATA +108 -0
  40. pyopencl-2025.1.dist-info/RECORD +42 -0
  41. pyopencl-2025.1.dist-info/WHEEL +5 -0
  42. pyopencl-2025.1.dist-info/licenses/LICENSE +282 -0
pyopencl/array.py ADDED
@@ -0,0 +1,3362 @@
1
+ """CL device arrays."""
2
+
3
+ # NOTE: for elwise_kernel_runner which adds keyword arguments
4
+ # pylint:disable=unexpected-keyword-arg
5
+
6
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
7
+
8
+ __license__ = """
9
+ Permission is hereby granted, free of charge, to any person
10
+ obtaining a copy of this software and associated documentation
11
+ files (the "Software"), to deal in the Software without
12
+ restriction, including without limitation the rights to use,
13
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the
15
+ Software is furnished to do so, subject to the following
16
+ conditions:
17
+
18
+ The above copyright notice and this permission notice shall be
19
+ included in all copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28
+ OTHER DEALINGS IN THE SOFTWARE.
29
+ """
30
+
31
+ import builtins
32
+ from dataclasses import dataclass
33
+ from functools import reduce
34
+ from numbers import Number
35
+ from typing import Any, Dict, List, Optional, Tuple, Union
36
+ from warnings import warn
37
+
38
+ import numpy as np
39
+
40
+ import pyopencl as cl
41
+ import pyopencl.elementwise as elementwise
42
+ from pyopencl import cltypes
43
+ from pyopencl.characterize import has_double_support
44
+ from pyopencl.compyte.array import (
45
+ ArrayFlags as _ArrayFlags,
46
+ as_strided as _as_strided,
47
+ c_contiguous_strides as _c_contiguous_strides,
48
+ equal_strides as _equal_strides,
49
+ f_contiguous_strides as _f_contiguous_strides,
50
+ )
51
+
52
+
53
+ SCALAR_CLASSES = (Number, np.bool_, bool)
54
+
55
+ if cl.get_cl_header_version() >= (2, 0):
56
+ _SVMPointer_or_nothing = cl.SVMPointer
57
+ else:
58
+ _SVMPointer_or_nothing = ()
59
+
60
+
61
+ # {{{ _get_common_dtype
62
+
63
+ class DoubleDowncastWarning(UserWarning):
64
+ pass
65
+
66
+
67
+ _DOUBLE_DOWNCAST_WARNING = (
68
+ "The operation you requested would result in a double-precision "
69
+ "quantity according to numpy semantics. Since your device does not "
70
+ "support double precision, a single-precision quantity is being returned.")
71
+
72
+
73
+ def _get_common_dtype(obj1, obj2, queue):
74
+ if queue is None:
75
+ raise ValueError("PyOpenCL array has no queue; call .with_queue() to "
76
+ "add one in order to be able to perform operations")
77
+
78
+ # Note: We are calling np.result_type with pyopencl arrays here.
79
+ # Luckily, np.result_type only looks at the dtype of input arrays up until
80
+ # at least numpy v2.1.
81
+ result = np.result_type(obj1, obj2)
82
+
83
+ if not has_double_support(queue.device):
84
+ if result == np.float64:
85
+ result = np.dtype(np.float32)
86
+ warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
87
+ elif result == np.complex128:
88
+ result = np.dtype(np.complex64)
89
+ warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
90
+
91
+ return result
92
+
93
+ # }}}
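A minimal illustration of the downcast behavior handled by this helper -- a sketch, assuming a working OpenCL platform reachable via create_some_context() and using the module-level to_device helper from pyopencl.array:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.arange(4, dtype=np.float32))
    b = cl_array.to_device(queue, np.arange(4, dtype=np.float64))

    # numpy's result_type promotes float32 + float64 to float64; on a device
    # without double support, _get_common_dtype instead falls back to float32
    # and emits DoubleDowncastWarning.
    c = a + b
    print(c.dtype)   # float64 on a double-capable device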
94
+
95
+
96
+ # {{{ _get_truedivide_dtype
97
+
98
+ def _get_truedivide_dtype(obj1, obj2, queue):
99
+ # the dtype of the division result obj1 / obj2
100
+
101
+ allow_double = has_double_support(queue.device)
102
+
103
+ x1 = obj1 if np.isscalar(obj1) else np.ones(1, obj1.dtype)
104
+ x2 = obj2 if np.isscalar(obj2) else np.ones(1, obj2.dtype)
105
+
106
+ result = (x1/x2).dtype
107
+
108
+ if not allow_double:
109
+ if result == np.float64:
110
+ result = np.dtype(np.float32)
111
+ elif result == np.complex128:
112
+ result = np.dtype(np.complex64)
113
+
114
+ return result
115
+
116
+ # }}}
117
+
118
+
119
+ # {{{ _get_broadcasted_binary_op_result
120
+
121
+ def _get_broadcasted_binary_op_result(obj1, obj2, cq,
122
+ dtype_getter=_get_common_dtype):
123
+
124
+ if obj1.shape == obj2.shape:
125
+ return obj1._new_like_me(dtype_getter(obj1, obj2, cq),
126
+ cq)
127
+ elif obj1.shape == ():
128
+ return obj2._new_like_me(dtype_getter(obj1, obj2, cq),
129
+ cq)
130
+ elif obj2.shape == ():
131
+ return obj1._new_like_me(dtype_getter(obj1, obj2, cq),
132
+ cq)
133
+ else:
134
+ raise NotImplementedError("Broadcasting binary operator with shapes:"
135
+ f" {obj1.shape}, {obj2.shape}.")
136
+
137
+ # }}}
138
+
139
+
140
+ # {{{ VecLookupWarner
141
+
142
+ class VecLookupWarner:
143
+ def __getattr__(self, name):
144
+ warn("pyopencl.array.vec is deprecated. "
145
+ "Please use pyopencl.cltypes for OpenCL vector and scalar types",
146
+ DeprecationWarning, stacklevel=2)
147
+
148
+ if name == "types":
149
+ name = "vec_types"
150
+ elif name == "type_to_scalar_and_count":
151
+ name = "vec_type_to_scalar_and_count"
152
+
153
+ return getattr(cltypes, name)
154
+
155
+
156
+ vec = VecLookupWarner()
157
+
158
+ # }}}
159
+
160
+
161
+ # {{{ helper functionality
162
+
163
+ def _splay(device, n, kernel_specific_max_wg_size=None):
164
+ max_work_items = builtins.min(128, device.max_work_group_size)
165
+
166
+ if kernel_specific_max_wg_size is not None:
167
+ max_work_items = builtins.min(max_work_items, kernel_specific_max_wg_size)
168
+
169
+ min_work_items = builtins.min(32, max_work_items)
170
+ max_groups = device.max_compute_units * 4 * 8
171
+ # 4 to overfill the device
172
+ # 8 is an Nvidia constant--that's how many
173
+ # groups fit onto one compute device
174
+
175
+ if n < min_work_items:
176
+ group_count = 1
177
+ work_items_per_group = min_work_items
178
+ elif n < (max_groups * min_work_items):
179
+ group_count = (n + min_work_items - 1) // min_work_items
180
+ work_items_per_group = min_work_items
181
+ elif n < (max_groups * max_work_items):
182
+ group_count = max_groups
183
+ grp = (n + min_work_items - 1) // min_work_items
184
+ work_items_per_group = (
185
+ (grp + max_groups - 1) // max_groups) * min_work_items
186
+ else:
187
+ group_count = max_groups
188
+ work_items_per_group = max_work_items
189
+
190
+ # print("n:%d gc:%d wipg:%d" % (n, group_count, work_items_per_group))
191
+ return (group_count*work_items_per_group,), (work_items_per_group,)
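To make the splitting rule above concrete, here is a small sketch that feeds _splay a stand-in device object with made-up capabilities; the helper only reads max_work_group_size and max_compute_units, so no real device is needed:

    from types import SimpleNamespace
    from pyopencl.array import _splay

    # Hypothetical device: work groups of up to 256 items, 16 compute units.
    dev = SimpleNamespace(max_work_group_size=256, max_compute_units=16)

    # max_work_items = min(128, 256) = 128, min_work_items = 32,
    # max_groups = 16 * 4 * 8 = 512.
    print(_splay(dev, 10_000))    # ((10016,), (32,))  -> 313 groups of 32
    print(_splay(dev, 100_000))   # ((65536,), (128,)) -> grid smaller than n

In the last case the global size is smaller than n; the generated elementwise kernels stride over the remaining indices, so each work item processes several entries.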
192
+
193
+
194
+ # deliberately undocumented for now
195
+ ARRAY_KERNEL_EXEC_HOOK = None
196
+
197
+
198
+ def elwise_kernel_runner(kernel_getter):
199
+ """Take a kernel getter of the same signature as the kernel
200
+ and return a function that invokes that kernel.
201
+
202
+ Assumes that the zeroth entry in *args* is an :class:`Array`.
203
+ """
204
+ from functools import wraps
205
+
206
+ @wraps(kernel_getter)
207
+ def kernel_runner(out, *args, **kwargs):
208
+ assert isinstance(out, Array)
209
+
210
+ wait_for = kwargs.pop("wait_for", None)
211
+ queue = kwargs.pop("queue", None)
212
+ if queue is None:
213
+ queue = out.queue
214
+
215
+ assert queue is not None
216
+
217
+ knl = kernel_getter(out, *args, **kwargs)
218
+ work_group_info = knl.get_work_group_info(
219
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
220
+ queue.device)
221
+ gs, ls = out._get_sizes(queue, work_group_info)
222
+
223
+ args = (out, *args, out.size)
224
+ if ARRAY_KERNEL_EXEC_HOOK is not None:
225
+ return ARRAY_KERNEL_EXEC_HOOK( # pylint: disable=not-callable
226
+ knl, queue, gs, ls, *args, wait_for=wait_for)
227
+ else:
228
+ return knl(queue, gs, ls, *args, wait_for=wait_for)
229
+
230
+ return kernel_runner
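The module-level ARRAY_KERNEL_EXEC_HOOK defined below is consulted by this runner on every launch. A minimal sketch of a hook that merely logs and forwards the call -- the signature mirrors the invocation above, and the hook itself is an internal, deliberately undocumented facility:

    import pyopencl.array as cl_array

    def logging_hook(knl, queue, gs, ls, *args, wait_for=None):
        # Print the kernel name and launch geometry, then run it unchanged.
        print(f"launching {knl.function_name}: global={gs} local={ls}")
        return knl(queue, gs, ls, *args, wait_for=wait_for)

    cl_array.ARRAY_KERNEL_EXEC_HOOK = logging_hook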
231
+
232
+
233
+ class DefaultAllocator(cl.tools.DeferredAllocator):
234
+ def __init__(self, *args, **kwargs):
235
+ warn("pyopencl.array.DefaultAllocator is deprecated. "
236
+ "It will be continue to exist throughout the 2013.x "
237
+ "versions of PyOpenCL.",
238
+ DeprecationWarning, stacklevel=2)
239
+ cl.tools.DeferredAllocator.__init__(self, *args, **kwargs)
240
+
241
+ # }}}
242
+
243
+
244
+ # {{{ array class
245
+
246
+ class InconsistentOpenCLQueueWarning(UserWarning):
247
+ pass
248
+
249
+
250
+ class ArrayHasOffsetError(ValueError):
251
+ """
252
+ .. versionadded:: 2013.1
253
+ """
254
+
255
+ def __init__(self, val="The operation you are attempting does not yet "
256
+ "support arrays that start at an offset from the beginning "
257
+ "of their buffer."):
258
+ ValueError.__init__(self, val)
259
+
260
+
261
+ class _copy_queue: # noqa: N801
262
+ pass
263
+
264
+
265
+ _ARRAY_GET_SIZES_CACHE: Dict[Tuple[int, int, int], Tuple[int, int]] = {}
266
+ _BOOL_DTYPE = np.dtype(np.int8)
267
+ _NOT_PRESENT = object()
268
+
269
+
270
+ class Array:
271
+ """A :class:`numpy.ndarray` work-alike that stores its data and performs
272
+ its computations on the compute device. :attr:`shape` and :attr:`dtype` work
273
+ exactly as in :mod:`numpy`. Arithmetic methods in :class:`Array` support the
274
+ broadcasting of scalars (e.g. ``array + 5``).
275
+
276
+ *cq* must be a :class:`~pyopencl.CommandQueue` or a :class:`~pyopencl.Context`.
277
+
278
+ If it is a queue, *cq* specifies the queue in which the array carries out
279
+ its computations by default. If a default queue (and thereby overloaded
280
+ operators and many other niceties) are not desired, pass a
281
+ :class:`~pyopencl.Context`.
282
+
283
+ *allocator* may be *None* or a callable that, upon being called with an
284
+ argument of the number of bytes to be allocated, returns a
285
+ :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool`
286
+ instance is one useful example of an object to pass here.)
287
+
288
+ .. versionchanged:: 2011.1
289
+
290
+ Renamed *context* to *cqa*, made it general-purpose.
291
+
292
+ All arguments beyond *order* should be considered keyword-only.
293
+
294
+ .. versionchanged:: 2015.2
295
+
296
+ Renamed *context* to *cq*, disallowed passing allocators through it.
297
+
298
+ .. attribute :: data
299
+
300
+ The :class:`pyopencl.MemoryObject` instance created for the memory that
301
+ backs this :class:`Array`.
302
+
303
+ .. versionchanged:: 2013.1
304
+
305
+ If a non-zero :attr:`offset` has been specified for this array,
306
+ this will fail with :exc:`ArrayHasOffsetError`.
307
+
308
+ .. attribute :: base_data
309
+
310
+ The :class:`pyopencl.MemoryObject` instance created for the memory that
311
+ backs this :class:`Array`. Unlike :attr:`data`, the base address of
312
+ *base_data* is allowed to be different from the beginning of the array.
313
+ The actual beginning is the base address of *base_data* plus
314
+ :attr:`offset` bytes.
315
+
316
+ Unlike :attr:`data`, retrieving :attr:`base_data` always succeeds.
317
+
318
+ .. versionadded:: 2013.1
319
+
320
+ .. attribute :: offset
321
+
322
+ See :attr:`base_data`.
323
+
324
+ .. versionadded:: 2013.1
325
+
326
+ .. attribute :: shape
327
+
328
+ A tuple of lengths of each dimension in the array.
329
+
330
+ .. attribute :: ndim
331
+
332
+ The number of dimensions in :attr:`shape`.
333
+
334
+ .. attribute :: dtype
335
+
336
+ The :class:`numpy.dtype` of the items in the GPU array.
337
+
338
+ .. attribute :: size
339
+
340
+ The number of meaningful entries in the array. Can also be computed by
341
+ multiplying up the numbers in :attr:`shape`.
342
+
343
+ .. attribute :: nbytes
344
+
345
+ The size of the entire array in bytes. Computed as :attr:`size` times
346
+ ``dtype.itemsize``.
347
+
348
+ .. attribute :: strides
349
+
350
+ A tuple of bytes to step in each dimension when traversing an array.
351
+
352
+ .. attribute :: flags
353
+
354
+ An object with attributes ``c_contiguous``, ``f_contiguous`` and
355
+ ``forc``, which may be used to query contiguity properties in analogy to
356
+ :attr:`numpy.ndarray.flags`.
357
+
358
+ .. rubric:: Methods
359
+
360
+ .. automethod :: with_queue
361
+
362
+ .. automethod :: __len__
363
+ .. automethod :: reshape
364
+ .. automethod :: ravel
365
+ .. automethod :: view
366
+ .. automethod :: squeeze
367
+ .. automethod :: transpose
368
+ .. attribute :: T
369
+ .. automethod :: set
370
+ .. automethod :: get
371
+ .. automethod :: get_async
372
+ .. automethod :: copy
373
+
374
+ .. automethod :: __str__
375
+ .. automethod :: __repr__
376
+
377
+ .. automethod :: mul_add
378
+ .. automethod :: __add__
379
+ .. automethod :: __sub__
380
+ .. automethod :: __iadd__
381
+ .. automethod :: __isub__
382
+ .. automethod :: __pos__
383
+ .. automethod :: __neg__
384
+ .. automethod :: __mul__
385
+ .. automethod :: __div__
386
+ .. automethod :: __rdiv__
387
+ .. automethod :: __pow__
388
+
389
+ .. automethod :: __and__
390
+ .. automethod :: __xor__
391
+ .. automethod :: __or__
392
+ .. automethod :: __iand__
393
+ .. automethod :: __ixor__
394
+ .. automethod :: __ior__
395
+
396
+ .. automethod :: __abs__
397
+ .. automethod :: __invert__
398
+
399
+ .. UNDOC reverse()
400
+
401
+ .. automethod :: fill
402
+
403
+ .. automethod :: astype
404
+
405
+ .. autoattribute :: real
406
+ .. autoattribute :: imag
407
+ .. automethod :: conj
408
+ .. automethod :: conjugate
409
+
410
+ .. automethod :: __getitem__
411
+ .. automethod :: __setitem__
412
+
413
+ .. automethod :: setitem
414
+
415
+ .. automethod :: map_to_host
416
+
417
+ .. rubric:: Comparisons, conditionals, any, all
418
+
419
+ .. versionadded:: 2013.2
420
+
421
+ Boolean arrays are stored as :class:`numpy.int8` because ``bool``
422
+ has an unspecified size in the OpenCL spec.
423
+
424
+ .. automethod :: __bool__
425
+
426
+ Only works for device scalars. (i.e. "arrays" with ``shape == ()``)
427
+
428
+ .. automethod :: any
429
+ .. automethod :: all
430
+
431
+ .. automethod :: __eq__
432
+ .. automethod :: __ne__
433
+ .. automethod :: __lt__
434
+ .. automethod :: __le__
435
+ .. automethod :: __gt__
436
+ .. automethod :: __ge__
437
+
438
+ .. rubric:: Event management
439
+
440
+ If an array is used from within an out-of-order queue, it needs to take
441
+ care of its own operation ordering. The facilities in this section make
442
+ this possible.
443
+
444
+ .. versionadded:: 2014.1.1
445
+
446
+ .. attribute:: events
447
+
448
+ A list of :class:`pyopencl.Event` instances that the current content of
449
+ this array depends on. User code may read, but should never modify this
450
+ list directly. To update this list, instead use the following methods.
451
+
452
+ .. automethod:: add_event
453
+ .. automethod:: finish
454
+ """
455
+
456
+ __array_priority__ = 100
457
+
458
+ def __init__(
459
+ self,
460
+ cq: Optional[Union[cl.Context, cl.CommandQueue]],
461
+ shape: Union[Tuple[int, ...], int],
462
+ dtype: Any,
463
+ order: str = "C",
464
+ allocator: Optional[cl.tools.AllocatorBase] = None,
465
+ data: Any = None,
466
+ offset: int = 0,
467
+ strides: Optional[Tuple[int, ...]] = None,
468
+ events: Optional[List[cl.Event]] = None,
469
+
470
+ # NOTE: following args are used for the fast constructor
471
+ _flags: Any = None,
472
+ _fast: bool = False,
473
+ _size: Optional[int] = None,
474
+ _context: Optional[cl.Context] = None,
475
+ _queue: Optional[cl.CommandQueue] = None) -> None:
476
+ if _fast:
477
+ # Assumptions, should be disabled if not testing
478
+ if 0:
479
+ assert cq is None
480
+ assert isinstance(_context, cl.Context)
481
+ assert _queue is None or isinstance(_queue, cl.CommandQueue)
482
+ assert isinstance(shape, tuple)
483
+ assert isinstance(strides, tuple)
484
+ assert isinstance(dtype, np.dtype)
485
+ assert _size is not None
486
+
487
+ size = _size
488
+ context = _context
489
+ queue = _queue
490
+ alloc_nbytes = dtype.itemsize * size
491
+
492
+ else:
493
+ # {{{ backward compatibility
494
+
495
+ if cq is None:
496
+ context = _context
497
+ queue = _queue
498
+
499
+ elif isinstance(cq, cl.CommandQueue):
500
+ queue = cq
501
+ context = queue.context
502
+
503
+ elif isinstance(cq, cl.Context):
504
+ context = cq
505
+ queue = None
506
+
507
+ else:
508
+ raise TypeError(
509
+ f"cq may be a queue or a context, not '{type(cq).__name__}'")
510
+
511
+ if allocator is not None:
512
+ # "is" would be wrong because two Python objects are allowed
513
+ # to hold handles to the same context.
514
+
515
+ # FIXME It would be nice to check this. But it would require
516
+ # changing the allocator interface. Trust the user for now.
517
+
518
+ # assert allocator.context == context
519
+ pass
520
+
521
+ # Queue-less arrays do have a purpose in life.
522
+ # They don't do very much, but at least they don't run kernels
523
+ # in random queues.
524
+ #
525
+ # See also :meth:`with_queue`.
526
+
527
+ del cq
528
+
529
+ # }}}
530
+
531
+ # invariant here: allocator, queue set
532
+
533
+ # {{{ determine shape, size, and strides
534
+
535
+ dtype = np.dtype(dtype)
536
+
537
+ try:
538
+ shape = tuple(shape) # type: ignore[arg-type]
539
+ except TypeError as err:
540
+ if not isinstance(shape, (int, np.integer)):
541
+ raise TypeError(
542
+ "shape must either be iterable or castable to an integer: "
543
+ f"got a '{type(shape).__name__}'") from err
544
+
545
+ shape = (shape,)
546
+
547
+ shape_array = np.array(shape)
548
+
549
+ # Previously, the size was computed as
550
+ # "size = 1; size *= dim for dim in shape"
551
+ # However this can fail when using certain data types,
552
+ # e.g. numpy.uint64(1) * 2 returns 2.0!
553
+ if np.any(shape_array < 0):
554
+ raise ValueError(f"negative dimensions are not allowed: {shape}")
555
+ if np.any([np.array([s]).dtype.kind not in ["u", "i"] for s in shape]):
556
+ raise ValueError(
557
+ "Invalid shape %s ; dimensions, must be integer" % (str(shape)))
558
+ size = np.prod(shape_array, dtype=np.uint64).item()
559
+
560
+ if strides is None:
561
+ if order in "cC":
562
+ # inlined from compyte.array.c_contiguous_strides
563
+ if shape:
564
+ strides_tmp = [dtype.itemsize]
565
+ for s in shape[:0:-1]:
566
+ # NOTE: https://github.com/inducer/compyte/pull/36
567
+ strides_tmp.append(strides_tmp[-1]*builtins.max(1, s))
568
+ strides = tuple(strides_tmp[::-1])
569
+ else:
570
+ strides = ()
571
+ elif order in "fF":
572
+ strides = _f_contiguous_strides(dtype.itemsize, shape)
573
+ else:
574
+ raise ValueError(f"invalid order: {order}")
575
+
576
+ else:
577
+ # FIXME: We should possibly perform some plausibility
578
+ # checking on 'strides' here.
579
+
580
+ strides = tuple(strides)
581
+
582
+ # }}}
583
+
584
+ assert dtype != object, \
585
+ "object arrays on the compute device are not allowed" # noqa: E721
586
+ assert isinstance(shape, tuple)
587
+ assert isinstance(strides, tuple)
588
+
589
+ alloc_nbytes = dtype.itemsize * size
590
+
591
+ if alloc_nbytes < 0:
592
+ raise ValueError("cannot allocate CL buffer with negative size")
593
+
594
+ self.queue = queue
595
+ self.shape = shape
596
+ self.dtype = dtype
597
+ self.strides = strides
598
+ self.events = [] if events is None else events
599
+ self.nbytes = alloc_nbytes
600
+ self.size = size
601
+ self.allocator = allocator
602
+
603
+ if data is None:
604
+ if alloc_nbytes == 0:
605
+ self.base_data = None
606
+
607
+ else:
608
+ if self.allocator is None:
609
+ if context is None and queue is not None:
610
+ context = queue.context
611
+
612
+ self.base_data = cl.Buffer(
613
+ context, cl.mem_flags.READ_WRITE, alloc_nbytes)
614
+ else:
615
+ self.base_data = self.allocator(alloc_nbytes)
616
+ else:
617
+ self.base_data = data
618
+
619
+ self.offset = offset
620
+ self.context = context
621
+ self._flags = _flags
622
+
623
+ if __debug__:
624
+ if queue is not None and isinstance(
625
+ self.base_data, _SVMPointer_or_nothing):
626
+ mem_queue = getattr(self.base_data, "_queue", _NOT_PRESENT)
627
+ if mem_queue is not _NOT_PRESENT and mem_queue != queue:
628
+ warn("Array has different queue from backing SVM memory. "
629
+ "This may lead to the array getting deallocated sooner "
630
+ "than expected, potentially leading to crashes.",
631
+ InconsistentOpenCLQueueWarning, stacklevel=2)
632
+
633
+ @property
634
+ def ndim(self):
635
+ return len(self.shape)
636
+
637
+ @property
638
+ def data(self):
639
+ if self.offset:
640
+ raise ArrayHasOffsetError()
641
+ else:
642
+ return self.base_data
643
+
644
+ @property
645
+ def flags(self):
646
+ f = self._flags
647
+ if f is None:
648
+ self._flags = f = _ArrayFlags(self)
649
+ return f
650
+
651
+ def _new_with_changes(self, data, offset, shape=None, dtype=None,
652
+ strides=None, queue=_copy_queue, allocator=None):
653
+ """
654
+ :arg data: *None* means allocate a new array.
655
+ """
656
+ fast = True
657
+ size = self.size
658
+ if shape is None:
659
+ shape = self.shape
660
+ else:
661
+ fast = False
662
+ size = None
663
+
664
+ if dtype is None:
665
+ dtype = self.dtype
666
+ if strides is None:
667
+ strides = self.strides
668
+ if queue is _copy_queue:
669
+ queue = self.queue
670
+ if allocator is None:
671
+ allocator = self.allocator
672
+
673
+ # If we're allocating new data, then there's not likely to be
674
+ # a data dependency. Otherwise, the two arrays should probably
675
+ # share the same events list.
676
+
677
+ if data is None:
678
+ events = None
679
+ else:
680
+ events = self.events
681
+
682
+ return self.__class__(None, shape, dtype, allocator=allocator,
683
+ strides=strides, data=data, offset=offset,
684
+ events=events,
685
+ _fast=fast, _context=self.context, _queue=queue, _size=size)
686
+
687
+ def with_queue(self, queue):
688
+ """Return a copy of *self* with the default queue set to *queue*.
689
+
690
+ *None* is allowed as a value for *queue*.
691
+
692
+ .. versionadded:: 2013.1
693
+ """
694
+
695
+ if queue is not None:
696
+ assert queue.context == self.context
697
+
698
+ return self._new_with_changes(self.base_data, self.offset,
699
+ queue=queue)
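A short sketch of the queue-less workflow described in the class docstring, assuming a working OpenCL platform: an array constructed against a Context cannot run kernels until a default queue is attached with with_queue().

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.Array(ctx, (100,), np.float32)   # no default queue yet
    a = a.with_queue(queue)                       # copy bound to `queue`
    a.fill(np.float32(1.0))                       # now kernels can run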
700
+
701
+ def _get_sizes(self, queue, kernel_specific_max_wg_size=None):
702
+ if not self.flags.forc:
703
+ raise NotImplementedError("cannot operate on non-contiguous array")
704
+ cache_key = (queue.device.int_ptr, self.size, kernel_specific_max_wg_size)
705
+ try:
706
+ return _ARRAY_GET_SIZES_CACHE[cache_key]
707
+ except KeyError:
708
+ sizes = _splay(queue.device, self.size,
709
+ kernel_specific_max_wg_size=kernel_specific_max_wg_size)
710
+ _ARRAY_GET_SIZES_CACHE[cache_key] = sizes
711
+ return sizes
712
+
713
+ def set(self, ary, queue=None, async_=None, **kwargs):
714
+ """Transfer the contents the :class:`numpy.ndarray` object *ary*
715
+ onto the device.
716
+
717
+ *ary* must have the same dtype and size (not necessarily shape) as
718
+ *self*.
719
+
720
+ *async_* is a Boolean indicating whether the function is allowed
721
+ to return before the transfer completes. To avoid synchronization
722
+ bugs, this defaults to *False*.
723
+
724
+ .. versionchanged:: 2017.2.1
725
+
726
+ Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
727
+ we will continue to accept *async* as a parameter, however this
728
+ should be considered deprecated. *async_* is the new, official
729
+ spelling.
730
+ """
731
+
732
+ # {{{ handle 'async' deprecation
733
+
734
+ async_arg = kwargs.pop("async", None)
735
+ if async_arg is not None:
736
+ if async_ is not None:
737
+ raise TypeError("may not specify both 'async' and 'async_'")
738
+ async_ = async_arg
739
+
740
+ if async_ is None:
741
+ async_ = False
742
+
743
+ if kwargs:
744
+ raise TypeError("extra keyword arguments specified: %s"
745
+ % ", ".join(kwargs))
746
+
747
+ # }}}
748
+
749
+ assert ary.size == self.size
750
+ assert ary.dtype == self.dtype
751
+
752
+ if not ary.flags.forc:
753
+ raise RuntimeError("cannot set from non-contiguous array")
754
+
755
+ if not _equal_strides(ary.strides, self.strides, self.shape):
756
+ warn("Setting array from one with different "
757
+ "strides/storage order. This will cease to work "
758
+ "in 2013.x.",
759
+ stacklevel=2)
760
+
761
+ if self.size:
762
+ event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
763
+ dst_offset=self.offset,
764
+ is_blocking=not async_)
765
+
766
+ self.add_event(event1)
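A minimal host-to-device transfer using set(), sketched under the assumption of a working OpenCL platform; the host array must match the device array's dtype and size:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.empty(queue, (4, 4), dtype=np.float32)
    host = np.arange(16, dtype=np.float32).reshape(4, 4)
    a.set(host)            # blocking by default (async_=False)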
767
+
768
+ def _get(self, queue=None, ary=None, async_=None, **kwargs):
769
+ # {{{ handle 'async' deprecation
770
+
771
+ async_arg = kwargs.pop("async", None)
772
+ if async_arg is not None:
773
+ if async_ is not None:
774
+ raise TypeError("may not specify both 'async' and 'async_'")
775
+ async_ = async_arg
776
+
777
+ if async_ is None:
778
+ async_ = False
779
+
780
+ if kwargs:
781
+ raise TypeError("extra keyword arguments specified: %s"
782
+ % ", ".join(kwargs))
783
+
784
+ # }}}
785
+
786
+ if ary is None:
787
+ ary = np.empty(self.shape, self.dtype)
788
+
789
+ if self.strides != ary.strides:
790
+ ary = _as_strided(ary, strides=self.strides)
791
+ else:
792
+ if ary.size != self.size:
793
+ raise TypeError("'ary' has non-matching size")
794
+ if ary.dtype != self.dtype:
795
+ raise TypeError("'ary' has non-matching type")
796
+
797
+ if self.shape != ary.shape:
798
+ warn("get() between arrays of different shape is deprecated "
799
+ "and will be removed in PyCUDA 2017.x",
800
+ DeprecationWarning, stacklevel=2)
801
+
802
+ assert self.flags.forc, "Array in get() must be contiguous"
803
+
804
+ queue = queue or self.queue
805
+ if queue is None:
806
+ raise ValueError("Cannot copy array to host. "
807
+ "Array has no queue. Use "
808
+ "'new_array = array.with_queue(queue)' "
809
+ "to associate one.")
810
+
811
+ if self.size:
812
+ event1 = cl.enqueue_copy(queue, ary, self.base_data,
813
+ src_offset=self.offset,
814
+ wait_for=self.events, is_blocking=not async_)
815
+
816
+ self.add_event(event1)
817
+ else:
818
+ event1 = None
819
+
820
+ return ary, event1
821
+
822
+ def get(self, queue=None, ary=None, async_=None, **kwargs):
823
+ """Transfer the contents of *self* into *ary* or a newly allocated
824
+ :class:`numpy.ndarray`. If *ary* is given, it must have the same
825
+ shape and dtype.
826
+
827
+ .. versionchanged:: 2019.1.2
828
+
829
+ Calling with ``async_=True`` was deprecated and replaced by
830
+ :meth:`get_async`.
831
+ The event returned by :meth:`pyopencl.enqueue_copy` is now stored into
832
+ :attr:`events` to ensure data is not modified before the copy is
833
+ complete.
834
+
835
+ .. versionchanged:: 2015.2
836
+
837
+ *ary* with different shape was deprecated.
838
+
839
+ .. versionchanged:: 2017.2.1
840
+
841
+ Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
842
+ we will continue to accept *async* as a parameter, however this
843
+ should be considered deprecated. *async_* is the new, official
844
+ spelling.
845
+ """
846
+
847
+ if async_:
848
+ warn("calling pyopencl.Array.get with 'async_=True' is deprecated. "
849
+ "Please use pyopencl.Array.get_async for asynchronous "
850
+ "device-to-host transfers",
851
+ DeprecationWarning, stacklevel=2)
852
+
853
+ ary, _event1 = self._get(queue=queue, ary=ary, async_=async_, **kwargs)
854
+
855
+ return ary
856
+
857
+ def get_async(self, queue=None, ary=None, **kwargs):
858
+ """
859
+ Asynchronous version of :meth:`get` which returns a tuple ``(ary, event)``
860
+ containing the host array ``ary``
861
+ and the :class:`pyopencl.NannyEvent` ``event`` returned by
862
+ :meth:`pyopencl.enqueue_copy`.
863
+
864
+ .. versionadded:: 2019.1.2
865
+ """
866
+
867
+ return self._get(queue=queue, ary=ary, async_=True, **kwargs)
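And the matching device-to-host direction, sketched with get_async() under the same assumptions; the returned event must complete before the host buffer is safe to read:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.arange(8, dtype=np.float32))
    host, evt = a.get_async()
    evt.wait()             # the NannyEvent also keeps `host` alive during the copy
    print(host)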
868
+
869
+ def copy(self, queue=_copy_queue):
870
+ """
871
+ :arg queue: The :class:`~pyopencl.CommandQueue` for the returned array.
872
+
873
+ .. versionchanged:: 2017.1.2
874
+
875
+ Updates the queue of the returned array.
876
+
877
+ .. versionadded:: 2013.1
878
+ """
879
+
880
+ if queue is _copy_queue:
881
+ queue = self.queue
882
+
883
+ result = self._new_like_me(queue=queue)
884
+
885
+ # result.queue won't be the same as queue if queue is None.
886
+ # We force them to be the same here.
887
+ if result.queue is not queue:
888
+ result = result.with_queue(queue)
889
+
890
+ if not self.flags.forc:
891
+ raise RuntimeError("cannot copy non-contiguous array")
892
+
893
+ if self.nbytes:
894
+ event1 = cl.enqueue_copy(queue or self.queue,
895
+ result.base_data, self.base_data,
896
+ src_offset=self.offset, byte_count=self.nbytes,
897
+ wait_for=self.events)
898
+ result.add_event(event1)
899
+
900
+ return result
901
+
902
+ def __str__(self):
903
+ if self.queue is None:
904
+ return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
905
+ "without queue, call with_queue()>")
906
+
907
+ return str(self.get())
908
+
909
+ def __repr__(self):
910
+ if self.queue is None:
911
+ return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
912
+ f"at {id(self):x} without queue, call with_queue()>")
913
+
914
+ result = repr(self.get())
915
+ if result[:5] == "array":
916
+ result = f"cl.{type(self).__name__}" + result[5:]
917
+ else:
918
+ warn(
919
+ f"{type(result).__name__}.__repr__ was expected to return a "
920
+ f"string starting with 'array', got '{result[:10]!r}'",
921
+ stacklevel=2)
922
+
923
+ return result
924
+
925
+ def safely_stringify_for_pudb(self):
926
+ return f"cl.{type(self).__name__} {self.dtype} {self.shape}"
927
+
928
+ def __hash__(self):
929
+ raise TypeError("pyopencl arrays are not hashable.")
930
+
931
+ # {{{ kernel invocation wrappers
932
+
933
+ @staticmethod
934
+ @elwise_kernel_runner
935
+ def _axpbyz(out, afac, a, bfac, b, queue=None):
936
+ """Compute ``out = selffac * self + otherfac*other``,
937
+ where *other* is an array."""
938
+ a_shape = a.shape
939
+ b_shape = b.shape
940
+ out_shape = out.shape
941
+ assert (a_shape == b_shape == out_shape
942
+ or (a_shape == () and b_shape == out_shape)
943
+ or (b_shape == () and a_shape == out_shape))
944
+ return elementwise.get_axpbyz_kernel(
945
+ out.context, a.dtype, b.dtype, out.dtype,
946
+ x_is_scalar=(a_shape == ()),
947
+ y_is_scalar=(b_shape == ()))
948
+
949
+ @staticmethod
950
+ @elwise_kernel_runner
951
+ def _axpbz(out, a, x, b, queue=None):
952
+ """Compute ``z = a * x + b``, where *b* is a scalar."""
953
+ a = np.array(a)
954
+ b = np.array(b)
955
+ assert out.shape == x.shape
956
+ return elementwise.get_axpbz_kernel(out.context,
957
+ a.dtype, x.dtype, b.dtype, out.dtype)
958
+
959
+ @staticmethod
960
+ @elwise_kernel_runner
961
+ def _elwise_multiply(out, a, b, queue=None):
962
+ a_shape = a.shape
963
+ b_shape = b.shape
964
+ out_shape = out.shape
965
+ assert (a_shape == b_shape == out_shape
966
+ or (a_shape == () and b_shape == out_shape)
967
+ or (b_shape == () and a_shape == out_shape))
968
+ return elementwise.get_multiply_kernel(
969
+ a.context, a.dtype, b.dtype, out.dtype,
970
+ x_is_scalar=(a_shape == ()),
971
+ y_is_scalar=(b_shape == ())
972
+ )
973
+
974
+ @staticmethod
975
+ @elwise_kernel_runner
976
+ def _rdiv_scalar(out, ary, other, queue=None):
977
+ other = np.array(other)
978
+ assert out.shape == ary.shape
979
+ return elementwise.get_rdivide_elwise_kernel(
980
+ out.context, ary.dtype, other.dtype, out.dtype)
981
+
982
+ @staticmethod
983
+ @elwise_kernel_runner
984
+ def _div(out, self, other, queue=None):
985
+ """Divides an array by another array."""
986
+ assert (self.shape == other.shape == out.shape
987
+ or (self.shape == () and other.shape == out.shape)
988
+ or (other.shape == () and self.shape == out.shape))
989
+
990
+ return elementwise.get_divide_kernel(self.context,
991
+ self.dtype, other.dtype, out.dtype,
992
+ x_is_scalar=(self.shape == ()),
993
+ y_is_scalar=(other.shape == ()))
994
+
995
+ @staticmethod
996
+ @elwise_kernel_runner
997
+ def _fill(result, scalar):
998
+ return elementwise.get_fill_kernel(result.context, result.dtype)
999
+
1000
+ @staticmethod
1001
+ @elwise_kernel_runner
1002
+ def _abs(result, arg):
1003
+ if arg.dtype.kind == "c":
1004
+ from pyopencl.elementwise import complex_dtype_to_name
1005
+ fname = "%s_abs" % complex_dtype_to_name(arg.dtype)
1006
+ elif arg.dtype.kind == "f":
1007
+ fname = "fabs"
1008
+ elif arg.dtype.kind in ["u", "i"]:
1009
+ fname = "abs"
1010
+ else:
1011
+ raise TypeError("unsupported dtype in _abs()")
1012
+
1013
+ return elementwise.get_unary_func_kernel(
1014
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1015
+
1016
+ @staticmethod
1017
+ @elwise_kernel_runner
1018
+ def _real(result, arg):
1019
+ from pyopencl.elementwise import complex_dtype_to_name
1020
+ fname = "%s_real" % complex_dtype_to_name(arg.dtype)
1021
+ return elementwise.get_unary_func_kernel(
1022
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1023
+
1024
+ @staticmethod
1025
+ @elwise_kernel_runner
1026
+ def _imag(result, arg):
1027
+ from pyopencl.elementwise import complex_dtype_to_name
1028
+ fname = "%s_imag" % complex_dtype_to_name(arg.dtype)
1029
+ return elementwise.get_unary_func_kernel(
1030
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1031
+
1032
+ @staticmethod
1033
+ @elwise_kernel_runner
1034
+ def _conj(result, arg):
1035
+ from pyopencl.elementwise import complex_dtype_to_name
1036
+ fname = "%s_conj" % complex_dtype_to_name(arg.dtype)
1037
+ return elementwise.get_unary_func_kernel(
1038
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1039
+
1040
+ @staticmethod
1041
+ @elwise_kernel_runner
1042
+ def _pow_scalar(result, ary, exponent):
1043
+ exponent = np.array(exponent)
1044
+ return elementwise.get_pow_kernel(result.context,
1045
+ ary.dtype, exponent.dtype, result.dtype,
1046
+ is_base_array=True, is_exp_array=False)
1047
+
1048
+ @staticmethod
1049
+ @elwise_kernel_runner
1050
+ def _rpow_scalar(result, base, exponent):
1051
+ base = np.array(base)
1052
+ return elementwise.get_pow_kernel(result.context,
1053
+ base.dtype, exponent.dtype, result.dtype,
1054
+ is_base_array=False, is_exp_array=True)
1055
+
1056
+ @staticmethod
1057
+ @elwise_kernel_runner
1058
+ def _pow_array(result, base, exponent):
1059
+ return elementwise.get_pow_kernel(
1060
+ result.context, base.dtype, exponent.dtype, result.dtype,
1061
+ is_base_array=True, is_exp_array=True)
1062
+
1063
+ @staticmethod
1064
+ @elwise_kernel_runner
1065
+ def _reverse(result, ary):
1066
+ return elementwise.get_reverse_kernel(result.context, ary.dtype)
1067
+
1068
+ @staticmethod
1069
+ @elwise_kernel_runner
1070
+ def _copy(dest, src):
1071
+ return elementwise.get_copy_kernel(
1072
+ dest.context, dest.dtype, src.dtype)
1073
+
1074
+ def _new_like_me(self, dtype=None, queue=None):
1075
+ if dtype is None:
1076
+ dtype = self.dtype
1077
+ strides = self.strides
1078
+ flags = self.flags
1079
+ fast = True
1080
+ else:
1081
+ strides = None
1082
+ flags = None
1083
+ if dtype == self.dtype:
1084
+ strides = self.strides
1085
+ flags = self.flags
1086
+ fast = True
1087
+ else:
1088
+ fast = False
1089
+
1090
+ queue = queue or self.queue
1091
+ return self.__class__(None, self.shape, dtype,
1092
+ allocator=self.allocator, strides=strides, _flags=flags,
1093
+ _fast=fast,
1094
+ _size=self.size, _queue=queue, _context=self.context)
1095
+
1096
+ @staticmethod
1097
+ @elwise_kernel_runner
1098
+ def _scalar_binop(out, a, b, queue=None, op=None):
1099
+ return elementwise.get_array_scalar_binop_kernel(
1100
+ out.context, op, out.dtype, a.dtype,
1101
+ np.array(b).dtype)
1102
+
1103
+ @staticmethod
1104
+ @elwise_kernel_runner
1105
+ def _array_binop(out, a, b, queue=None, op=None):
1106
+ a_shape = a.shape
1107
+ b_shape = b.shape
1108
+ out_shape = out.shape
1109
+ assert (a_shape == b_shape == out_shape
1110
+ or (a_shape == () and b_shape == out_shape)
1111
+ or (b_shape == () and a_shape == out_shape))
1112
+ return elementwise.get_array_binop_kernel(
1113
+ out.context, op, out.dtype, a.dtype, b.dtype,
1114
+ a_is_scalar=(a_shape == ()),
1115
+ b_is_scalar=(b_shape == ()))
1116
+
1117
+ @staticmethod
1118
+ @elwise_kernel_runner
1119
+ def _unop(out, a, queue=None, op=None):
1120
+ if out.shape != a.shape:
1121
+ raise ValueError("shapes of arguments do not match")
1122
+ return elementwise.get_unop_kernel(
1123
+ out.context, op, a.dtype, out.dtype)
1124
+
1125
+ # }}}
1126
+
1127
+ # {{{ operators
1128
+
1129
+ def mul_add(self, selffac, other, otherfac, queue=None):
1130
+ """Return ``selffac * self + otherfac * other``.
1131
+ """
1132
+ queue = queue or self.queue
1133
+
1134
+ if isinstance(other, Array):
1135
+ result = _get_broadcasted_binary_op_result(self, other, queue)
1136
+ result.add_event(
1137
+ self._axpbyz(
1138
+ result, selffac, self, otherfac, other,
1139
+ queue=queue))
1140
+ return result
1141
+ elif np.isscalar(other):
1142
+ common_dtype = _get_common_dtype(self, other, queue)
1143
+ result = self._new_like_me(common_dtype, queue=queue)
1144
+ result.add_event(
1145
+ self._axpbz(result, selffac,
1146
+ self, common_dtype.type(otherfac * other),
1147
+ queue=queue))
1148
+ return result
1149
+ else:
1150
+ raise NotImplementedError
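A quick sketch of mul_add(), which fuses the scaled addition into a single kernel launch (assumes a working OpenCL platform):

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.arange(4, dtype=np.float32))
    b = cl_array.to_device(queue, 10 * np.arange(1, 5, dtype=np.float32))

    c = a.mul_add(2, b, 3)     # 2*a + 3*b in one kernel, no temporaries
    print(c.get())             # [ 30.  62.  94. 126.]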
1151
+
1152
+ def __add__(self, other):
1153
+ """Add an array with an array or an array with a scalar."""
1154
+
1155
+ if isinstance(other, Array):
1156
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1157
+ result.add_event(
1158
+ self._axpbyz(result,
1159
+ self.dtype.type(1), self,
1160
+ other.dtype.type(1), other))
1161
+
1162
+ return result
1163
+ elif np.isscalar(other):
1164
+ if other == 0:
1165
+ return self.copy()
1166
+ else:
1167
+ common_dtype = _get_common_dtype(self, other, self.queue)
1168
+ result = self._new_like_me(common_dtype)
1169
+ result.add_event(
1170
+ self._axpbz(result, self.dtype.type(1),
1171
+ self, common_dtype.type(other)))
1172
+ return result
1173
+ else:
1174
+ return NotImplemented
1175
+
1176
+ __radd__ = __add__
1177
+
1178
+ def __sub__(self, other):
1179
+ """Subtract an array from an array or a scalar from an array."""
1180
+
1181
+ if isinstance(other, Array):
1182
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1183
+ result.add_event(
1184
+ self._axpbyz(result,
1185
+ self.dtype.type(1), self,
1186
+ result.dtype.type(-1), other))
1187
+
1188
+ return result
1189
+ elif np.isscalar(other):
1190
+ if other == 0:
1191
+ return self.copy()
1192
+ else:
1193
+ result = self._new_like_me(
1194
+ _get_common_dtype(self, other, self.queue))
1195
+ result.add_event(
1196
+ self._axpbz(result, self.dtype.type(1), self, -other))
1197
+ return result
1198
+ else:
1199
+ return NotImplemented
1200
+
1201
+ def __rsub__(self, other):
1202
+ """Subtracts an array by a scalar or an array::
1203
+
1204
+ x = n - self
1205
+ """
1206
+ if np.isscalar(other):
1207
+ common_dtype = _get_common_dtype(self, other, self.queue)
1208
+ result = self._new_like_me(common_dtype)
1209
+ result.add_event(
1210
+ self._axpbz(result, result.dtype.type(-1), self,
1211
+ common_dtype.type(other)))
1212
+
1213
+ return result
1214
+ else:
1215
+ return NotImplemented
1216
+
1217
+ def __iadd__(self, other):
1218
+ if isinstance(other, Array):
1219
+ if other.shape != self.shape and other.shape != ():
1220
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1221
+ f" {self.shape}, {other.shape}.")
1222
+ self.add_event(
1223
+ self._axpbyz(self,
1224
+ self.dtype.type(1), self,
1225
+ other.dtype.type(1), other))
1226
+
1227
+ return self
1228
+ elif np.isscalar(other):
1229
+ self.add_event(
1230
+ self._axpbz(self, self.dtype.type(1), self, other))
1231
+ return self
1232
+ else:
1233
+ return NotImplemented
1234
+
1235
+ def __isub__(self, other):
1236
+ if isinstance(other, Array):
1237
+ if other.shape != self.shape and other.shape != ():
1238
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1239
+ f" {self.shape}, {other.shape}.")
1240
+ self.add_event(
1241
+ self._axpbyz(self, self.dtype.type(1), self,
1242
+ other.dtype.type(-1), other))
1243
+ return self
1244
+ elif np.isscalar(other):
1245
+ self._axpbz(self, self.dtype.type(1), self, -other)
1246
+ return self
1247
+ else:
1248
+ return NotImplemented
1249
+
1250
+ def __pos__(self):
1251
+ return self
1252
+
1253
+ def __neg__(self):
1254
+ result = self._new_like_me()
1255
+ result.add_event(self._axpbz(result, -1, self, 0))
1256
+ return result
1257
+
1258
+ def __mul__(self, other):
1259
+ if isinstance(other, Array):
1260
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1261
+ result.add_event(
1262
+ self._elwise_multiply(result, self, other))
1263
+ return result
1264
+ elif np.isscalar(other):
1265
+ common_dtype = _get_common_dtype(self, other, self.queue)
1266
+ result = self._new_like_me(common_dtype)
1267
+ result.add_event(
1268
+ self._axpbz(result,
1269
+ common_dtype.type(other), self, self.dtype.type(0)))
1270
+ return result
1271
+ else:
1272
+ return NotImplemented
1273
+
1274
+ def __rmul__(self, other):
1275
+ if np.isscalar(other):
1276
+ common_dtype = _get_common_dtype(self, other, self.queue)
1277
+ result = self._new_like_me(common_dtype)
1278
+ result.add_event(
1279
+ self._axpbz(result,
1280
+ common_dtype.type(other), self, self.dtype.type(0)))
1281
+ return result
1282
+ else:
1283
+ return NotImplemented
1284
+
1285
+ def __imul__(self, other):
1286
+ if isinstance(other, Array):
1287
+ if other.shape != self.shape and other.shape != ():
1288
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1289
+ f" {self.shape}, {other.shape}.")
1290
+ self.add_event(
1291
+ self._elwise_multiply(self, self, other))
1292
+ return self
1293
+ elif np.isscalar(other):
1294
+ self.add_event(
1295
+ self._axpbz(self, other, self, self.dtype.type(0)))
1296
+ return self
1297
+ else:
1298
+ return NotImplemented
1299
+
1300
+ def __div__(self, other):
1301
+ """Divides an array by an array or a scalar, i.e. ``self / other``.
1302
+ """
1303
+ if isinstance(other, Array):
1304
+ result = _get_broadcasted_binary_op_result(
1305
+ self, other, self.queue,
1306
+ dtype_getter=_get_truedivide_dtype)
1307
+ result.add_event(self._div(result, self, other))
1308
+
1309
+ return result
1310
+ elif np.isscalar(other):
1311
+ if other == 1:
1312
+ return self.copy()
1313
+ else:
1314
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1315
+ result = self._new_like_me(common_dtype)
1316
+ result.add_event(
1317
+ self._axpbz(result,
1318
+ np.true_divide(common_dtype.type(1), other),
1319
+ self, self.dtype.type(0)))
1320
+ return result
1321
+ else:
1322
+ return NotImplemented
1323
+
1324
+ __truediv__ = __div__
1325
+
1326
+ def __rdiv__(self, other):
1327
+ """Divides an array by a scalar or an array, i.e. ``other / self``.
1328
+ """
1329
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1330
+
1331
+ if isinstance(other, Array):
1332
+ result = self._new_like_me(common_dtype)
1333
+ result.add_event(other._div(result, self))
1334
+ return result
1335
+ elif np.isscalar(other):
1336
+ result = self._new_like_me(common_dtype)
1337
+ result.add_event(
1338
+ self._rdiv_scalar(result, self, common_dtype.type(other)))
1339
+ return result
1340
+ else:
1341
+ return NotImplemented
1342
+
1343
+ __rtruediv__ = __rdiv__
1344
+
1345
+ def __itruediv__(self, other):
1346
+ # raise an error if the result cannot be cast to self
1347
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1348
+ if not np.can_cast(common_dtype, self.dtype.type, "same_kind"):
1349
+ raise TypeError(
1350
+ "Cannot cast {!r} to {!r}".format(self.dtype, common_dtype))
1351
+
1352
+ if isinstance(other, Array):
1353
+ if other.shape != self.shape and other.shape != ():
1354
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1355
+ f" {self.shape}, {other.shape}.")
1356
+ self.add_event(
1357
+ self._div(self, self, other))
1358
+ return self
1359
+ elif np.isscalar(other):
1360
+ if other == 1:
1361
+ return self
1362
+ else:
1363
+ self.add_event(
1364
+ self._axpbz(self, common_dtype.type(np.true_divide(1, other)),
1365
+ self, self.dtype.type(0)))
1366
+ return self
1367
+ else:
1368
+ return NotImplemented
1369
+
1370
+ def __and__(self, other):
1371
+ common_dtype = _get_common_dtype(self, other, self.queue)
1372
+
1373
+ if not np.issubdtype(common_dtype, np.integer):
1374
+ raise TypeError(f"Integral types only: {common_dtype}")
1375
+
1376
+ if isinstance(other, Array):
1377
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1378
+ result.add_event(self._array_binop(result, self, other, op="&"))
1379
+ return result
1380
+ elif np.isscalar(other):
1381
+ result = self._new_like_me(common_dtype)
1382
+ result.add_event(
1383
+ self._scalar_binop(result, self, other, op="&"))
1384
+ return result
1385
+ else:
1386
+ return NotImplemented
1387
+
1388
+ __rand__ = __and__ # commutes
1389
+
1390
+ def __or__(self, other):
1391
+ common_dtype = _get_common_dtype(self, other, self.queue)
1392
+
1393
+ if not np.issubdtype(common_dtype, np.integer):
1394
+ raise TypeError("Integral types only")
1395
+
1396
+ if isinstance(other, Array):
1397
+ result = _get_broadcasted_binary_op_result(self, other,
1398
+ self.queue)
1399
+ result.add_event(self._array_binop(result, self, other, op="|"))
1400
+ return result
1401
+ elif np.isscalar(other):
1402
+ result = self._new_like_me(common_dtype)
1403
+ result.add_event(
1404
+ self._scalar_binop(result, self, other, op="|"))
1405
+ return result
1406
+ else:
1407
+ return NotImplemented
1408
+
1409
+ __ror__ = __or__ # commutes
1410
+
1411
+ def __xor__(self, other):
1412
+ common_dtype = _get_common_dtype(self, other, self.queue)
1413
+
1414
+ if not np.issubdtype(common_dtype, np.integer):
1415
+ raise TypeError(f"Integral types only: {common_dtype}")
1416
+
1417
+ if isinstance(other, Array):
1418
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1419
+ result.add_event(self._array_binop(result, self, other, op="^"))
1420
+ return result
1421
+ elif np.isscalar(other):
1422
+ result = self._new_like_me(common_dtype)
1423
+ result.add_event(
1424
+ self._scalar_binop(result, self, other, op="^"))
1425
+ return result
1426
+ else:
1427
+ return NotImplemented
1428
+
1429
+ __rxor__ = __xor__ # commutes
1430
+
1431
+ def __iand__(self, other):
1432
+ common_dtype = _get_common_dtype(self, other, self.queue)
1433
+
1434
+ if not np.issubdtype(common_dtype, np.integer):
1435
+ raise TypeError(f"Integral types only: {common_dtype}")
1436
+
1437
+ if isinstance(other, Array):
1438
+ if other.shape != self.shape and other.shape != ():
1439
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1440
+ f" {self.shape}, {other.shape}.")
1441
+ self.add_event(self._array_binop(self, self, other, op="&"))
1442
+ return self
1443
+ elif np.isscalar(other):
1444
+ self.add_event(
1445
+ self._scalar_binop(self, self, other, op="&"))
1446
+ return self
1447
+ else:
1448
+ return NotImplemented
1449
+
1450
+ def __ior__(self, other):
1451
+ common_dtype = _get_common_dtype(self, other, self.queue)
1452
+
1453
+ if not np.issubdtype(common_dtype, np.integer):
1454
+ raise TypeError(f"Integral types only: {common_dtype}")
1455
+
1456
+ if isinstance(other, Array):
1457
+ if other.shape != self.shape and other.shape != ():
1458
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1459
+ f" {self.shape}, {other.shape}.")
1460
+ self.add_event(self._array_binop(self, self, other, op="|"))
1461
+ return self
1462
+ elif np.isscalar(other):
1463
+ self.add_event(
1464
+ self._scalar_binop(self, self, other, op="|"))
1465
+ return self
1466
+ else:
1467
+ return NotImplemented
1468
+
1469
+ def __ixor__(self, other):
1470
+ common_dtype = _get_common_dtype(self, other, self.queue)
1471
+
1472
+ if not np.issubdtype(common_dtype, np.integer):
1473
+ raise TypeError(f"Integral types only: {common_dtype}")
1474
+
1475
+ if isinstance(other, Array):
1476
+ if other.shape != self.shape and other.shape != ():
1477
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1478
+ f" {self.shape}, {other.shape}.")
1479
+ self.add_event(self._array_binop(self, self, other, op="^"))
1480
+ return self
1481
+ elif np.isscalar(other):
1482
+ self.add_event(
1483
+ self._scalar_binop(self, self, other, op="^"))
1484
+ return self
1485
+ else:
1486
+ return NotImplemented
1487
+
1488
+ def _zero_fill(self, queue=None, wait_for=None):
1489
+ queue = queue or self.queue
1490
+
1491
+ if not self.size:
1492
+ return
1493
+
1494
+ cl_version_gtr_1_2 = (
1495
+ queue._get_cl_version() >= (1, 2)
1496
+ and cl.get_cl_header_version() >= (1, 2)
1497
+ )
1498
+ on_nvidia = queue.device.vendor.startswith("NVIDIA")
1499
+
1500
+ # circumvent bug with large buffers on NVIDIA
1501
+ # https://github.com/inducer/pyopencl/issues/395
1502
+ if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
1503
+ self.add_event(
1504
+ cl.enqueue_fill(queue, self.base_data, np.int8(0),
1505
+ self.nbytes, offset=self.offset, wait_for=wait_for))
1506
+ else:
1507
+ zero = np.zeros((), self.dtype)
1508
+ self.fill(zero, queue=queue)
1509
+
1510
+ def fill(self, value, queue=None, wait_for=None):
1511
+ """Fill the array with *scalar*.
1512
+
1513
+ :returns: *self*.
1514
+ """
1515
+
1516
+ self.add_event(
1517
+ self._fill(self, value, queue=queue, wait_for=wait_for))
1518
+
1519
+ return self
1520
+
1521
+ def __len__(self):
1522
+ """Returns the size of the leading dimension of *self*."""
1523
+ if len(self.shape):
1524
+ return self.shape[0]
1525
+ else:
1526
+ return TypeError("len() of unsized object")
1527
+
1528
+ def __abs__(self):
1529
+ """Return an ``Array`` of the absolute values of the elements
1530
+ of *self*.
1531
+ """
1532
+
1533
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1534
+ result.add_event(self._abs(result, self))
1535
+ return result
1536
+
1537
+ def __pow__(self, other):
1538
+ """Exponentiation by a scalar or elementwise by another
1539
+ :class:`Array`.
1540
+ """
1541
+
1542
+ if isinstance(other, Array):
1543
+ assert self.shape == other.shape
1544
+
1545
+ result = self._new_like_me(
1546
+ _get_common_dtype(self, other, self.queue))
1547
+ result.add_event(
1548
+ self._pow_array(result, self, other))
1549
+ return result
1550
+ elif np.isscalar(other):
1551
+ result = self._new_like_me(
1552
+ _get_common_dtype(self, other, self.queue))
1553
+ result.add_event(self._pow_scalar(result, self, other))
1554
+ return result
1555
+ else:
1556
+ return NotImplemented
1557
+
1558
+ def __rpow__(self, other):
1559
+ if np.isscalar(other):
1560
+ common_dtype = _get_common_dtype(self, other, self.queue)
1561
+ result = self._new_like_me(common_dtype)
1562
+ result.add_event(
1563
+ self._rpow_scalar(result, common_dtype.type(other), self))
1564
+ return result
1565
+ else:
1566
+ return NotImplemented
1567
+
1568
+ def __invert__(self):
1569
+ if not np.issubdtype(self.dtype, np.integer):
1570
+ raise TypeError(f"Integral types only: {self.dtype}")
1571
+
1572
+ result = self._new_like_me()
1573
+ result.add_event(self._unop(result, self, op="~"))
1574
+
1575
+ return result
1576
+
1577
+ # }}}
1578
+
1579
+ def reverse(self, queue=None):
1580
+ """Return this array in reversed order. The array is treated
1581
+ as one-dimensional.
1582
+ """
1583
+
1584
+ result = self._new_like_me()
1585
+ result.add_event(self._reverse(result, self))
1586
+ return result
1587
+
1588
+ def astype(self, dtype, queue=None):
1589
+ """Return a copy of *self*, cast to *dtype*."""
1590
+ if dtype == self.dtype:
1591
+ return self.copy()
1592
+
1593
+ result = self._new_like_me(dtype=dtype)
1594
+ result.add_event(self._copy(result, self, queue=queue))
1595
+ return result
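For example, a sketch assuming a working OpenCL platform -- astype() produces a converted copy via the copy kernel:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.arange(5, dtype=np.int32))
    b = a.astype(np.float32)
    print(b.dtype, b.get())    # float32 [0. 1. 2. 3. 4.]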
1596
+
1597
+ # {{{ rich comparisons, any, all
1598
+
1599
+ def __bool__(self):
1600
+ if self.shape == ():
1601
+ return bool(self.get())
1602
+ else:
1603
+ raise ValueError("The truth value of an array with "
1604
+ "more than one element is ambiguous. Use a.any() or a.all()")
1605
+
1606
+ def any(self, queue=None, wait_for=None):
1607
+ from pyopencl.reduction import get_any_kernel
1608
+ krnl = get_any_kernel(self.context, self.dtype)
1609
+ if wait_for is None:
1610
+ wait_for = []
1611
+ result, event1 = krnl(self, queue=queue,
1612
+ wait_for=wait_for + self.events, return_event=True)
1613
+ result.add_event(event1)
1614
+ return result
1615
+
1616
+ def all(self, queue=None, wait_for=None):
1617
+ from pyopencl.reduction import get_all_kernel
1618
+ krnl = get_all_kernel(self.context, self.dtype)
1619
+ if wait_for is None:
1620
+ wait_for = []
1621
+ result, event1 = krnl(self, queue=queue,
1622
+ wait_for=wait_for + self.events, return_event=True)
1623
+ result.add_event(event1)
1624
+ return result
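any() and all() run reduction kernels and return device scalars (shape-() arrays); read them with get(). A minimal sketch, assuming a working OpenCL platform:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.array([0, 1, 0], dtype=np.int32))
    print(bool(a.any().get()))   # True  -- at least one nonzero entry
    print(bool(a.all().get()))   # False -- not all entries are nonzero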
1625
+
1626
+ @staticmethod
1627
+ @elwise_kernel_runner
1628
+ def _scalar_comparison(out, a, b, queue=None, op=None):
1629
+ return elementwise.get_array_scalar_comparison_kernel(
1630
+ out.context, op, a.dtype)
1631
+
1632
+ @staticmethod
1633
+ @elwise_kernel_runner
1634
+ def _array_comparison(out, a, b, queue=None, op=None):
1635
+ if a.shape != b.shape:
1636
+ raise ValueError("shapes of comparison arguments do not match")
1637
+ return elementwise.get_array_comparison_kernel(
1638
+ out.context, op, a.dtype, b.dtype)
1639
+
1640
+ def __eq__(self, other):
1641
+ if isinstance(other, Array):
1642
+ result = self._new_like_me(_BOOL_DTYPE)
1643
+ result.add_event(
1644
+ self._array_comparison(result, self, other, op="=="))
1645
+ return result
1646
+ elif np.isscalar(other):
1647
+ result = self._new_like_me(_BOOL_DTYPE)
1648
+ result.add_event(
1649
+ self._scalar_comparison(result, self, other, op="=="))
1650
+ return result
1651
+ else:
1652
+ return NotImplemented
1653
+
1654
+ def __ne__(self, other):
1655
+ if isinstance(other, Array):
1656
+ result = self._new_like_me(_BOOL_DTYPE)
1657
+ result.add_event(
1658
+ self._array_comparison(result, self, other, op="!="))
1659
+ return result
1660
+ elif np.isscalar(other):
1661
+ result = self._new_like_me(_BOOL_DTYPE)
1662
+ result.add_event(
1663
+ self._scalar_comparison(result, self, other, op="!="))
1664
+ return result
1665
+ else:
1666
+ return NotImplemented
1667
+
1668
+ def __le__(self, other):
1669
+ if isinstance(other, Array):
1670
+ result = self._new_like_me(_BOOL_DTYPE)
1671
+ result.add_event(
1672
+ self._array_comparison(result, self, other, op="<="))
1673
+ return result
1674
+ elif np.isscalar(other):
1675
+ result = self._new_like_me(_BOOL_DTYPE)
1676
+ self._scalar_comparison(result, self, other, op="<=")
1677
+ return result
1678
+ else:
1679
+ return NotImplemented
1680
+
1681
+ def __ge__(self, other):
1682
+ if isinstance(other, Array):
1683
+ result = self._new_like_me(_BOOL_DTYPE)
1684
+ result.add_event(
1685
+ self._array_comparison(result, self, other, op=">="))
1686
+ return result
1687
+ elif np.isscalar(other):
1688
+ result = self._new_like_me(_BOOL_DTYPE)
1689
+ result.add_event(
1690
+ self._scalar_comparison(result, self, other, op=">="))
1691
+ return result
1692
+ else:
1693
+ return NotImplemented
1694
+
1695
+ def __lt__(self, other):
1696
+ if isinstance(other, Array):
1697
+ result = self._new_like_me(_BOOL_DTYPE)
1698
+ result.add_event(
1699
+ self._array_comparison(result, self, other, op="<"))
1700
+ return result
1701
+ elif np.isscalar(other):
1702
+ result = self._new_like_me(_BOOL_DTYPE)
1703
+ result.add_event(
1704
+ self._scalar_comparison(result, self, other, op="<"))
1705
+ return result
1706
+ else:
1707
+ return NotImplemented
1708
+
1709
+ def __gt__(self, other):
1710
+ if isinstance(other, Array):
1711
+ result = self._new_like_me(_BOOL_DTYPE)
1712
+ result.add_event(
1713
+ self._array_comparison(result, self, other, op=">"))
1714
+ return result
1715
+ elif np.isscalar(other):
1716
+ result = self._new_like_me(_BOOL_DTYPE)
1717
+ result.add_event(
1718
+ self._scalar_comparison(result, self, other, op=">"))
1719
+ return result
1720
+ else:
1721
+ return NotImplemented
1722
+
1723
+ # }}}
1724
+
1725
+ # {{{ complex-valued business
1726
+
1727
+ @property
1728
+ def real(self):
1729
+ """
1730
+ .. versionadded:: 2012.1
1731
+ """
1732
+ if self.dtype.kind == "c":
1733
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1734
+ result.add_event(
1735
+ self._real(result, self))
1736
+ return result
1737
+ else:
1738
+ return self
1739
+
1740
+ @property
1741
+ def imag(self):
1742
+ """
1743
+ .. versionadded:: 2012.1
1744
+ """
1745
+ if self.dtype.kind == "c":
1746
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1747
+ result.add_event(
1748
+ self._imag(result, self))
1749
+ return result
1750
+ else:
1751
+ return zeros_like(self)
1752
+
1753
+ def conj(self):
1754
+ """
1755
+ .. versionadded:: 2012.1
1756
+ """
1757
+ if self.dtype.kind == "c":
1758
+ result = self._new_like_me()
1759
+ result.add_event(self._conj(result, self))
1760
+ return result
1761
+ else:
1762
+ return self
1763
+
1764
+ conjugate = conj
1765
+
1766
+ # }}}
1767
+
1768
+ # {{{ event management
1769
+
1770
+ def add_event(self, evt):
1771
+ """Add *evt* to :attr:`events`. If :attr:`events` is too long, this method
1772
+ may implicitly wait for a subset of :attr:`events` and clear them from the
1773
+ list.
1774
+ """
1775
+ n_wait = 4
1776
+
1777
+ self.events.append(evt)
1778
+
1779
+ if len(self.events) > 3*n_wait:
1780
+ wait_events = self.events[:n_wait]
1781
+ cl.wait_for_events(wait_events)
1782
+ del self.events[:n_wait]
1783
+
1784
+ def finish(self):
1785
+ """Wait for the entire contents of :attr:`events`, clear it."""
1786
+
1787
+ if self.events:
1788
+ cl.wait_for_events(self.events)
1789
+ del self.events[:]
1790
+
1791
+ # }}}
1792
+
1793
+ # {{{ views
1794
+
1795
+ def reshape(self, *shape, **kwargs):
1796
+ """Returns an array containing the same data with a new shape."""
1797
+
1798
+ order = kwargs.pop("order", "C")
1799
+ if kwargs:
1800
+ raise TypeError("unexpected keyword arguments: %s"
1801
+ % list(kwargs.keys()))
1802
+
1803
+ if order not in "CF":
1804
+ raise ValueError("order must be either 'C' or 'F'")
1805
+
1806
+ # TODO: add more error-checking, perhaps
1807
+
1808
+ # FIXME: The following is overly conservative. As long as we don't change
1809
+ # our memory footprint, we're good.
1810
+
1811
+ # if not self.flags.forc:
1812
+ # raise RuntimeError("only contiguous arrays may "
1813
+ # "be used as arguments to this operation")
1814
+
1815
+ if isinstance(shape[0], tuple) or isinstance(shape[0], list):
1816
+ shape = tuple(shape[0])
1817
+
1818
+ if -1 in shape:
1819
+ shape = list(shape)
1820
+ idx = shape.index(-1)
1821
+ size = -reduce(lambda x, y: x * y, shape, 1)
1822
+ if size == 0:
1823
+ shape[idx] = 0
1824
+ else:
1825
+ shape[idx] = self.size // size
1826
+ if builtins.any(s < 0 for s in shape):
1827
+ raise ValueError("can only specify one unknown dimension")
1828
+ shape = tuple(shape)
1829
+
1830
+ if shape == self.shape:
1831
+ return self._new_with_changes(
1832
+ data=self.base_data, offset=self.offset, shape=shape,
1833
+ strides=self.strides)
1834
+
1835
+ import operator
1836
+ size = reduce(operator.mul, shape, 1)
1837
+ if size != self.size:
1838
+ raise ValueError("total size of new array must be unchanged")
1839
+
1840
+ if self.size == 0:
1841
+ return self._new_with_changes(
1842
+ data=None, offset=0, shape=shape,
1843
+ strides=(
1844
+ _f_contiguous_strides(self.dtype.itemsize, shape)
1845
+ if order == "F" else
1846
+ _c_contiguous_strides(self.dtype.itemsize, shape)
1847
+ ))
1848
+
1849
+ # {{{ determine reshaped strides
1850
+
1851
+ # copied and translated from
1852
+ # https://github.com/numpy/numpy/blob/4083883228d61a3b571dec640185b5a5d983bf59/numpy/core/src/multiarray/shape.c # noqa: E501
1853
+
1854
+ newdims = shape
1855
+ newnd = len(newdims)
1856
+
1857
+ # Remove axes with dimension 1 from the old array. They have no effect
1858
+ # but would need special cases since their strides do not matter.
1859
+
1860
+ olddims = []
1861
+ oldstrides = []
1862
+ for oi in range(len(self.shape)):
1863
+ s = self.shape[oi]
1864
+ if s != 1:
1865
+ olddims.append(s)
1866
+ oldstrides.append(self.strides[oi])
1867
+
1868
+ oldnd = len(olddims)
1869
+
1870
+ newstrides = [-1]*len(newdims)
1871
+
1872
+ # oi to oj and ni to nj give the axis ranges currently worked with
1873
+ oi = 0
1874
+ oj = 1
1875
+ ni = 0
1876
+ nj = 1
1877
+ while ni < newnd and oi < oldnd:
1878
+ np = newdims[ni]
1879
+ op = olddims[oi]
1880
+
1881
+ while np != op:
1882
+ if np < op:
1883
+ # Misses trailing 1s, these are handled later
1884
+ np *= newdims[nj]
1885
+ nj += 1
1886
+ else:
1887
+ op *= olddims[oj]
1888
+ oj += 1
1889
+
1890
+ # Check whether the original axes can be combined
1891
+ for ok in range(oi, oj-1):
1892
+ if order == "F":
1893
+ if oldstrides[ok+1] != olddims[ok]*oldstrides[ok]:
1894
+ raise ValueError("cannot reshape without copy")
1895
+ else:
1896
+ # C order
1897
+ if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]):
1898
+ raise ValueError("cannot reshape without copy")
1899
+
1900
+ # Calculate new strides for all axes currently worked with
1901
+ if order == "F":
1902
+ newstrides[ni] = oldstrides[oi]
1903
+ for nk in range(ni+1, nj):
1904
+ newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1]
1905
+ else:
1906
+ # C order
1907
+ newstrides[nj - 1] = oldstrides[oj - 1]
1908
+ for nk in range(nj-1, ni, -1):
1909
+ newstrides[nk - 1] = newstrides[nk]*newdims[nk]
1910
+
1911
+ ni = nj
1912
+ nj += 1
1913
+
1914
+ oi = oj
1915
+ oj += 1
1916
+
1917
+ # Set strides corresponding to trailing 1s of the new shape.
1918
+ if ni >= 1:
1919
+ last_stride = newstrides[ni - 1]
1920
+ else:
1921
+ last_stride = self.dtype.itemsize
1922
+
1923
+ if order == "F":
1924
+ last_stride *= newdims[ni - 1]
1925
+
1926
+ for nk in range(ni, len(shape)):
1927
+ newstrides[nk] = last_stride
1928
+
1929
+ # }}}
1930
+
1931
+ return self._new_with_changes(
1932
+ data=self.base_data, offset=self.offset, shape=shape,
1933
+ strides=tuple(newstrides))
1934
+
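# Editorial usage sketch (not part of the shipped file): reshape returns a
# view on the same buffer when the strides permit it; -1 infers one
# dimension, as in numpy. Assumes a working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.arange(12, dtype=np.float32))
b = a.reshape(3, -1)                 # second dimension inferred as 4
assert b.shape == (3, 4)
assert b.base_data is a.base_data    # same underlying buffer, no copy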
1935
+ def ravel(self, order="C"):
1936
+ """Returns flattened array containing the same data."""
1937
+ return self.reshape(self.size, order=order)
1938
+
1939
+ def view(self, dtype=None):
1940
+ """Returns view of array with the same data. If *dtype* is different
1941
+ from current dtype, the actual bytes of memory will be reinterpreted.
1942
+ """
1943
+
1944
+ if dtype is None:
1945
+ dtype = self.dtype
1946
+
1947
+ old_itemsize = self.dtype.itemsize
1948
+ itemsize = np.dtype(dtype).itemsize
1949
+
1950
+ from pytools import argmin2
1951
+ min_stride_axis = argmin2(
1952
+ (axis, abs(stride))
1953
+ for axis, stride in enumerate(self.strides))
1954
+
1955
+ if self.shape[min_stride_axis] * old_itemsize % itemsize != 0:
1956
+ raise ValueError("new type not compatible with array")
1957
+
1958
+ new_shape = (
1959
+ self.shape[:min_stride_axis]
1960
+ + (self.shape[min_stride_axis] * old_itemsize // itemsize,)
1961
+ + self.shape[min_stride_axis+1:])
1962
+ new_strides = (
1963
+ self.strides[:min_stride_axis]
1964
+ + (self.strides[min_stride_axis] * itemsize // old_itemsize,)
1965
+ + self.strides[min_stride_axis+1:])
1966
+
1967
+ return self._new_with_changes(
1968
+ self.base_data, self.offset,
1969
+ shape=new_shape, dtype=dtype,
1970
+ strides=new_strides)
1971
+
1972
+ def squeeze(self):
1973
+ """Returns a view of the array with dimensions of
1974
+ length 1 removed.
1975
+
1976
+ .. versionadded:: 2015.2
1977
+ """
1978
+ new_shape = tuple(dim for dim in self.shape if dim > 1)
1979
+ new_strides = tuple(
1980
+ self.strides[i] for i, dim in enumerate(self.shape)
1981
+ if dim > 1)
1982
+
1983
+ return self._new_with_changes(
1984
+ self.base_data, self.offset,
1985
+ shape=new_shape, strides=new_strides)
1986
+
1987
+ def transpose(self, axes=None):
1988
+ """Permute the dimensions of an array.
1989
+
1990
+ :arg axes: list of ints, optional.
1991
+ By default, reverse the dimensions, otherwise permute the axes
1992
+ according to the values given.
1993
+
1994
+ :returns: :class:`Array` A view of the array with its axes permuted.
1995
+
1996
+ .. versionadded:: 2015.2
1997
+ """
1998
+
1999
+ if axes is None:
2000
+ axes = range(self.ndim-1, -1, -1)
2001
+
2002
+ if len(axes) != len(self.shape):
2003
+ raise ValueError("axes don't match array")
2004
+
2005
+ new_shape = [self.shape[axes[i]] for i in range(len(axes))]
2006
+ new_strides = [self.strides[axes[i]] for i in range(len(axes))]
2007
+
2008
+ return self._new_with_changes(
2009
+ self.base_data, self.offset,
2010
+ shape=tuple(new_shape),
2011
+ strides=tuple(new_strides))
2012
+
2013
+ @property
2014
+ def T(self): # noqa: N802
2015
+ """
2016
+ .. versionadded:: 2015.2
2017
+ """
2018
+ return self.transpose()
2019
+
2020
+ # }}}
2021
+
2022
+ def map_to_host(self, queue=None, flags=None, is_blocking=True, wait_for=None):
2023
+ """If *is_blocking*, return a :class:`numpy.ndarray` corresponding to the
2024
+ same memory as *self*.
2025
+
2026
+ If *is_blocking* is not true, return a tuple ``(ary, evt)``, where
2027
+ *ary* is the above-mentioned array.
2028
+
2029
+ The host array is obtained using :func:`pyopencl.enqueue_map_buffer`.
2030
+ See there for further details.
2031
+
2032
+ :arg flags: A combination of :class:`pyopencl.map_flags`.
2033
+ Defaults to read-write.
2034
+
2035
+ .. versionadded :: 2013.2
2036
+ """
2037
+
2038
+ if flags is None:
2039
+ flags = cl.map_flags.READ | cl.map_flags.WRITE
2040
+ if wait_for is None:
2041
+ wait_for = []
2042
+
2043
+ ary, evt = cl.enqueue_map_buffer(
2044
+ queue or self.queue, self.base_data, flags, self.offset,
2045
+ self.shape, self.dtype, strides=self.strides,
2046
+ wait_for=wait_for + self.events, is_blocking=is_blocking)
2047
+
2048
+ if is_blocking:
2049
+ return ary
2050
+ else:
2051
+ return ary, evt
2052
+
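# Editorial usage sketch (not part of the shipped file): map_to_host maps the
# underlying buffer into host memory via pyopencl.enqueue_map_buffer and
# returns a numpy array over it (blocking by default). How efficiently the
# mapping works depends on how the buffer was allocated; this is only an
# illustration. Assumes a working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.arange(16, dtype=np.float32))
mapped = a.map_to_host(queue)        # blocking map, returns an ndarray
print(mapped[:4])                    # [0. 1. 2. 3.]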
2053
+ # {{{ getitem/setitem
2054
+
2055
+ def __getitem__(self, index):
2056
+ """
2057
+ .. versionadded:: 2013.1
2058
+ """
2059
+
2060
+ if isinstance(index, Array):
2061
+ if index.dtype.kind not in ("i", "u"):
2062
+ raise TypeError(
2063
+ "fancy indexing is only allowed with integers")
2064
+ if len(index.shape) != 1:
2065
+ raise NotImplementedError(
2066
+ "multidimensional fancy indexing is not supported")
2067
+ if len(self.shape) != 1:
2068
+ raise NotImplementedError(
2069
+ "fancy indexing into a multi-d array is not supported")
2070
+
2071
+ return take(self, index)
2072
+
2073
+ if not isinstance(index, tuple):
2074
+ index = (index,)
2075
+
2076
+ new_shape = []
2077
+ new_offset = self.offset
2078
+ new_strides = []
2079
+
2080
+ seen_ellipsis = False
2081
+
2082
+ index_axis = 0
2083
+ array_axis = 0
2084
+ while index_axis < len(index):
2085
+ index_entry = index[index_axis]
2086
+
2087
+ if array_axis > len(self.shape):
2088
+ raise IndexError("too many axes in index")
2089
+
2090
+ if isinstance(index_entry, slice):
2091
+ start, stop, idx_stride = index_entry.indices(
2092
+ self.shape[array_axis])
2093
+
2094
+ array_stride = self.strides[array_axis]
2095
+
2096
+ new_shape.append((abs(stop-start)-1)//abs(idx_stride)+1)
2097
+ new_strides.append(idx_stride*array_stride)
2098
+ new_offset += array_stride*start
2099
+
2100
+ index_axis += 1
2101
+ array_axis += 1
2102
+
2103
+ elif isinstance(index_entry, (int, np.integer)):
2104
+ array_shape = self.shape[array_axis]
2105
+ if index_entry < 0:
2106
+ index_entry += array_shape
2107
+
2108
+ if not (0 <= index_entry < array_shape):
2109
+ raise IndexError(
2110
+ "subindex in axis %d out of range" % index_axis)
2111
+
2112
+ new_offset += self.strides[array_axis]*index_entry
2113
+
2114
+ index_axis += 1
2115
+ array_axis += 1
2116
+
2117
+ elif index_entry is Ellipsis:
2118
+ index_axis += 1
2119
+
2120
+ remaining_index_count = len(index) - index_axis
2121
+ new_array_axis = len(self.shape) - remaining_index_count
2122
+ if new_array_axis < array_axis:
2123
+ raise IndexError("invalid use of ellipsis in index")
2124
+ while array_axis < new_array_axis:
2125
+ new_shape.append(self.shape[array_axis])
2126
+ new_strides.append(self.strides[array_axis])
2127
+ array_axis += 1
2128
+
2129
+ if seen_ellipsis:
2130
+ raise IndexError(
2131
+ "more than one ellipsis not allowed in index")
2132
+ seen_ellipsis = True
2133
+
2134
+ elif index_entry is np.newaxis:
2135
+ new_shape.append(1)
2136
+ new_strides.append(0)
2137
+ index_axis += 1
2138
+
2139
+ else:
2140
+ raise IndexError("invalid subindex in axis %d" % index_axis)
2141
+
2142
+ while array_axis < len(self.shape):
2143
+ new_shape.append(self.shape[array_axis])
2144
+ new_strides.append(self.strides[array_axis])
2145
+
2146
+ array_axis += 1
2147
+
2148
+ return self._new_with_changes(
2149
+ self.base_data, offset=new_offset,
2150
+ shape=tuple(new_shape),
2151
+ strides=tuple(new_strides))
2152
+
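# Editorial usage sketch (not part of the shipped file): basic slicing yields
# a strided view into the same buffer, while fancy indexing with an integer
# Array goes through take() and produces a new array. Assumes a working
# OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.arange(10, dtype=np.float32))
view = a[2:8:2]                      # strided view, no copy
assert view.base_data is a.base_data
idx = cl_array.to_device(queue, np.array([0, 3, 7], dtype=np.int32))
print(a[idx].get())                  # fancy indexing: [0. 3. 7.]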
2153
+ def setitem(self, subscript, value, queue=None, wait_for=None):
2154
+ """Like :meth:`__setitem__`, but with the ability to specify
2155
+ a *queue* and *wait_for*.
2156
+
2157
+ .. versionadded:: 2013.1
2158
+
2159
+ .. versionchanged:: 2013.2
2160
+
2161
+ Added *wait_for*.
2162
+ """
2163
+
2164
+ queue = queue or self.queue or value.queue
2165
+ if wait_for is None:
2166
+ wait_for = []
2167
+ wait_for = wait_for + self.events
2168
+
2169
+ if isinstance(subscript, Array):
2170
+ if subscript.dtype.kind not in ("i", "u"):
2171
+ raise TypeError(
2172
+ "fancy indexing is only allowed with integers")
2173
+ if len(subscript.shape) != 1:
2174
+ raise NotImplementedError(
2175
+ "multidimensional fancy indexing is not supported")
2176
+ if len(self.shape) != 1:
2177
+ raise NotImplementedError(
2178
+ "fancy indexing into a multi-d array is not supported")
2179
+
2180
+ multi_put([value], subscript, out=[self], queue=queue,
2181
+ wait_for=wait_for)
2182
+ return
2183
+
2184
+ subarray = self[subscript]
2185
+
2186
+ if not subarray.size:
2187
+ # This prevents errors about mismatched strides that neither we
2188
+ # nor numpy worry about in the empty case.
2189
+ return
2190
+
2191
+ if isinstance(value, np.ndarray):
2192
+ if subarray.shape == value.shape and subarray.strides == value.strides:
2193
+ self.add_event(
2194
+ cl.enqueue_copy(queue, subarray.base_data,
2195
+ value, dst_offset=subarray.offset, wait_for=wait_for))
2196
+ return
2197
+ else:
2198
+ value = to_device(queue, value, self.allocator)
2199
+
2200
+ if isinstance(value, Array):
2201
+ if len(subarray.shape) != len(value.shape):
2202
+ raise NotImplementedError("broadcasting is not "
2203
+ "supported in __setitem__")
2204
+ if subarray.shape != value.shape:
2205
+ raise ValueError("cannot assign between arrays of "
2206
+ "differing shapes")
2207
+ if subarray.strides != value.strides:
2208
+ raise NotImplementedError("cannot assign between arrays of "
2209
+ "differing strides")
2210
+
2211
+ self.add_event(
2212
+ self._copy(subarray, value, queue=queue, wait_for=wait_for))
2213
+
2214
+ else:
2215
+ # Let's assume it's a scalar
2216
+ subarray.fill(value, queue=queue, wait_for=wait_for)
2217
+
2218
+ def __setitem__(self, subscript, value):
2219
+ """Set the slice of *self* identified *subscript* to *value*.
2220
+
2221
+ *value* is allowed to be:
2222
+
2223
+ * An :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
2224
+ but with potentially different :attr:`dtype`.
2225
+ * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
2226
+ :attr:`strides`, but with potentially different :attr:`dtype`.
2227
+ * A scalar.
2228
+
2229
+ Non-scalar broadcasting is not currently supported.
2230
+
2231
+ .. versionadded:: 2013.1
2232
+ """
2233
+ self.setitem(subscript, value)
2234
+
2235
+ # }}}
2236
+
2237
+ # }}}
2238
+
2239
+
2240
+ # {{{ creation helpers
2241
+
2242
+ def as_strided(ary, shape=None, strides=None):
2243
+ """Make an :class:`Array` from the given array with the given
2244
+ shape and strides.
2245
+ """
2246
+
2247
+ # undocumented for the moment
2248
+
2249
+ if shape is None:
2250
+ shape = ary.shape
2251
+ if strides is None:
2252
+ strides = ary.strides
2253
+
2254
+ return Array(ary.queue, shape, ary.dtype, allocator=ary.allocator,
2255
+ data=ary.data, strides=strides)
2256
+
2257
+
2258
+ class _same_as_transfer: # noqa: N801
2259
+ pass
2260
+
2261
+
2262
+ def to_device(queue, ary, allocator=None, async_=None,
2263
+ array_queue=_same_as_transfer, **kwargs):
2264
+ """Return a :class:`Array` that is an exact copy of the
2265
+ :class:`numpy.ndarray` instance *ary*.
2266
+
2267
+ :arg array_queue: The :class:`~pyopencl.CommandQueue` which will
2268
+ be stored in the resulting array. Useful
2269
+ to make sure there is no implicit queue associated
2270
+ with the array by passing *None*.
2271
+
2272
+ See :class:`Array` for the meaning of *allocator*.
2273
+
2274
+ .. versionchanged:: 2015.2
2275
+ *array_queue* argument was added.
2276
+
2277
+ .. versionchanged:: 2017.2.1
2278
+
2279
+ Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
2280
+ we will continue to accept *async* as a parameter; however, this
2281
+ should be considered deprecated. *async_* is the new, official
2282
+ spelling.
2283
+ """
2284
+
2285
+ # {{{ handle 'async' deprecation
2286
+
2287
+ async_arg = kwargs.pop("async", None)
2288
+ if async_arg is not None:
2289
+ if async_ is not None:
2290
+ raise TypeError("may not specify both 'async' and 'async_'")
2291
+ async_ = async_arg
2292
+
2293
+ if async_ is None:
2294
+ async_ = False
2295
+
2296
+ if kwargs:
2297
+ raise TypeError("extra keyword arguments specified: %s"
2298
+ % ", ".join(kwargs))
2299
+
2300
+ # }}}
2301
+
2302
+ if ary.dtype == object:
2303
+ raise RuntimeError("to_device does not work on object arrays.")
2304
+
2305
+ if array_queue is _same_as_transfer:
2306
+ first_arg = queue
2307
+ else:
2308
+ first_arg = queue.context
2309
+
2310
+ result = Array(first_arg, ary.shape, ary.dtype,
2311
+ allocator=allocator, strides=ary.strides)
2312
+ result.set(ary, async_=async_, queue=queue)
2313
+ return result
2314
+
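# Editorial usage sketch (not part of the shipped file): round-tripping a
# numpy array through the device with to_device() and get(). Assumes a
# working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

host = np.linspace(0, 1, 5, dtype=np.float32)
dev = cl_array.to_device(queue, host)
assert np.array_equal(dev.get(), host)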
2315
+
2316
+ empty = Array
2317
+
2318
+
2319
+ def zeros(queue, shape, dtype, order="C", allocator=None):
2320
+ """Same as :func:`empty`, but the :class:`Array` is zero-initialized before
2321
+ being returned.
2322
+
2323
+ .. versionchanged:: 2011.1
2324
+ *context* argument was deprecated.
2325
+ """
2326
+
2327
+ result = Array(None, shape, dtype,
2328
+ order=order, allocator=allocator,
2329
+ _context=queue.context, _queue=queue)
2330
+ result._zero_fill()
2331
+ return result
2332
+
2333
+
2334
+ def empty_like(ary, queue=_copy_queue, allocator=None):
2335
+ """Make a new, uninitialized :class:`Array` having the same properties
2336
+ as *ary*.
2337
+ """
2338
+
2339
+ return ary._new_with_changes(data=None, offset=0, queue=queue,
2340
+ allocator=allocator)
2341
+
2342
+
2343
+ def zeros_like(ary):
2344
+ """Make a new, zero-initialized :class:`Array` having the same properties
2345
+ as *ary*.
2346
+ """
2347
+
2348
+ result = ary._new_like_me()
2349
+ result._zero_fill()
2350
+ return result
2351
+
2352
+
2353
+ @dataclass
2354
+ class _ArangeInfo:
2355
+ start: Optional[int] = None
2356
+ stop: Optional[int] = None
2357
+ step: Optional[int] = None
2358
+ dtype: Optional["np.dtype"] = None
2359
+ allocator: Optional[Any] = None
2360
+
2361
+
2362
+ @elwise_kernel_runner
2363
+ def _arange_knl(result, start, step):
2364
+ return elementwise.get_arange_kernel(
2365
+ result.context, result.dtype)
2366
+
2367
+
2368
+ def arange(queue, *args, **kwargs):
2369
+ """arange(queue, [start, ] stop [, step], **kwargs)
2370
+ Create an :class:`Array` filled with numbers spaced *step* apart,
2371
+ starting from *start* and ending at *stop*. If not given, *start*
2372
+ defaults to 0, *step* defaults to 1.
2373
+
2374
+ For floating point arguments, the length of the result is
2375
+ ``ceil((stop - start)/step)``. This rule may result in the last
2376
+ element of the result being greater than *stop*.
2377
+
2378
+ *dtype* is a required keyword argument.
2379
+
2380
+ .. versionchanged:: 2011.1
2381
+ *context* argument was deprecated.
2382
+
2383
+ .. versionchanged:: 2011.2
2384
+ *allocator* keyword argument was added.
2385
+ """
2386
+
2387
+ # {{{ argument processing
2388
+
2389
+ # Yuck. Thanks, numpy developers. ;)
2390
+
2391
+ explicit_dtype = False
2392
+ inf = _ArangeInfo()
2393
+
2394
+ if isinstance(args[-1], np.dtype):
2395
+ inf.dtype = args[-1]
2396
+ args = args[:-1]
2397
+ explicit_dtype = True
2398
+
2399
+ argc = len(args)
2400
+ if argc == 0:
2401
+ raise ValueError("stop argument required")
2402
+ elif argc == 1:
2403
+ inf.stop = args[0]
2404
+ elif argc == 2:
2405
+ inf.start = args[0]
2406
+ inf.stop = args[1]
2407
+ elif argc == 3:
2408
+ inf.start = args[0]
2409
+ inf.stop = args[1]
2410
+ inf.step = args[2]
2411
+ else:
2412
+ raise ValueError("too many arguments")
2413
+
2414
+ admissible_names = ["start", "stop", "step", "dtype", "allocator"]
2415
+ for k, v in kwargs.items():
2416
+ if k in admissible_names:
2417
+ if getattr(inf, k) is None:
2418
+ setattr(inf, k, v)
2419
+ if k == "dtype":
2420
+ explicit_dtype = True
2421
+ else:
2422
+ raise ValueError(f"may not specify '{k}' by position and keyword")
2423
+ else:
2424
+ raise ValueError(f"unexpected keyword argument '{k}'")
2425
+
2426
+ if inf.start is None:
2427
+ inf.start = 0
2428
+ if inf.step is None:
2429
+ inf.step = 1
2430
+ if inf.dtype is None:
2431
+ inf.dtype = np.array([inf.start, inf.stop, inf.step]).dtype
2432
+
2433
+ # }}}
2434
+
2435
+ # {{{ actual functionality
2436
+
2437
+ dtype = np.dtype(inf.dtype)
2438
+ start = dtype.type(inf.start)
2439
+ step = dtype.type(inf.step)
2440
+ stop = dtype.type(inf.stop)
2441
+
2442
+ if not explicit_dtype:
2443
+ raise TypeError("arange requires a dtype argument")
2444
+
2445
+ from math import ceil
2446
+ size = ceil((stop-start)/step)
2447
+
2448
+ result = Array(queue, (size,), dtype, allocator=inf.allocator)
2449
+ result.add_event(_arange_knl(result, start, step, queue=queue))
2450
+
2451
+ # }}}
2452
+
2453
+ return result
2454
+
2455
+ # }}}
2456
+
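# Editorial usage sketch (not part of the shipped file): unlike numpy.arange,
# this arange requires an explicit dtype. Assumes a working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.arange(queue, 0, 10, 2, dtype=np.int32)
print(a.get())                       # [0 2 4 6 8]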
2457
+
2458
+ # {{{ take/put/concatenate/diff/(h?stack)
2459
+
2460
+ @elwise_kernel_runner
2461
+ def _take(result, ary, indices):
2462
+ return elementwise.get_take_kernel(
2463
+ result.context, result.dtype, indices.dtype)
2464
+
2465
+
2466
+ def take(a, indices, out=None, queue=None, wait_for=None):
2467
+ """Return the :class:`Array` ``[a[indices[0]], ..., a[indices[n]]]``.
2468
+ For the moment, *a* must be a type that can be bound to a texture.
2469
+ """
2470
+
2471
+ queue = queue or a.queue
2472
+ if out is None:
2473
+ out = type(a)(queue, indices.shape, a.dtype, allocator=a.allocator)
2474
+
2475
+ assert len(indices.shape) == 1
2476
+ out.add_event(
2477
+ _take(out, a, indices, queue=queue, wait_for=wait_for))
2478
+ return out
2479
+
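# Editorial usage sketch (not part of the shipped file): gathering elements
# with take(); the index array lives on the device as well. Assumes a
# working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.array([10., 20., 30., 40.], dtype=np.float32))
idx = cl_array.to_device(queue, np.array([3, 0, 0], dtype=np.int32))
print(cl_array.take(a, idx).get())   # [40. 10. 10.]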
2480
+
2481
+ def multi_take(arrays, indices, out=None, queue=None):
2482
+ if not len(arrays):
2483
+ return []
2484
+
2485
+ assert len(indices.shape) == 1
2486
+
2487
+ from pytools import single_valued
2488
+ a_dtype = single_valued(a.dtype for a in arrays)
2489
+ a_allocator = arrays[0].allocator
2490
+ context = indices.context
2491
+ queue = queue or indices.queue
2492
+
2493
+ vec_count = len(arrays)
2494
+
2495
+ if out is None:
2496
+ out = [
2497
+ type(arrays[i])(
2498
+ context, queue, indices.shape, a_dtype,
2499
+ allocator=a_allocator)
2500
+ for i in range(vec_count)]
2501
+ else:
2502
+ if len(out) != len(arrays):
2503
+ raise ValueError("out and arrays must have the same length")
2504
+
2505
+ chunk_size = builtins.min(vec_count, 10)
2506
+
2507
+ def make_func_for_chunk_size(chunk_size):
2508
+ knl = elementwise.get_take_kernel(
2509
+ indices.context, a_dtype, indices.dtype,
2510
+ vec_count=chunk_size)
2511
+ knl.set_block_shape(*indices._block)
2512
+ return knl
2513
+
2514
+ knl = make_func_for_chunk_size(chunk_size)
2515
+
2516
+ for start_i in range(0, len(arrays), chunk_size):
2517
+ chunk_slice = slice(start_i, start_i+chunk_size)
2518
+
2519
+ if start_i + chunk_size > vec_count:
2520
+ knl = make_func_for_chunk_size(vec_count-start_i)
2521
+
2522
+ gs, ls = indices._get_sizes(queue,
2523
+ knl.get_work_group_info(
2524
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2525
+ queue.device))
2526
+
2527
+ wait_for_this = (
2528
+ *indices.events,
2529
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2530
+ *[evt for o in out[chunk_slice] for evt in o.events])
2531
+ evt = knl(queue, gs, ls,
2532
+ indices.data,
2533
+ *[o.data for o in out[chunk_slice]],
2534
+ *[i.data for i in arrays[chunk_slice]],
2535
+ *[indices.size],
2536
+ wait_for=wait_for_this)
2537
+ for o in out[chunk_slice]:
2538
+ o.add_event(evt)
2539
+
2540
+ return out
2541
+
2542
+
2543
+ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None,
2544
+ out=None, queue=None, src_offsets=None):
2545
+ if not len(arrays):
2546
+ return []
2547
+
2548
+ from pytools import single_valued
2549
+ a_dtype = single_valued(a.dtype for a in arrays)
2550
+ a_allocator = arrays[0].allocator
2551
+ context = src_indices.context
2552
+ queue = queue or src_indices.queue
2553
+
2554
+ vec_count = len(arrays)
2555
+
2556
+ if out is None:
2557
+ out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
2558
+ for i in range(vec_count)]
2559
+ else:
2560
+ if a_dtype != single_valued(o.dtype for o in out):
2561
+ raise TypeError("arrays and out must have the same dtype")
2562
+ if len(out) != vec_count:
2563
+ raise ValueError("out and arrays must have the same length")
2564
+
2565
+ if src_indices.dtype != dest_indices.dtype:
2566
+ raise TypeError(
2567
+ "src_indices and dest_indices must have the same dtype")
2568
+
2569
+ if len(src_indices.shape) != 1:
2570
+ raise ValueError("src_indices must be 1D")
2571
+
2572
+ if src_indices.shape != dest_indices.shape:
2573
+ raise ValueError(
2574
+ "src_indices and dest_indices must have the same shape")
2575
+
2576
+ if src_offsets is None:
2577
+ src_offsets_list = []
2578
+ else:
2579
+ src_offsets_list = src_offsets
2580
+ if len(src_offsets) != vec_count:
2581
+ raise ValueError(
2582
+ "src_indices and src_offsets must have the same length")
2583
+
2584
+ max_chunk_size = 10
2585
+
2586
+ chunk_size = builtins.min(vec_count, max_chunk_size)
2587
+
2588
+ def make_func_for_chunk_size(chunk_size):
2589
+ return elementwise.get_take_put_kernel(context,
2590
+ a_dtype, src_indices.dtype,
2591
+ with_offsets=src_offsets is not None,
2592
+ vec_count=chunk_size)
2593
+
2594
+ knl = make_func_for_chunk_size(chunk_size)
2595
+
2596
+ for start_i in range(0, len(arrays), chunk_size):
2597
+ chunk_slice = slice(start_i, start_i+chunk_size)
2598
+
2599
+ if start_i + chunk_size > vec_count:
2600
+ knl = make_func_for_chunk_size(vec_count-start_i)
2601
+
2602
+ gs, ls = src_indices._get_sizes(queue,
2603
+ knl.get_work_group_info(
2604
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2605
+ queue.device))
2606
+
2607
+ wait_for_this = (
2608
+ *dest_indices.events,
2609
+ *src_indices.events,
2610
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2611
+ *[evt for o in out[chunk_slice] for evt in o.events])
2612
+ evt = knl(queue, gs, ls,
2613
+ *out[chunk_slice],
2614
+ dest_indices,
2615
+ src_indices,
2616
+ *arrays[chunk_slice],
2617
+ *src_offsets_list[chunk_slice],
2618
+ src_indices.size,
2619
+ wait_for=wait_for_this)
2620
+ for o in out[chunk_slice]:
2621
+ o.add_event(evt)
2622
+
2623
+ return out
2624
+
2625
+
2626
+ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None,
2627
+ wait_for=None):
2628
+ if not len(arrays):
2629
+ return []
2630
+
2631
+ from pytools import single_valued
2632
+ a_dtype = single_valued(a.dtype for a in arrays)
2633
+ a_allocator = arrays[0].allocator
2634
+ context = dest_indices.context
2635
+ queue = queue or dest_indices.queue
2636
+ if wait_for is None:
2637
+ wait_for = []
2638
+ wait_for = wait_for + dest_indices.events
2639
+
2640
+ vec_count = len(arrays)
2641
+
2642
+ if out is None:
2643
+ out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
2644
+ for i in range(vec_count)]
2645
+ else:
2646
+ if a_dtype != single_valued(o.dtype for o in out):
2647
+ raise TypeError("arrays and out must have the same dtype")
2648
+ if len(out) != vec_count:
2649
+ raise ValueError("out and arrays must have the same length")
2650
+
2651
+ if len(dest_indices.shape) != 1:
2652
+ raise ValueError("dest_indices must be 1D")
2653
+
2654
+ chunk_size = builtins.min(vec_count, 10)
2655
+
2656
+ # Array of flags specifying whether the array at the same index in this chunk
2657
+ # will be filled with a single value.
2658
+ use_fill = np.ndarray((chunk_size,), dtype=np.uint8)
2659
+ array_lengths = np.ndarray((chunk_size,), dtype=np.int64)
2660
+
2661
+ def make_func_for_chunk_size(chunk_size):
2662
+ knl = elementwise.get_put_kernel(
2663
+ context, a_dtype, dest_indices.dtype,
2664
+ vec_count=chunk_size)
2665
+ return knl
2666
+
2667
+ knl = make_func_for_chunk_size(chunk_size)
2668
+
2669
+ for start_i in range(0, len(arrays), chunk_size):
2670
+ chunk_slice = slice(start_i, start_i+chunk_size)
2671
+ for fill_idx, ary in enumerate(arrays[chunk_slice]):
2672
+ # If there is only one value in the values array for this src array
2673
+ # in the chunk then fill every index in `dest_idx` array with it.
2674
+ use_fill[fill_idx] = 1 if ary.size == 1 else 0
2675
+ array_lengths[fill_idx] = len(ary)
2676
+ # Copy the populated `use_fill` array to a buffer on the device.
2677
+ use_fill_cla = to_device(queue, use_fill)
2678
+ array_lengths_cla = to_device(queue, array_lengths)
2679
+
2680
+ if start_i + chunk_size > vec_count:
2681
+ knl = make_func_for_chunk_size(vec_count-start_i)
2682
+
2683
+ gs, ls = dest_indices._get_sizes(queue,
2684
+ knl.get_work_group_info(
2685
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2686
+ queue.device))
2687
+
2688
+ wait_for_this = (
2689
+ *wait_for,
2690
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2691
+ *[evt for o in out[chunk_slice] for evt in o.events])
2692
+ evt = knl(queue, gs, ls,
2693
+ *out[chunk_slice],
2694
+ dest_indices,
2695
+ *arrays[chunk_slice],
2696
+ use_fill_cla, array_lengths_cla, dest_indices.size,
2697
+ wait_for=wait_for_this)
2698
+
2699
+ for o in out[chunk_slice]:
2700
+ o.add_event(evt)
2701
+
2702
+ return out
2703
+
2704
+
2705
+ def concatenate(arrays, axis=0, queue=None, allocator=None):
2706
+ """
2707
+ .. versionadded:: 2013.1
2708
+
2709
+ .. note::
2710
+
2711
+ The returned array is of the same type as the first array in the list.
2712
+ """
2713
+ if not arrays:
2714
+ raise ValueError("need at least one array to concatenate")
2715
+
2716
+ # {{{ find properties of result array
2717
+
2718
+ shape = None
2719
+
2720
+ for i_ary, ary in enumerate(arrays):
2721
+ queue = queue or ary.queue
2722
+ allocator = allocator or ary.allocator
2723
+
2724
+ if shape is None:
2725
+ # first array
2726
+ shape = list(ary.shape)
2727
+ else:
2728
+ if len(ary.shape) != len(shape):
2729
+ raise ValueError(
2730
+ f"{i_ary}-th array has different number of axes: "
2731
+ f"expected {len(ary.shape)}, got {len(shape)})")
2732
+
2733
+ ary_shape_list = list(ary.shape)
2734
+ if (ary_shape_list[:axis] != shape[:axis]
2735
+ or ary_shape_list[axis+1:] != shape[axis+1:]):
2736
+ raise ValueError(
2737
+ f"{i_ary}-th array has residual not matching other arrays")
2738
+
2739
+ # pylint: disable=unsupported-assignment-operation
2740
+ shape[axis] += ary.shape[axis]
2741
+
2742
+ # }}}
2743
+
2744
+ shape = tuple(shape)
2745
+ dtype = np.result_type(*[ary.dtype for ary in arrays])
2746
+
2747
+ if __debug__:
2748
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2749
+ for ary in arrays[1:]):
2750
+ warn("Elements of 'arrays' not of the same type, returning "
2751
+ "an instance of the type of arrays[0]",
2752
+ stacklevel=2)
2753
+
2754
+ result = arrays[0].__class__(queue, shape, dtype, allocator=allocator)
2755
+
2756
+ full_slice = (slice(None),) * len(shape)
2757
+
2758
+ base_idx = 0
2759
+ for ary in arrays:
2760
+ my_len = ary.shape[axis]
2761
+ result.setitem(
2762
+ full_slice[:axis]
2763
+ + (slice(base_idx, base_idx+my_len),)
2764
+ + full_slice[axis+1:],
2765
+ ary)
2766
+
2767
+ base_idx += my_len
2768
+
2769
+ return result
2770
+
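# Editorial usage sketch (not part of the shipped file): concatenating two
# contiguous device arrays along axis 0. Assumes a working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.ones((2, 3), dtype=np.float32))
b = cl_array.to_device(queue, np.zeros((1, 3), dtype=np.float32))
c = cl_array.concatenate((a, b), axis=0)
assert c.shape == (3, 3)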
2771
+
2772
+ @elwise_kernel_runner
2773
+ def _diff(result, array):
2774
+ return elementwise.get_diff_kernel(array.context, array.dtype)
2775
+
2776
+
2777
+ def diff(array, queue=None, allocator=None):
2778
+ """
2779
+ .. versionadded:: 2013.2
2780
+ """
2781
+
2782
+ if len(array.shape) != 1:
2783
+ raise ValueError("multi-D arrays are not supported")
2784
+
2785
+ n, = array.shape
2786
+
2787
+ queue = queue or array.queue
2788
+ allocator = allocator or array.allocator
2789
+
2790
+ result = array.__class__(queue, (n-1,), array.dtype, allocator=allocator)
2791
+ event1 = _diff(result, array, queue=queue)
2792
+ result.add_event(event1)
2793
+ return result
2794
+
2795
+
2796
+ def hstack(arrays, queue=None):
2797
+ if len(arrays) == 0:
2798
+ raise ValueError("need at least one array to hstack")
2799
+
2800
+ if queue is None:
2801
+ for ary in arrays:
2802
+ if ary.queue is not None:
2803
+ queue = ary.queue
2804
+ break
2805
+
2806
+ from pytools import all_equal, single_valued
2807
+ if not all_equal(len(ary.shape) for ary in arrays):
2808
+ raise ValueError("arguments must all have the same number of axes")
2809
+
2810
+ lead_shape = single_valued(ary.shape[:-1] for ary in arrays)
2811
+
2812
+ w = builtins.sum(ary.shape[-1] for ary in arrays)
2813
+
2814
+ if __debug__:
2815
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2816
+ for ary in arrays[1:]):
2817
+ warn("Elements of 'arrays' not of the same type, returning "
2818
+ "an instance of the type of arrays[0]",
2819
+ stacklevel=2)
2820
+
2821
+ result = arrays[0].__class__(queue, (*lead_shape, w), arrays[0].dtype,
2822
+ allocator=arrays[0].allocator)
2823
+ index = 0
2824
+ for ary in arrays:
2825
+ result[..., index:index+ary.shape[-1]] = ary
2826
+ index += ary.shape[-1]
2827
+
2828
+ return result
2829
+
2830
+
2831
+ def stack(arrays, axis=0, queue=None):
2832
+ """
2833
+ Join a sequence of arrays along a new axis.
2834
+
2835
+ :arg arrays: A sequence of :class:`Array`.
2836
+ :arg axis: Index of the dimension of the new axis in the result array.
2837
+ Can be -1, in which case the new axis becomes the last dimension.
2838
+
2839
+ :returns: :class:`Array`
2840
+ """
2841
+ if not arrays:
2842
+ raise ValueError("need at least one array to stack")
2843
+
2844
+ input_shape = arrays[0].shape
2845
+ input_ndim = arrays[0].ndim
2846
+ axis = input_ndim if axis == -1 else axis
2847
+
2848
+ if queue is None:
2849
+ for ary in arrays:
2850
+ if ary.queue is not None:
2851
+ queue = ary.queue
2852
+ break
2853
+
2854
+ if not builtins.all(ary.shape == input_shape for ary in arrays[1:]):
2855
+ raise ValueError("arrays must have the same shape")
2856
+
2857
+ if not (0 <= axis <= input_ndim):
2858
+ raise ValueError("invalid axis")
2859
+
2860
+ if (axis == 0 and not builtins.all(
2861
+ ary.flags.c_contiguous for ary in arrays)):
2862
+ # pyopencl.Array.__setitem__ does not support non-contiguous assignments
2863
+ raise NotImplementedError
2864
+
2865
+ if (axis == input_ndim and not builtins.all(
2866
+ ary.flags.f_contiguous for ary in arrays)):
2867
+ # pyopencl.Array.__setitem__ does not support non-contiguous assignments
2868
+ raise NotImplementedError
2869
+
2870
+ result_shape = input_shape[:axis] + (len(arrays),) + input_shape[axis:]
2871
+
2872
+ if __debug__:
2873
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2874
+ for ary in arrays[1:]):
2875
+ warn("Elements of 'arrays' not of the same type, returning "
2876
+ "an instance of the type of arrays[0]",
2877
+ stacklevel=2)
2878
+
2879
+ result = arrays[0].__class__(queue, result_shape,
2880
+ np.result_type(*(ary.dtype
2881
+ for ary in arrays)),
2882
+ # TODO: reconsider once arrays support
2883
+ # non-contiguous assignments
2884
+ order="C" if axis == 0 else "F",
2885
+ allocator=arrays[0].allocator)
2886
+ for i, ary in enumerate(arrays):
2887
+ idx = (slice(None),)*axis + (i,) + (slice(None),)*(input_ndim-axis)
2888
+ result[idx] = ary
2889
+
2890
+ return result
2891
+
2892
+ # }}}
2893
+
2894
+
2895
+ # {{{ shape manipulation
2896
+
2897
+ def transpose(a, axes=None):
2898
+ """Permute the dimensions of an array.
2899
+
2900
+ :arg a: :class:`Array`
2901
+ :arg axes: list of ints, optional.
2902
+ By default, reverse the dimensions, otherwise permute the axes
2903
+ according to the values given.
2904
+
2905
+ :returns: :class:`Array` A view of the array with its axes permuted.
2906
+ """
2907
+ return a.transpose(axes)
2908
+
2909
+
2910
+ def reshape(a, shape):
2911
+ """Gives a new shape to an array without changing its data.
2912
+
2913
+ .. versionadded:: 2015.2
2914
+ """
2915
+
2916
+ return a.reshape(shape)
2917
+
2918
+ # }}}
2919
+
2920
+
2921
+ # {{{ conditionals
2922
+
2923
+ @elwise_kernel_runner
2924
+ def _if_positive(result, criterion, then_, else_):
2925
+ return elementwise.get_if_positive_kernel(
2926
+ result.context, criterion.dtype, then_.dtype,
2927
+ is_then_array=isinstance(then_, Array),
2928
+ is_else_array=isinstance(else_, Array),
2929
+ is_then_scalar=then_.shape == (),
2930
+ is_else_scalar=else_.shape == (),
2931
+ )
2932
+
2933
+
2934
+ def if_positive(criterion, then_, else_, out=None, queue=None):
2935
+ """Return an array like *then_*, which, for the element at index *i*,
2936
+ contains *then_[i]* if *criterion[i]>0*, else *else_[i]*.
2937
+ """
2938
+
2939
+ is_then_scalar = isinstance(then_, SCALAR_CLASSES)
2940
+ is_else_scalar = isinstance(else_, SCALAR_CLASSES)
2941
+ if isinstance(criterion, SCALAR_CLASSES) and is_then_scalar and is_else_scalar:
2942
+ result = np.where(criterion, then_, else_)
2943
+
2944
+ if out is not None:
2945
+ out[...] = result
2946
+ return out
2947
+
2948
+ return result
2949
+
2950
+ if is_then_scalar:
2951
+ then_ = np.array(then_)
2952
+
2953
+ if is_else_scalar:
2954
+ else_ = np.array(else_)
2955
+
2956
+ if then_.dtype != else_.dtype:
2957
+ raise ValueError(
2958
+ f"dtypes do not match: then_ is '{then_.dtype}' and "
2959
+ f"else_ is '{else_.dtype}'")
2960
+
2961
+ if then_.shape == () and else_.shape == ():
2962
+ pass
2963
+ elif then_.shape != () and else_.shape != ():
2964
+ if not (criterion.shape == then_.shape == else_.shape):
2965
+ raise ValueError(
2966
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
2967
+ f", 'then_' has shape {then_.shape} and 'else_' has shape "
2968
+ f"{else_.shape}")
2969
+ elif then_.shape == ():
2970
+ if criterion.shape != else_.shape:
2971
+ raise ValueError(
2972
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
2973
+ f" and 'else_' has shape {else_.shape}")
2974
+ elif else_.shape == ():
2975
+ if criterion.shape != then_.shape:
2976
+ raise ValueError(
2977
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
2978
+ f" and 'then_' has shape {then_.shape}")
2979
+ else:
2980
+ raise AssertionError()
2981
+
2982
+ if out is None:
2983
+ if then_.shape != ():
2984
+ out = empty_like(
2985
+ then_, criterion.queue, allocator=criterion.allocator)
2986
+ else:
2987
+ # Use same strides as criterion
2988
+ cr_byte_strides = np.array(criterion.strides, dtype=np.int64)
2989
+ cr_item_strides = cr_byte_strides // criterion.dtype.itemsize
2990
+ out_strides = tuple(cr_item_strides*then_.dtype.itemsize)
2991
+
2992
+ out = type(criterion)(
2993
+ criterion.queue, criterion.shape, then_.dtype,
2994
+ allocator=criterion.allocator,
2995
+ strides=out_strides)
2996
+
2997
+ event1 = _if_positive(out, criterion, then_, else_, queue=queue)
2998
+ out.add_event(event1)
2999
+
3000
+ return out
3001
+
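# Editorial usage sketch (not part of the shipped file): selecting between
# two arrays elementwise based on the sign of a criterion array. Assumes a
# working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

crit = cl_array.to_device(queue, np.array([1., -1., 2.], dtype=np.float32))
yes = cl_array.to_device(queue, np.array([10., 20., 30.], dtype=np.float32))
no = cl_array.to_device(queue, np.array([-10., -20., -30.], dtype=np.float32))
print(cl_array.if_positive(crit, yes, no).get())   # [ 10. -20.  30.]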
3002
+ # }}}
3003
+
3004
+
3005
+ # {{{ minimum/maximum
3006
+
3007
+ @elwise_kernel_runner
3008
+ def _minimum_maximum_backend(out, a, b, minmax):
3009
+ from pyopencl.elementwise import get_minmaximum_kernel
3010
+ return get_minmaximum_kernel(out.context, minmax,
3011
+ out.dtype,
3012
+ a.dtype if isinstance(a, Array) else np.dtype(type(a)),
3013
+ b.dtype if isinstance(b, Array) else np.dtype(type(b)),
3014
+ elementwise.get_argument_kind(a),
3015
+ elementwise.get_argument_kind(b))
3016
+
3017
+
3018
+ def maximum(a, b, out=None, queue=None):
3019
+ """Return the elementwise maximum of *a* and *b*."""
3020
+
3021
+ a_is_scalar = np.isscalar(a)
3022
+ b_is_scalar = np.isscalar(b)
3023
+ if a_is_scalar and b_is_scalar:
3024
+ result = np.maximum(a, b)
3025
+ if out is not None:
3026
+ out[...] = result
3027
+ return out
3028
+
3029
+ return result
3030
+
3031
+ queue = queue or a.queue or b.queue
3032
+
3033
+ if out is None:
3034
+ out_dtype = _get_common_dtype(a, b, queue)
3035
+ if not a_is_scalar:
3036
+ out = a._new_like_me(out_dtype, queue)
3037
+ elif not b_is_scalar:
3038
+ out = b._new_like_me(out_dtype, queue)
3039
+
3040
+ out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="max"))
3041
+
3042
+ return out
3043
+
3044
+
3045
+ def minimum(a, b, out=None, queue=None):
3046
+ """Return the elementwise minimum of *a* and *b*."""
3047
+ a_is_scalar = np.isscalar(a)
3048
+ b_is_scalar = np.isscalar(b)
3049
+ if a_is_scalar and b_is_scalar:
3050
+ result = np.minimum(a, b)
3051
+ if out is not None:
3052
+ out[...] = result
3053
+ return out
3054
+
3055
+ return result
3056
+
3057
+ queue = queue or a.queue or b.queue
3058
+
3059
+ if out is None:
3060
+ out_dtype = _get_common_dtype(a, b, queue)
3061
+ if not a_is_scalar:
3062
+ out = a._new_like_me(out_dtype, queue)
3063
+ elif not b_is_scalar:
3064
+ out = b._new_like_me(out_dtype, queue)
3065
+
3066
+ out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="min"))
3067
+
3068
+ return out
3069
+
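# Editorial usage sketch (not part of the shipped file): elementwise maximum
# and minimum between a device array and a scalar; the result dtype follows
# the usual common-dtype rules. Assumes a working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.array([1., 5., 3.], dtype=np.float32))
print(cl_array.maximum(a, 2.0).get())   # [2. 5. 3.]
print(cl_array.minimum(a, 2.0).get())   # [1. 2. 2.]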
3070
+ # }}}
3071
+
3072
+
3073
+ # {{{ logical ops
3074
+
3075
+ def _logical_op(x1, x2, out, operator, queue=None):
3076
+ # NOTE: Copied from pycuda.gpuarray
3077
+ assert operator in ["&&", "||"]
3078
+
3079
+ if np.isscalar(x1) and np.isscalar(x2):
3080
+ if out is None:
3081
+ out = empty(queue, shape=(), dtype=np.int8)
3082
+
3083
+ if operator == "&&":
3084
+ out[:] = np.logical_and(x1, x2)
3085
+ else:
3086
+ out[:] = np.logical_or(x1, x2)
3087
+ elif np.isscalar(x1) or np.isscalar(x2):
3088
+ scalar_arg, = (x for x in (x1, x2) if np.isscalar(x))
3089
+ ary_arg, = (x for x in (x1, x2) if not np.isscalar(x))
3090
+ queue = queue or ary_arg.queue
3091
+ allocator = ary_arg.allocator
3092
+
3093
+ if not isinstance(ary_arg, Array):
3094
+ raise ValueError("logical_and can take either scalar or Array"
3095
+ " as inputs")
3096
+
3097
+ out = out or ary_arg._new_like_me(dtype=np.int8)
3098
+
3099
+ assert out.shape == ary_arg.shape and out.dtype == np.int8
3100
+
3101
+ knl = elementwise.get_array_scalar_binop_kernel(
3102
+ queue.context,
3103
+ operator,
3104
+ out.dtype,
3105
+ ary_arg.dtype,
3106
+ np.dtype(type(scalar_arg))
3107
+ )
3108
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, ary_arg, scalar_arg)
3109
+ else:
3110
+ if not (isinstance(x1, Array) and isinstance(x2, Array)):
3111
+ raise ValueError("logical_or/logical_and can take either scalar"
3112
+ " or Arrays as inputs")
3113
+ if x1.shape != x2.shape:
3114
+ raise NotImplementedError("Broadcasting not supported")
3115
+
3116
+ queue = queue or x1.queue or x2.queue
3117
+ allocator = x1.allocator or x2.allocator
3118
+
3119
+ if out is None:
3120
+ out = empty(queue, allocator=allocator,
3121
+ shape=x1.shape, dtype=np.int8)
3122
+
3123
+ assert out.shape == x1.shape and out.dtype == np.int8
3124
+
3125
+ knl = elementwise.get_array_binop_kernel(
3126
+ queue.context,
3127
+ operator,
3128
+ out.dtype,
3129
+ x1.dtype, x2.dtype)
3130
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x1, x2)
3131
+
3132
+ return out
3133
+
3134
+
3135
+ def logical_and(x1, x2, /, out=None, queue=None):
3136
+ """
3137
+ Returns the element-wise logical AND of *x1* and *x2*.
3138
+ """
3139
+ return _logical_op(x1, x2, out, "&&", queue=queue)
3140
+
3141
+
3142
+ def logical_or(x1, x2, /, out=None, queue=None):
3143
+ """
3144
+ Returns the element-wise logical OR of *x1* and *x2*.
3145
+ """
3146
+ return _logical_op(x1, x2, out, "||", queue=queue)
3147
+
3148
+
3149
+ def logical_not(x, /, out=None, queue=None):
3150
+ """
3151
+ Returns the element-wise logical NOT of *x*.
3152
+ """
3153
+ if np.isscalar(x):
3154
+ out = out or empty(queue, shape=(), dtype=np.int8)
3155
+ out[:] = np.logical_not(x)
3156
+ else:
3157
+ queue = queue or x.queue
3158
+ out = out or empty(queue, shape=x.shape, dtype=np.int8,
3159
+ allocator=x.allocator)
3160
+ knl = elementwise.get_logical_not_kernel(queue.context,
3161
+ x.dtype)
3162
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x)
3163
+
3164
+ return out
3165
+
3166
+ # }}}
3167
+
3168
+
3169
+ # {{{ reductions
3170
+
3171
+ def sum(a, dtype=None, queue=None, slice=None, initial=np._NoValue):
3172
+ """
3173
+ .. versionadded:: 2011.1
3174
+ """
3175
+ if initial is not np._NoValue and not isinstance(initial, SCALAR_CLASSES):
3176
+ raise ValueError("'initial' is not a scalar")
3177
+
3178
+ if dtype is not None:
3179
+ dtype = np.dtype(dtype)
3180
+
3181
+ from pyopencl.reduction import get_sum_kernel
3182
+ krnl = get_sum_kernel(a.context, dtype, a.dtype)
3183
+ result, event1 = krnl(a, queue=queue, slice=slice, wait_for=a.events,
3184
+ return_event=True)
3185
+ result.add_event(event1)
3186
+
3187
+ # NOTE: neutral element in `get_sum_kernel` is 0 by default
3188
+ if initial is not np._NoValue:
3189
+ result += a.dtype.type(initial)
3190
+
3191
+ return result
3192
+
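# Editorial usage sketch (not part of the shipped file): sum() reduces on the
# device and returns a zero-dimensional Array; 'initial' is added on top of
# the reduction's neutral element 0. Assumes a working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.arange(5, dtype=np.int32))   # 0+1+2+3+4
print(int(cl_array.sum(a).get()))                   # 10
print(int(cl_array.sum(a, initial=100).get()))      # 110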
3193
+
3194
+ def any(a, queue=None, wait_for=None):
3195
+ if len(a) == 0:
3196
+ return _BOOL_DTYPE.type(False)
3197
+
3198
+ return a.any(queue=queue, wait_for=wait_for)
3199
+
3200
+
3201
+ def all(a, queue=None, wait_for=None):
3202
+ if len(a) == 0:
3203
+ return _BOOL_DTYPE.type(True)
3204
+
3205
+ return a.all(queue=queue, wait_for=wait_for)
3206
+
3207
+
3208
+ def dot(a, b, dtype=None, queue=None, slice=None):
3209
+ """
3210
+ .. versionadded:: 2011.1
3211
+ """
3212
+ if dtype is not None:
3213
+ dtype = np.dtype(dtype)
3214
+
3215
+ from pyopencl.reduction import get_dot_kernel
3216
+ krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype)
3217
+
3218
+ result, event1 = krnl(a, b, queue=queue, slice=slice,
3219
+ wait_for=a.events + b.events, return_event=True)
3220
+ result.add_event(event1)
3221
+
3222
+ return result
3223
+
3224
+
3225
+ def vdot(a, b, dtype=None, queue=None, slice=None):
3226
+ """Like :func:`numpy.vdot`.
3227
+
3228
+ .. versionadded:: 2013.1
3229
+ """
3230
+ if dtype is not None:
3231
+ dtype = np.dtype(dtype)
3232
+
3233
+ from pyopencl.reduction import get_dot_kernel
3234
+ krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype,
3235
+ conjugate_first=True)
3236
+
3237
+ result, event1 = krnl(a, b, queue=queue, slice=slice,
3238
+ wait_for=a.events + b.events, return_event=True)
3239
+ result.add_event(event1)
3240
+
3241
+ return result
3242
+
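# Editorial usage sketch (not part of the shipped file): dot() reduces the
# elementwise product of two device arrays; vdot() additionally conjugates
# its first argument, which only matters for complex dtypes. Assumes a
# working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.array([1., 2., 3.], dtype=np.float32))
b = cl_array.to_device(queue, np.array([4., 5., 6.], dtype=np.float32))
print(float(cl_array.dot(a, b).get()))   # 32.0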
3243
+
3244
+ def subset_dot(subset, a, b, dtype=None, queue=None, slice=None):
3245
+ """
3246
+ .. versionadded:: 2011.1
3247
+ """
3248
+ if dtype is not None:
3249
+ dtype = np.dtype(dtype)
3250
+
3251
+ from pyopencl.reduction import get_subset_dot_kernel
3252
+ krnl = get_subset_dot_kernel(
3253
+ a.context, dtype, subset.dtype, a.dtype, b.dtype)
3254
+
3255
+ result, event1 = krnl(subset, a, b, queue=queue, slice=slice,
3256
+ wait_for=subset.events + a.events + b.events, return_event=True)
3257
+ result.add_event(event1)
3258
+
3259
+ return result
3260
+
3261
+
3262
+ def _make_minmax_kernel(what):
3263
+ def f(a, queue=None, initial=np._NoValue):
3264
+ if isinstance(a, SCALAR_CLASSES):
3265
+ return np.array(a).dtype.type(a)
3266
+
3267
+ if len(a) == 0:
3268
+ if initial is np._NoValue:
3269
+ raise ValueError(
3270
+ f"zero-size array to reduction '{what}' "
3271
+ "which has no identity")
3272
+ else:
3273
+ return initial
3274
+
3275
+ if initial is not np._NoValue and not isinstance(initial, SCALAR_CLASSES):
3276
+ raise ValueError("'initial' is not a scalar")
3277
+
3278
+ from pyopencl.reduction import get_minmax_kernel
3279
+ krnl = get_minmax_kernel(a.context, what, a.dtype)
3280
+ result, event1 = krnl(a, queue=queue, wait_for=a.events,
3281
+ return_event=True)
3282
+ result.add_event(event1)
3283
+
3284
+ if initial is not np._NoValue:
3285
+ initial = a.dtype.type(initial)
3286
+ if what == "min":
3287
+ result = minimum(result, initial, queue=queue)
3288
+ elif what == "max":
3289
+ result = maximum(result, initial, queue=queue)
3290
+ else:
3291
+ raise ValueError(f"unknown minmax reduction type: '{what}'")
3292
+
3293
+ return result
3294
+
3295
+ return f
3296
+
3297
+
3298
+ min = _make_minmax_kernel("min")
3299
+ min.__name__ = "min"
3300
+ min.__doc__ = """
3301
+ .. versionadded:: 2011.1
3302
+ """
3303
+
3304
+ max = _make_minmax_kernel("max")
3305
+ max.__name__ = "max"
3306
+ max.__doc__ = """
3307
+ .. versionadded:: 2011.1
3308
+ """
3309
+
3310
+
3311
+ def _make_subset_minmax_kernel(what):
3312
+ def f(subset, a, queue=None, slice=None):
3313
+ from pyopencl.reduction import get_subset_minmax_kernel
3314
+ krnl = get_subset_minmax_kernel(a.context, what, a.dtype, subset.dtype)
3315
+ result, event1 = krnl(subset, a, queue=queue, slice=slice,
3316
+ wait_for=a.events + subset.events, return_event=True)
3317
+ result.add_event(event1)
3318
+ return result
3319
+ return f
3320
+
3321
+
3322
+ subset_min = _make_subset_minmax_kernel("min")
3323
+ subset_min.__doc__ = """.. versionadded:: 2011.1"""
3324
+ subset_max = _make_subset_minmax_kernel("max")
3325
+ subset_max.__doc__ = """.. versionadded:: 2011.1"""
3326
+
3327
+ # }}}
3328
+
3329
+
3330
+ # {{{ scans
3331
+
3332
+ def cumsum(a, output_dtype=None, queue=None,
3333
+ wait_for=None, return_event=False):
3334
+ # undocumented for now
3335
+
3336
+ """
3337
+ .. versionadded:: 2013.1
3338
+ """
3339
+
3340
+ if output_dtype is None:
3341
+ output_dtype = a.dtype
3342
+ else:
3343
+ output_dtype = np.dtype(output_dtype)
3344
+
3345
+ if wait_for is None:
3346
+ wait_for = []
3347
+
3348
+ result = a._new_like_me(output_dtype)
3349
+
3350
+ from pyopencl.scan import get_cumsum_kernel
3351
+ krnl = get_cumsum_kernel(a.context, a.dtype, output_dtype)
3352
+ evt = krnl(a, result, queue=queue, wait_for=wait_for + a.events)
3353
+ result.add_event(evt)
3354
+
3355
+ if return_event:
3356
+ return evt, result
3357
+ else:
3358
+ return result
3359
+
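# Editorial usage sketch (not part of the shipped file): cumsum() computes an
# inclusive prefix sum on the device. Assumes a working OpenCL platform.
import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = cl_array.to_device(queue, np.array([1, 2, 3, 4], dtype=np.int32))
print(cl_array.cumsum(a).get())          # [ 1  3  6 10]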
3360
+ # }}}
3361
+
3362
+ # vim: foldmethod=marker