pyopencl-2024.3-cp38-cp38-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyopencl might be problematic.
Files changed (43)
  1. pyopencl/.libs/libOpenCL-1ef0e16e.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +2410 -0
  3. pyopencl/_cl.cpython-38-x86_64-linux-gnu.so +0 -0
  4. pyopencl/_cluda.py +54 -0
  5. pyopencl/_mymako.py +14 -0
  6. pyopencl/algorithm.py +1449 -0
  7. pyopencl/array.py +3437 -0
  8. pyopencl/bitonic_sort.py +242 -0
  9. pyopencl/bitonic_sort_templates.py +594 -0
  10. pyopencl/cache.py +535 -0
  11. pyopencl/capture_call.py +177 -0
  12. pyopencl/characterize/__init__.py +456 -0
  13. pyopencl/characterize/performance.py +237 -0
  14. pyopencl/cl/pyopencl-airy.cl +324 -0
  15. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  16. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  17. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  18. pyopencl/cl/pyopencl-complex.h +303 -0
  19. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  20. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  21. pyopencl/cl/pyopencl-random123/array.h +325 -0
  22. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  23. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  24. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  25. pyopencl/clmath.py +280 -0
  26. pyopencl/clrandom.py +409 -0
  27. pyopencl/cltypes.py +137 -0
  28. pyopencl/compyte/.gitignore +21 -0
  29. pyopencl/compyte/__init__.py +0 -0
  30. pyopencl/compyte/array.py +214 -0
  31. pyopencl/compyte/dtypes.py +290 -0
  32. pyopencl/compyte/pyproject.toml +54 -0
  33. pyopencl/elementwise.py +1171 -0
  34. pyopencl/invoker.py +421 -0
  35. pyopencl/ipython_ext.py +68 -0
  36. pyopencl/reduction.py +786 -0
  37. pyopencl/scan.py +1915 -0
  38. pyopencl/tools.py +1527 -0
  39. pyopencl/version.py +9 -0
  40. pyopencl-2024.3.dist-info/METADATA +108 -0
  41. pyopencl-2024.3.dist-info/RECORD +43 -0
  42. pyopencl-2024.3.dist-info/WHEEL +5 -0
  43. pyopencl-2024.3.dist-info/licenses/LICENSE +104 -0
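
Before reading the diff of pyopencl/array.py below, a minimal usage sketch of the pyopencl.array interface may help orient the review. The snippet is illustrative only (it is not part of the wheel contents) and assumes a working OpenCL platform and device are available.

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    # Pick any available OpenCL device and create a command queue for it.
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Copy host data to the device and build a second device array.
    a = cl_array.to_device(queue, np.arange(8, dtype=np.float32))
    b = cl_array.zeros(queue, 8, dtype=np.float32) + 2

    # Arithmetic on Array objects enqueues elementwise kernels on the
    # arrays' default queue; .get() copies the result back to numpy.
    c = (a * b + 1).get()
    print(c)  # [ 1.  3.  5.  7.  9. 11. 13. 15.]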
pyopencl/array.py ADDED
@@ -0,0 +1,3437 @@
1
+ """CL device arrays."""
2
+
3
+ # NOTE: for elwise_kernel_runner which adds keyword arguments
4
+ # pylint:disable=unexpected-keyword-arg
5
+
6
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
7
+
8
+ __license__ = """
9
+ Permission is hereby granted, free of charge, to any person
10
+ obtaining a copy of this software and associated documentation
11
+ files (the "Software"), to deal in the Software without
12
+ restriction, including without limitation the rights to use,
13
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the
15
+ Software is furnished to do so, subject to the following
16
+ conditions:
17
+
18
+ The above copyright notice and this permission notice shall be
19
+ included in all copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28
+ OTHER DEALINGS IN THE SOFTWARE.
29
+ """
30
+
31
+ import builtins
32
+ from dataclasses import dataclass
33
+ from functools import reduce
34
+ from numbers import Number
35
+ from typing import Any, Dict, Hashable, List, Optional, Tuple, Union
36
+ from warnings import warn
37
+
38
+ import numpy as np
39
+
40
+ import pyopencl as cl
41
+ import pyopencl.elementwise as elementwise
42
+ from pyopencl import cltypes
43
+ from pyopencl.characterize import has_double_support
44
+ from pyopencl.compyte.array import (
45
+ ArrayFlags as _ArrayFlags,
46
+ as_strided as _as_strided,
47
+ c_contiguous_strides as _c_contiguous_strides,
48
+ equal_strides as _equal_strides,
49
+ f_contiguous_strides as _f_contiguous_strides,
50
+ )
51
+
52
+
53
+ SCALAR_CLASSES = (Number, np.bool_, bool)
54
+
55
+ if cl.get_cl_header_version() >= (2, 0):
56
+ _SVMPointer_or_nothing = cl.SVMPointer
57
+ else:
58
+ _SVMPointer_or_nothing = ()
59
+
60
+
61
+ _NUMPY_PRE_2 = np.__version__.startswith("1.")
62
+
63
+
64
+ # {{{ _get_common_dtype
65
+
66
+ _COMMON_DTYPE_CACHE: Dict[Tuple[Hashable, ...], np.dtype] = {}
67
+
68
+
69
+ class DoubleDowncastWarning(UserWarning):
70
+ pass
71
+
72
+
73
+ _DOUBLE_DOWNCAST_WARNING = (
74
+ "The operation you requested would result in a double-precisision "
75
+ "quantity according to numpy semantics. Since your device does not "
76
+ "support double precision, a single-precision quantity is being returned.")
77
+
78
+
79
+ def _get_common_dtype(obj1, obj2, queue):
80
+ if queue is None:
81
+ raise ValueError("PyOpenCL array has no queue; call .with_queue() to "
82
+ "add one in order to be able to perform operations")
83
+
84
+ allow_double = has_double_support(queue.device)
85
+ cache_key = None
86
+ o1_is_array = isinstance(obj1, Array)
87
+ o2_is_array = isinstance(obj2, Array)
88
+
89
+ if o1_is_array and o2_is_array:
90
+ o1_dtype = obj1.dtype
91
+ o2_dtype = obj2.dtype
92
+ cache_key = (obj1.dtype, obj2.dtype, allow_double)
93
+ else:
94
+ o1_dtype = getattr(obj1, "dtype", type(obj1))
95
+ o2_dtype = getattr(obj2, "dtype", type(obj2))
96
+
97
+ o1_is_integral = np.issubdtype(o1_dtype, np.integer)
98
+ o2_is_integral = np.issubdtype(o2_dtype, np.integer)
99
+
100
+ o1_key = obj1 if o1_is_integral and not o1_is_array else o1_dtype
101
+ o2_key = obj2 if o2_is_integral and not o2_is_array else o2_dtype
102
+
103
+ cache_key = (o1_key, o2_key, o1_is_array, o2_is_array, allow_double)
104
+
105
+ try:
106
+ return _COMMON_DTYPE_CACHE[cache_key]
107
+ except KeyError:
108
+ pass
109
+
110
+ # Numpy's behavior around integers is a bit bizarre, and definitely value-
111
+ # and not just type-sensitive when it comes to scalars. We'll just do our
112
+ # best to emulate it.
113
+ #
114
+ # Some samples that are true as of numpy 1.23.1.
115
+ #
116
+ # >>> a = np.zeros(1, dtype=np.int16)
117
+ # >>> (a + 123123123312).dtype
118
+ # dtype('int64')
119
+ # >>> (a + 12312).dtype
120
+ # dtype('int16')
121
+ # >>> (a + 12312444).dtype
122
+ # dtype('int32')
123
+ # >>> (a + np.int32(12312444)).dtype
124
+ # dtype('int32')
125
+ # >>> (a + np.int32(1234)).dtype
126
+ # dtype('int16')
127
+ #
128
+ # Note that np.find_common_type, while appealing, won't be able to tell
129
+ # the full story.
130
+
131
+ if (_NUMPY_PRE_2
132
+ and not (o1_is_array and o2_is_array)
133
+ and o1_is_integral and o2_is_integral):
134
+ if o1_is_array:
135
+ obj1 = np.zeros(1, dtype=o1_dtype)
136
+ if o2_is_array:
137
+ obj2 = np.zeros(1, dtype=o2_dtype)
138
+
139
+ result = (obj1 + obj2).dtype
140
+ else:
141
+ array_types = []
142
+ scalars = []
143
+
144
+ if o1_is_array:
145
+ array_types.append(o1_dtype)
146
+ else:
147
+ scalars.append(obj1)
148
+ if o2_is_array:
149
+ array_types.append(o2_dtype)
150
+ else:
151
+ scalars.append(obj2)
152
+
153
+ result = np.result_type(*array_types, *scalars)
154
+
155
+ if not allow_double:
156
+ if result == np.float64:
157
+ result = np.dtype(np.float32)
158
+ warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
159
+ elif result == np.complex128:
160
+ result = np.dtype(np.complex64)
161
+ warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
162
+
163
+ if cache_key is not None:
164
+ _COMMON_DTYPE_CACHE[cache_key] = result
165
+
166
+ return result
167
+
168
+ # }}}
169
+
170
+
171
+ # {{{ _get_truedivide_dtype
172
+
173
+ def _get_truedivide_dtype(obj1, obj2, queue):
174
+ # the dtype of the division result obj1 / obj2
175
+
176
+ allow_double = has_double_support(queue.device)
177
+
178
+ x1 = obj1 if np.isscalar(obj1) else np.ones(1, obj1.dtype)
179
+ x2 = obj2 if np.isscalar(obj2) else np.ones(1, obj2.dtype)
180
+
181
+ result = (x1/x2).dtype
182
+
183
+ if not allow_double:
184
+ if result == np.float64:
185
+ result = np.dtype(np.float32)
186
+ elif result == np.complex128:
187
+ result = np.dtype(np.complex64)
188
+
189
+ return result
190
+
191
+ # }}}
192
+
193
+
194
+ # {{{ _get_broadcasted_binary_op_result
195
+
196
+ def _get_broadcasted_binary_op_result(obj1, obj2, cq,
197
+ dtype_getter=_get_common_dtype):
198
+
199
+ if obj1.shape == obj2.shape:
200
+ return obj1._new_like_me(dtype_getter(obj1, obj2, cq),
201
+ cq)
202
+ elif obj1.shape == ():
203
+ return obj2._new_like_me(dtype_getter(obj1, obj2, cq),
204
+ cq)
205
+ elif obj2.shape == ():
206
+ return obj1._new_like_me(dtype_getter(obj1, obj2, cq),
207
+ cq)
208
+ else:
209
+ raise NotImplementedError("Broadcasting binary operator with shapes:"
210
+ f" {obj1.shape}, {obj2.shape}.")
211
+
212
+ # }}}
213
+
214
+
215
+ # {{{ VecLookupWarner
216
+
217
+ class VecLookupWarner:
218
+ def __getattr__(self, name):
219
+ warn("pyopencl.array.vec is deprecated. "
220
+ "Please use pyopencl.cltypes for OpenCL vector and scalar types",
221
+ DeprecationWarning, stacklevel=2)
222
+
223
+ if name == "types":
224
+ name = "vec_types"
225
+ elif name == "type_to_scalar_and_count":
226
+ name = "vec_type_to_scalar_and_count"
227
+
228
+ return getattr(cltypes, name)
229
+
230
+
231
+ vec = VecLookupWarner()
232
+
233
+ # }}}
234
+
235
+
236
+ # {{{ helper functionality
237
+
238
+ def _splay(device, n, kernel_specific_max_wg_size=None):
239
+ max_work_items = builtins.min(128, device.max_work_group_size)
240
+
241
+ if kernel_specific_max_wg_size is not None:
242
+ max_work_items = builtins.min(max_work_items, kernel_specific_max_wg_size)
243
+
244
+ min_work_items = builtins.min(32, max_work_items)
245
+ max_groups = device.max_compute_units * 4 * 8
246
+ # 4 to overfill the device
247
+ # 8 is an Nvidia constant--that's how many
248
+ # groups fit onto one compute device
249
+
250
+ if n < min_work_items:
251
+ group_count = 1
252
+ work_items_per_group = min_work_items
253
+ elif n < (max_groups * min_work_items):
254
+ group_count = (n + min_work_items - 1) // min_work_items
255
+ work_items_per_group = min_work_items
256
+ elif n < (max_groups * max_work_items):
257
+ group_count = max_groups
258
+ grp = (n + min_work_items - 1) // min_work_items
259
+ work_items_per_group = (
260
+ (grp + max_groups - 1) // max_groups) * min_work_items
261
+ else:
262
+ group_count = max_groups
263
+ work_items_per_group = max_work_items
264
+
265
+ # print("n:%d gc:%d wipg:%d" % (n, group_count, work_items_per_group))
266
+ return (group_count*work_items_per_group,), (work_items_per_group,)
267
+
268
+
269
+ # deliberately undocumented for now
270
+ ARRAY_KERNEL_EXEC_HOOK = None
271
+
272
+
273
+ def elwise_kernel_runner(kernel_getter):
274
+ """Take a kernel getter of the same signature as the kernel
275
+ and return a function that invokes that kernel.
276
+
277
+ Assumes that the zeroth entry in *args* is an :class:`Array`.
278
+ """
279
+ from functools import wraps
280
+
281
+ @wraps(kernel_getter)
282
+ def kernel_runner(out, *args, **kwargs):
283
+ assert isinstance(out, Array)
284
+
285
+ wait_for = kwargs.pop("wait_for", None)
286
+ queue = kwargs.pop("queue", None)
287
+ if queue is None:
288
+ queue = out.queue
289
+
290
+ assert queue is not None
291
+
292
+ knl = kernel_getter(out, *args, **kwargs)
293
+ work_group_info = knl.get_work_group_info(
294
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
295
+ queue.device)
296
+ gs, ls = out._get_sizes(queue, work_group_info)
297
+
298
+ args = (out, *args, out.size)
299
+ if ARRAY_KERNEL_EXEC_HOOK is not None:
300
+ return ARRAY_KERNEL_EXEC_HOOK( # pylint: disable=not-callable
301
+ knl, queue, gs, ls, *args, wait_for=wait_for)
302
+ else:
303
+ return knl(queue, gs, ls, *args, wait_for=wait_for)
304
+
305
+ return kernel_runner
306
+
307
+
308
+ class DefaultAllocator(cl.tools.DeferredAllocator):
309
+ def __init__(self, *args, **kwargs):
310
+ warn("pyopencl.array.DefaultAllocator is deprecated. "
311
+ "It will be continue to exist throughout the 2013.x "
312
+ "versions of PyOpenCL.",
313
+ DeprecationWarning, stacklevel=2)
314
+ cl.tools.DeferredAllocator.__init__(self, *args, **kwargs)
315
+
316
+ # }}}
317
+
318
+
319
+ # {{{ array class
320
+
321
+ class InconsistentOpenCLQueueWarning(UserWarning):
322
+ pass
323
+
324
+
325
+ class ArrayHasOffsetError(ValueError):
326
+ """
327
+ .. versionadded:: 2013.1
328
+ """
329
+
330
+ def __init__(self, val="The operation you are attempting does not yet "
331
+ "support arrays that start at an offset from the beginning "
332
+ "of their buffer."):
333
+ ValueError.__init__(self, val)
334
+
335
+
336
+ class _copy_queue: # noqa: N801
337
+ pass
338
+
339
+
340
+ _ARRAY_GET_SIZES_CACHE: Dict[Tuple[int, int, int], Tuple[int, int]] = {}
341
+ _BOOL_DTYPE = np.dtype(np.int8)
342
+ _NOT_PRESENT = object()
343
+
344
+
345
+ class Array:
346
+ """A :class:`numpy.ndarray` work-alike that stores its data and performs
347
+ its computations on the compute device. :attr:`shape` and :attr:`dtype` work
348
+ exactly as in :mod:`numpy`. Arithmetic methods in :class:`Array` support the
349
+ broadcasting of scalars. (e.g. ``array + 5``).
350
+
351
+ *cq* must be a :class:`~pyopencl.CommandQueue` or a :class:`~pyopencl.Context`.
352
+
353
+ If it is a queue, *cq* specifies the queue in which the array carries out
354
+ its computations by default. If a default queue (and thereby overloaded
355
+ operators and many other niceties) are not desired, pass a
356
+ :class:`~pyopencl.Context`.
357
+
358
+ *allocator* may be *None* or a callable that, upon being called with an
359
+ argument of the number of bytes to be allocated, returns a
360
+ :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool`
361
+ instance is one useful example of an object to pass here.)
362
+
363
+ .. versionchanged:: 2011.1
364
+
365
+ Renamed *context* to *cqa*, made it general-purpose.
366
+
367
+ All arguments beyond *order* should be considered keyword-only.
368
+
369
+ .. versionchanged:: 2015.2
370
+
371
+ Renamed *context* to *cq*, disallowed passing allocators through it.
372
+
373
+ .. attribute :: data
374
+
375
+ The :class:`pyopencl.MemoryObject` instance created for the memory that
376
+ backs this :class:`Array`.
377
+
378
+ .. versionchanged:: 2013.1
379
+
380
+ If a non-zero :attr:`offset` has been specified for this array,
381
+ this will fail with :exc:`ArrayHasOffsetError`.
382
+
383
+ .. attribute :: base_data
384
+
385
+ The :class:`pyopencl.MemoryObject` instance created for the memory that
386
+ backs this :class:`Array`. Unlike :attr:`data`, the base address of
387
+ *base_data* is allowed to be different from the beginning of the array.
388
+ The actual beginning is the base address of *base_data* plus
389
+ :attr:`offset` bytes.
390
+
391
+ Unlike :attr:`data`, retrieving :attr:`base_data` always succeeds.
392
+
393
+ .. versionadded:: 2013.1
394
+
395
+ .. attribute :: offset
396
+
397
+ See :attr:`base_data`.
398
+
399
+ .. versionadded:: 2013.1
400
+
401
+ .. attribute :: shape
402
+
403
+ A tuple of lengths of each dimension in the array.
404
+
405
+ .. attribute :: ndim
406
+
407
+ The number of dimensions in :attr:`shape`.
408
+
409
+ .. attribute :: dtype
410
+
411
+ The :class:`numpy.dtype` of the items in the GPU array.
412
+
413
+ .. attribute :: size
414
+
415
+ The number of meaningful entries in the array. Can also be computed by
416
+ multiplying up the numbers in :attr:`shape`.
417
+
418
+ .. attribute :: nbytes
419
+
420
+ The size of the entire array in bytes. Computed as :attr:`size` times
421
+ ``dtype.itemsize``.
422
+
423
+ .. attribute :: strides
424
+
425
+ A tuple of bytes to step in each dimension when traversing an array.
426
+
427
+ .. attribute :: flags
428
+
429
+ An object with attributes ``c_contiguous``, ``f_contiguous`` and
430
+ ``forc``, which may be used to query contiguity properties in analogy to
431
+ :attr:`numpy.ndarray.flags`.
432
+
433
+ .. rubric:: Methods
434
+
435
+ .. automethod :: with_queue
436
+
437
+ .. automethod :: __len__
438
+ .. automethod :: reshape
439
+ .. automethod :: ravel
440
+ .. automethod :: view
441
+ .. automethod :: squeeze
442
+ .. automethod :: transpose
443
+ .. attribute :: T
444
+ .. automethod :: set
445
+ .. automethod :: get
446
+ .. automethod :: get_async
447
+ .. automethod :: copy
448
+
449
+ .. automethod :: __str__
450
+ .. automethod :: __repr__
451
+
452
+ .. automethod :: mul_add
453
+ .. automethod :: __add__
454
+ .. automethod :: __sub__
455
+ .. automethod :: __iadd__
456
+ .. automethod :: __isub__
457
+ .. automethod :: __pos__
458
+ .. automethod :: __neg__
459
+ .. automethod :: __mul__
460
+ .. automethod :: __div__
461
+ .. automethod :: __rdiv__
462
+ .. automethod :: __pow__
463
+
464
+ .. automethod :: __and__
465
+ .. automethod :: __xor__
466
+ .. automethod :: __or__
467
+ .. automethod :: __iand__
468
+ .. automethod :: __ixor__
469
+ .. automethod :: __ior__
470
+
471
+ .. automethod :: __abs__
472
+ .. automethod :: __invert__
473
+
474
+ .. UNDOC reverse()
475
+
476
+ .. automethod :: fill
477
+
478
+ .. automethod :: astype
479
+
480
+ .. autoattribute :: real
481
+ .. autoattribute :: imag
482
+ .. automethod :: conj
483
+ .. automethod :: conjugate
484
+
485
+ .. automethod :: __getitem__
486
+ .. automethod :: __setitem__
487
+
488
+ .. automethod :: setitem
489
+
490
+ .. automethod :: map_to_host
491
+
492
+ .. rubric:: Comparisons, conditionals, any, all
493
+
494
+ .. versionadded:: 2013.2
495
+
496
+ Boolean arrays are stored as :class:`numpy.int8` because ``bool``
497
+ has an unspecified size in the OpenCL spec.
498
+
499
+ .. automethod :: __bool__
500
+
501
+ Only works for device scalars. (i.e. "arrays" with ``shape == ()``)
502
+
503
+ .. automethod :: any
504
+ .. automethod :: all
505
+
506
+ .. automethod :: __eq__
507
+ .. automethod :: __ne__
508
+ .. automethod :: __lt__
509
+ .. automethod :: __le__
510
+ .. automethod :: __gt__
511
+ .. automethod :: __ge__
512
+
513
+ .. rubric:: Event management
514
+
515
+ If an array is used from within an out-of-order queue, it needs to take
516
+ care of its own operation ordering. The facilities in this section make
517
+ this possible.
518
+
519
+ .. versionadded:: 2014.1.1
520
+
521
+ .. attribute:: events
522
+
523
+ A list of :class:`pyopencl.Event` instances that the current content of
524
+ this array depends on. User code may read, but should never modify this
525
+ list directly. To update this list, instead use the following methods.
526
+
527
+ .. automethod:: add_event
528
+ .. automethod:: finish
529
+ """
530
+
531
+ __array_priority__ = 100
532
+
533
+ def __init__(
534
+ self,
535
+ cq: Optional[Union[cl.Context, cl.CommandQueue]],
536
+ shape: Union[Tuple[int, ...], int],
537
+ dtype: Any,
538
+ order: str = "C",
539
+ allocator: Optional[cl.tools.AllocatorBase] = None,
540
+ data: Any = None,
541
+ offset: int = 0,
542
+ strides: Optional[Tuple[int, ...]] = None,
543
+ events: Optional[List[cl.Event]] = None,
544
+
545
+ # NOTE: following args are used for the fast constructor
546
+ _flags: Any = None,
547
+ _fast: bool = False,
548
+ _size: Optional[int] = None,
549
+ _context: Optional[cl.Context] = None,
550
+ _queue: Optional[cl.CommandQueue] = None) -> None:
551
+ if _fast:
552
+ # Assumptions, should be disabled if not testing
553
+ if 0:
554
+ assert cq is None
555
+ assert isinstance(_context, cl.Context)
556
+ assert _queue is None or isinstance(_queue, cl.CommandQueue)
557
+ assert isinstance(shape, tuple)
558
+ assert isinstance(strides, tuple)
559
+ assert isinstance(dtype, np.dtype)
560
+ assert _size is not None
561
+
562
+ size = _size
563
+ context = _context
564
+ queue = _queue
565
+ alloc_nbytes = dtype.itemsize * size
566
+
567
+ else:
568
+ # {{{ backward compatibility
569
+
570
+ if cq is None:
571
+ context = _context
572
+ queue = _queue
573
+
574
+ elif isinstance(cq, cl.CommandQueue):
575
+ queue = cq
576
+ context = queue.context
577
+
578
+ elif isinstance(cq, cl.Context):
579
+ context = cq
580
+ queue = None
581
+
582
+ else:
583
+ raise TypeError(
584
+ f"cq may be a queue or a context, not '{type(cq).__name__}'")
585
+
586
+ if allocator is not None:
587
+ # "is" would be wrong because two Python objects are allowed
588
+ # to hold handles to the same context.
589
+
590
+ # FIXME It would be nice to check this. But it would require
591
+ # changing the allocator interface. Trust the user for now.
592
+
593
+ # assert allocator.context == context
594
+ pass
595
+
596
+ # Queue-less arrays do have a purpose in life.
597
+ # They don't do very much, but at least they don't run kernels
598
+ # in random queues.
599
+ #
600
+ # See also :meth:`with_queue`.
601
+
602
+ del cq
603
+
604
+ # }}}
605
+
606
+ # invariant here: allocator, queue set
607
+
608
+ # {{{ determine shape, size, and strides
609
+
610
+ dtype = np.dtype(dtype)
611
+
612
+ try:
613
+ shape = tuple(shape) # type: ignore[arg-type]
614
+ except TypeError as err:
615
+ if not isinstance(shape, (int, np.integer)):
616
+ raise TypeError(
617
+ "shape must either be iterable or castable to an integer: "
618
+ f"got a '{type(shape).__name__}'") from err
619
+
620
+ shape = (shape,)
621
+
622
+ shape_array = np.array(shape)
623
+
624
+ # Previously, the size was computed as
625
+ # "size = 1; size *= dim for dim in shape"
626
+ # However this can fail when using certain data types,
627
+ # eg numpy.uint64(1) * 2 returns 2.0 !
628
+ if np.any(shape_array < 0):
629
+ raise ValueError(f"negative dimensions are not allowed: {shape}")
630
+ if np.any([np.array([s]).dtype.kind not in ["u", "i"] for s in shape]):
631
+ raise ValueError(
632
+ "Invalid shape %s ; dimensions, must be integer" % (str(shape)))
633
+ size = np.prod(shape_array, dtype=np.uint64).item()
634
+
635
+ if strides is None:
636
+ if order in "cC":
637
+ # inlined from compyte.array.c_contiguous_strides
638
+ if shape:
639
+ strides_tmp = [dtype.itemsize]
640
+ for s in shape[:0:-1]:
641
+ # NOTE: https://github.com/inducer/compyte/pull/36
642
+ strides_tmp.append(strides_tmp[-1]*builtins.max(1, s))
643
+ strides = tuple(strides_tmp[::-1])
644
+ else:
645
+ strides = ()
646
+ elif order in "fF":
647
+ strides = _f_contiguous_strides(dtype.itemsize, shape)
648
+ else:
649
+ raise ValueError(f"invalid order: {order}")
650
+
651
+ else:
652
+ # FIXME: We should possibly perform some plausibility
653
+ # checking on 'strides' here.
654
+
655
+ strides = tuple(strides)
656
+
657
+ # }}}
658
+
659
+ assert dtype != object, \
660
+ "object arrays on the compute device are not allowed" # noqa: E721
661
+ assert isinstance(shape, tuple)
662
+ assert isinstance(strides, tuple)
663
+
664
+ alloc_nbytes = dtype.itemsize * size
665
+
666
+ if alloc_nbytes < 0:
667
+ raise ValueError("cannot allocate CL buffer with negative size")
668
+
669
+ self.queue = queue
670
+ self.shape = shape
671
+ self.dtype = dtype
672
+ self.strides = strides
673
+ self.events = [] if events is None else events
674
+ self.nbytes = alloc_nbytes
675
+ self.size = size
676
+ self.allocator = allocator
677
+
678
+ if data is None:
679
+ if alloc_nbytes == 0:
680
+ self.base_data = None
681
+
682
+ else:
683
+ if self.allocator is None:
684
+ if context is None and queue is not None:
685
+ context = queue.context
686
+
687
+ self.base_data = cl.Buffer(
688
+ context, cl.mem_flags.READ_WRITE, alloc_nbytes)
689
+ else:
690
+ self.base_data = self.allocator(alloc_nbytes)
691
+ else:
692
+ self.base_data = data
693
+
694
+ self.offset = offset
695
+ self.context = context
696
+ self._flags = _flags
697
+
698
+ if __debug__:
699
+ if queue is not None and isinstance(
700
+ self.base_data, _SVMPointer_or_nothing):
701
+ mem_queue = getattr(self.base_data, "_queue", _NOT_PRESENT)
702
+ if mem_queue is not _NOT_PRESENT and mem_queue != queue:
703
+ warn("Array has different queue from backing SVM memory. "
704
+ "This may lead to the array getting deallocated sooner "
705
+ "than expected, potentially leading to crashes.",
706
+ InconsistentOpenCLQueueWarning, stacklevel=2)
707
+
708
+ @property
709
+ def ndim(self):
710
+ return len(self.shape)
711
+
712
+ @property
713
+ def data(self):
714
+ if self.offset:
715
+ raise ArrayHasOffsetError()
716
+ else:
717
+ return self.base_data
718
+
719
+ @property
720
+ def flags(self):
721
+ f = self._flags
722
+ if f is None:
723
+ self._flags = f = _ArrayFlags(self)
724
+ return f
725
+
726
+ def _new_with_changes(self, data, offset, shape=None, dtype=None,
727
+ strides=None, queue=_copy_queue, allocator=None):
728
+ """
729
+ :arg data: *None* means allocate a new array.
730
+ """
731
+ fast = True
732
+ size = self.size
733
+ if shape is None:
734
+ shape = self.shape
735
+ else:
736
+ fast = False
737
+ size = None
738
+
739
+ if dtype is None:
740
+ dtype = self.dtype
741
+ if strides is None:
742
+ strides = self.strides
743
+ if queue is _copy_queue:
744
+ queue = self.queue
745
+ if allocator is None:
746
+ allocator = self.allocator
747
+
748
+ # If we're allocating new data, then there's not likely to be
749
+ # a data dependency. Otherwise, the two arrays should probably
750
+ # share the same events list.
751
+
752
+ if data is None:
753
+ events = None
754
+ else:
755
+ events = self.events
756
+
757
+ return self.__class__(None, shape, dtype, allocator=allocator,
758
+ strides=strides, data=data, offset=offset,
759
+ events=events,
760
+ _fast=fast, _context=self.context, _queue=queue, _size=size)
761
+
762
+ def with_queue(self, queue):
763
+ """Return a copy of *self* with the default queue set to *queue*.
764
+
765
+ *None* is allowed as a value for *queue*.
766
+
767
+ .. versionadded:: 2013.1
768
+ """
769
+
770
+ if queue is not None:
771
+ assert queue.context == self.context
772
+
773
+ return self._new_with_changes(self.base_data, self.offset,
774
+ queue=queue)
775
+
776
+ def _get_sizes(self, queue, kernel_specific_max_wg_size=None):
777
+ if not self.flags.forc:
778
+ raise NotImplementedError("cannot operate on non-contiguous array")
779
+ cache_key = (queue.device.int_ptr, self.size, kernel_specific_max_wg_size)
780
+ try:
781
+ return _ARRAY_GET_SIZES_CACHE[cache_key]
782
+ except KeyError:
783
+ sizes = _splay(queue.device, self.size,
784
+ kernel_specific_max_wg_size=kernel_specific_max_wg_size)
785
+ _ARRAY_GET_SIZES_CACHE[cache_key] = sizes
786
+ return sizes
787
+
788
+ def set(self, ary, queue=None, async_=None, **kwargs):
789
+ """Transfer the contents the :class:`numpy.ndarray` object *ary*
790
+ onto the device.
791
+
792
+ *ary* must have the same dtype and size (not necessarily shape) as
793
+ *self*.
794
+
795
+ *async_* is a Boolean indicating whether the function is allowed
796
+ to return before the transfer completes. To avoid synchronization
797
+ bugs, this defaults to *False*.
798
+
799
+ .. versionchanged:: 2017.2.1
800
+
801
+ Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
802
+ we will continue to accept *async* as a parameter, however this
803
+ should be considered deprecated. *async_* is the new, official
804
+ spelling.
805
+ """
806
+
807
+ # {{{ handle 'async' deprecation
808
+
809
+ async_arg = kwargs.pop("async", None)
810
+ if async_arg is not None:
811
+ if async_ is not None:
812
+ raise TypeError("may not specify both 'async' and 'async_'")
813
+ async_ = async_arg
814
+
815
+ if async_ is None:
816
+ async_ = False
817
+
818
+ if kwargs:
819
+ raise TypeError("extra keyword arguments specified: %s"
820
+ % ", ".join(kwargs))
821
+
822
+ # }}}
823
+
824
+ assert ary.size == self.size
825
+ assert ary.dtype == self.dtype
826
+
827
+ if not ary.flags.forc:
828
+ raise RuntimeError("cannot set from non-contiguous array")
829
+
830
+ if not _equal_strides(ary.strides, self.strides, self.shape):
831
+ warn("Setting array from one with different "
832
+ "strides/storage order. This will cease to work "
833
+ "in 2013.x.",
834
+ stacklevel=2)
835
+
836
+ if self.size:
837
+ event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
838
+ dst_offset=self.offset,
839
+ is_blocking=not async_)
840
+
841
+ self.add_event(event1)
842
+
843
+ def _get(self, queue=None, ary=None, async_=None, **kwargs):
844
+ # {{{ handle 'async' deprecation
845
+
846
+ async_arg = kwargs.pop("async", None)
847
+ if async_arg is not None:
848
+ if async_ is not None:
849
+ raise TypeError("may not specify both 'async' and 'async_'")
850
+ async_ = async_arg
851
+
852
+ if async_ is None:
853
+ async_ = False
854
+
855
+ if kwargs:
856
+ raise TypeError("extra keyword arguments specified: %s"
857
+ % ", ".join(kwargs))
858
+
859
+ # }}}
860
+
861
+ if ary is None:
862
+ ary = np.empty(self.shape, self.dtype)
863
+
864
+ if self.strides != ary.strides:
865
+ ary = _as_strided(ary, strides=self.strides)
866
+ else:
867
+ if ary.size != self.size:
868
+ raise TypeError("'ary' has non-matching size")
869
+ if ary.dtype != self.dtype:
870
+ raise TypeError("'ary' has non-matching type")
871
+
872
+ if self.shape != ary.shape:
873
+ warn("get() between arrays of different shape is deprecated "
874
+ "and will be removed in PyCUDA 2017.x",
875
+ DeprecationWarning, stacklevel=2)
876
+
877
+ assert self.flags.forc, "Array in get() must be contiguous"
878
+
879
+ queue = queue or self.queue
880
+ if queue is None:
881
+ raise ValueError("Cannot copy array to host. "
882
+ "Array has no queue. Use "
883
+ "'new_array = array.with_queue(queue)' "
884
+ "to associate one.")
885
+
886
+ if self.size:
887
+ event1 = cl.enqueue_copy(queue, ary, self.base_data,
888
+ src_offset=self.offset,
889
+ wait_for=self.events, is_blocking=not async_)
890
+
891
+ self.add_event(event1)
892
+ else:
893
+ event1 = None
894
+
895
+ return ary, event1
896
+
897
+ def get(self, queue=None, ary=None, async_=None, **kwargs):
898
+ """Transfer the contents of *self* into *ary* or a newly allocated
899
+ :class:`numpy.ndarray`. If *ary* is given, it must have the same
900
+ shape and dtype.
901
+
902
+ .. versionchanged:: 2019.1.2
903
+
904
+ Calling with ``async_=True`` was deprecated and replaced by
905
+ :meth:`get_async`.
906
+ The event returned by :meth:`pyopencl.enqueue_copy` is now stored into
907
+ :attr:`events` to ensure data is not modified before the copy is
908
+ complete.
909
+
910
+ .. versionchanged:: 2015.2
911
+
912
+ *ary* with different shape was deprecated.
913
+
914
+ .. versionchanged:: 2017.2.1
915
+
916
+ Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
917
+ we will continue to accept *async* as a parameter, however this
918
+ should be considered deprecated. *async_* is the new, official
919
+ spelling.
920
+ """
921
+
922
+ if async_:
923
+ warn("calling pyopencl.Array.get with 'async_=True' is deprecated. "
924
+ "Please use pyopencl.Array.get_async for asynchronous "
925
+ "device-to-host transfers",
926
+ DeprecationWarning, stacklevel=2)
927
+
928
+ ary, _event1 = self._get(queue=queue, ary=ary, async_=async_, **kwargs)
929
+
930
+ return ary
931
+
932
+ def get_async(self, queue=None, ary=None, **kwargs):
933
+ """
934
+ Asynchronous version of :meth:`get` which returns a tuple ``(ary, event)``
935
+ containing the host array ``ary``
936
+ and the :class:`pyopencl.NannyEvent` ``event`` returned by
937
+ :meth:`pyopencl.enqueue_copy`.
938
+
939
+ .. versionadded:: 2019.1.2
940
+ """
941
+
942
+ return self._get(queue=queue, ary=ary, async_=True, **kwargs)
943
+
944
+ def copy(self, queue=_copy_queue):
945
+ """
946
+ :arg queue: The :class:`~pyopencl.CommandQueue` for the returned array.
947
+
948
+ .. versionchanged:: 2017.1.2
949
+
950
+ Updates the queue of the returned array.
951
+
952
+ .. versionadded:: 2013.1
953
+ """
954
+
955
+ if queue is _copy_queue:
956
+ queue = self.queue
957
+
958
+ result = self._new_like_me(queue=queue)
959
+
960
+ # result.queue won't be the same as queue if queue is None.
961
+ # We force them to be the same here.
962
+ if result.queue is not queue:
963
+ result = result.with_queue(queue)
964
+
965
+ if not self.flags.forc:
966
+ raise RuntimeError("cannot copy non-contiguous array")
967
+
968
+ if self.nbytes:
969
+ event1 = cl.enqueue_copy(queue or self.queue,
970
+ result.base_data, self.base_data,
971
+ src_offset=self.offset, byte_count=self.nbytes,
972
+ wait_for=self.events)
973
+ result.add_event(event1)
974
+
975
+ return result
976
+
977
+ def __str__(self):
978
+ if self.queue is None:
979
+ return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
980
+ "without queue, call with_queue()>")
981
+
982
+ return str(self.get())
983
+
984
+ def __repr__(self):
985
+ if self.queue is None:
986
+ return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
987
+ f"at {id(self):x} without queue, call with_queue()>")
988
+
989
+ result = repr(self.get())
990
+ if result[:5] == "array":
991
+ result = f"cl.{type(self).__name__}" + result[5:]
992
+ else:
993
+ warn(
994
+ f"{type(result).__name__}.__repr__ was expected to return a "
995
+ f"string starting with 'array', got '{result[:10]!r}'",
996
+ stacklevel=2)
997
+
998
+ return result
999
+
1000
+ def safely_stringify_for_pudb(self):
1001
+ return f"cl.{type(self).__name__} {self.dtype} {self.shape}"
1002
+
1003
+ def __hash__(self):
1004
+ raise TypeError("pyopencl arrays are not hashable.")
1005
+
1006
+ # {{{ kernel invocation wrappers
1007
+
1008
+ @staticmethod
1009
+ @elwise_kernel_runner
1010
+ def _axpbyz(out, afac, a, bfac, b, queue=None):
1011
+ """Compute ``out = selffac * self + otherfac*other``,
1012
+ where *other* is an array."""
1013
+ a_shape = a.shape
1014
+ b_shape = b.shape
1015
+ out_shape = out.shape
1016
+ assert (a_shape == b_shape == out_shape
1017
+ or (a_shape == () and b_shape == out_shape)
1018
+ or (b_shape == () and a_shape == out_shape))
1019
+ return elementwise.get_axpbyz_kernel(
1020
+ out.context, a.dtype, b.dtype, out.dtype,
1021
+ x_is_scalar=(a_shape == ()),
1022
+ y_is_scalar=(b_shape == ()))
1023
+
1024
+ @staticmethod
1025
+ @elwise_kernel_runner
1026
+ def _axpbz(out, a, x, b, queue=None):
1027
+ """Compute ``z = a * x + b``, where *b* is a scalar."""
1028
+ a = np.array(a)
1029
+ b = np.array(b)
1030
+ assert out.shape == x.shape
1031
+ return elementwise.get_axpbz_kernel(out.context,
1032
+ a.dtype, x.dtype, b.dtype, out.dtype)
1033
+
1034
+ @staticmethod
1035
+ @elwise_kernel_runner
1036
+ def _elwise_multiply(out, a, b, queue=None):
1037
+ a_shape = a.shape
1038
+ b_shape = b.shape
1039
+ out_shape = out.shape
1040
+ assert (a_shape == b_shape == out_shape
1041
+ or (a_shape == () and b_shape == out_shape)
1042
+ or (b_shape == () and a_shape == out_shape))
1043
+ return elementwise.get_multiply_kernel(
1044
+ a.context, a.dtype, b.dtype, out.dtype,
1045
+ x_is_scalar=(a_shape == ()),
1046
+ y_is_scalar=(b_shape == ())
1047
+ )
1048
+
1049
+ @staticmethod
1050
+ @elwise_kernel_runner
1051
+ def _rdiv_scalar(out, ary, other, queue=None):
1052
+ other = np.array(other)
1053
+ assert out.shape == ary.shape
1054
+ return elementwise.get_rdivide_elwise_kernel(
1055
+ out.context, ary.dtype, other.dtype, out.dtype)
1056
+
1057
+ @staticmethod
1058
+ @elwise_kernel_runner
1059
+ def _div(out, self, other, queue=None):
1060
+ """Divides an array by another array."""
1061
+ assert (self.shape == other.shape == out.shape
1062
+ or (self.shape == () and other.shape == out.shape)
1063
+ or (other.shape == () and self.shape == out.shape))
1064
+
1065
+ return elementwise.get_divide_kernel(self.context,
1066
+ self.dtype, other.dtype, out.dtype,
1067
+ x_is_scalar=(self.shape == ()),
1068
+ y_is_scalar=(other.shape == ()))
1069
+
1070
+ @staticmethod
1071
+ @elwise_kernel_runner
1072
+ def _fill(result, scalar):
1073
+ return elementwise.get_fill_kernel(result.context, result.dtype)
1074
+
1075
+ @staticmethod
1076
+ @elwise_kernel_runner
1077
+ def _abs(result, arg):
1078
+ if arg.dtype.kind == "c":
1079
+ from pyopencl.elementwise import complex_dtype_to_name
1080
+ fname = "%s_abs" % complex_dtype_to_name(arg.dtype)
1081
+ elif arg.dtype.kind == "f":
1082
+ fname = "fabs"
1083
+ elif arg.dtype.kind in ["u", "i"]:
1084
+ fname = "abs"
1085
+ else:
1086
+ raise TypeError("unsupported dtype in _abs()")
1087
+
1088
+ return elementwise.get_unary_func_kernel(
1089
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1090
+
1091
+ @staticmethod
1092
+ @elwise_kernel_runner
1093
+ def _real(result, arg):
1094
+ from pyopencl.elementwise import complex_dtype_to_name
1095
+ fname = "%s_real" % complex_dtype_to_name(arg.dtype)
1096
+ return elementwise.get_unary_func_kernel(
1097
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1098
+
1099
+ @staticmethod
1100
+ @elwise_kernel_runner
1101
+ def _imag(result, arg):
1102
+ from pyopencl.elementwise import complex_dtype_to_name
1103
+ fname = "%s_imag" % complex_dtype_to_name(arg.dtype)
1104
+ return elementwise.get_unary_func_kernel(
1105
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1106
+
1107
+ @staticmethod
1108
+ @elwise_kernel_runner
1109
+ def _conj(result, arg):
1110
+ from pyopencl.elementwise import complex_dtype_to_name
1111
+ fname = "%s_conj" % complex_dtype_to_name(arg.dtype)
1112
+ return elementwise.get_unary_func_kernel(
1113
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1114
+
1115
+ @staticmethod
1116
+ @elwise_kernel_runner
1117
+ def _pow_scalar(result, ary, exponent):
1118
+ exponent = np.array(exponent)
1119
+ return elementwise.get_pow_kernel(result.context,
1120
+ ary.dtype, exponent.dtype, result.dtype,
1121
+ is_base_array=True, is_exp_array=False)
1122
+
1123
+ @staticmethod
1124
+ @elwise_kernel_runner
1125
+ def _rpow_scalar(result, base, exponent):
1126
+ base = np.array(base)
1127
+ return elementwise.get_pow_kernel(result.context,
1128
+ base.dtype, exponent.dtype, result.dtype,
1129
+ is_base_array=False, is_exp_array=True)
1130
+
1131
+ @staticmethod
1132
+ @elwise_kernel_runner
1133
+ def _pow_array(result, base, exponent):
1134
+ return elementwise.get_pow_kernel(
1135
+ result.context, base.dtype, exponent.dtype, result.dtype,
1136
+ is_base_array=True, is_exp_array=True)
1137
+
1138
+ @staticmethod
1139
+ @elwise_kernel_runner
1140
+ def _reverse(result, ary):
1141
+ return elementwise.get_reverse_kernel(result.context, ary.dtype)
1142
+
1143
+ @staticmethod
1144
+ @elwise_kernel_runner
1145
+ def _copy(dest, src):
1146
+ return elementwise.get_copy_kernel(
1147
+ dest.context, dest.dtype, src.dtype)
1148
+
1149
+ def _new_like_me(self, dtype=None, queue=None):
1150
+ if dtype is None:
1151
+ dtype = self.dtype
1152
+ strides = self.strides
1153
+ flags = self.flags
1154
+ fast = True
1155
+ else:
1156
+ strides = None
1157
+ flags = None
1158
+ if dtype == self.dtype:
1159
+ strides = self.strides
1160
+ flags = self.flags
1161
+ fast = True
1162
+ else:
1163
+ fast = False
1164
+
1165
+ queue = queue or self.queue
1166
+ return self.__class__(None, self.shape, dtype,
1167
+ allocator=self.allocator, strides=strides, _flags=flags,
1168
+ _fast=fast,
1169
+ _size=self.size, _queue=queue, _context=self.context)
1170
+
1171
+ @staticmethod
1172
+ @elwise_kernel_runner
1173
+ def _scalar_binop(out, a, b, queue=None, op=None):
1174
+ return elementwise.get_array_scalar_binop_kernel(
1175
+ out.context, op, out.dtype, a.dtype,
1176
+ np.array(b).dtype)
1177
+
1178
+ @staticmethod
1179
+ @elwise_kernel_runner
1180
+ def _array_binop(out, a, b, queue=None, op=None):
1181
+ a_shape = a.shape
1182
+ b_shape = b.shape
1183
+ out_shape = out.shape
1184
+ assert (a_shape == b_shape == out_shape
1185
+ or (a_shape == () and b_shape == out_shape)
1186
+ or (b_shape == () and a_shape == out_shape))
1187
+ return elementwise.get_array_binop_kernel(
1188
+ out.context, op, out.dtype, a.dtype, b.dtype,
1189
+ a_is_scalar=(a_shape == ()),
1190
+ b_is_scalar=(b_shape == ()))
1191
+
1192
+ @staticmethod
1193
+ @elwise_kernel_runner
1194
+ def _unop(out, a, queue=None, op=None):
1195
+ if out.shape != a.shape:
1196
+ raise ValueError("shapes of arguments do not match")
1197
+ return elementwise.get_unop_kernel(
1198
+ out.context, op, a.dtype, out.dtype)
1199
+
1200
+ # }}}
1201
+
1202
+ # {{{ operators
1203
+
1204
+ def mul_add(self, selffac, other, otherfac, queue=None):
1205
+ """Return ``selffac * self + otherfac * other``.
1206
+ """
1207
+ queue = queue or self.queue
1208
+
1209
+ if isinstance(other, Array):
1210
+ result = _get_broadcasted_binary_op_result(self, other, queue)
1211
+ result.add_event(
1212
+ self._axpbyz(
1213
+ result, selffac, self, otherfac, other,
1214
+ queue=queue))
1215
+ return result
1216
+ elif np.isscalar(other):
1217
+ common_dtype = _get_common_dtype(self, other, queue)
1218
+ result = self._new_like_me(common_dtype, queue=queue)
1219
+ result.add_event(
1220
+ self._axpbz(result, selffac,
1221
+ self, common_dtype.type(otherfac * other),
1222
+ queue=queue))
1223
+ return result
1224
+ else:
1225
+ raise NotImplementedError
1226
+
1227
+ def __add__(self, other):
1228
+ """Add an array with an array or an array with a scalar."""
1229
+
1230
+ if isinstance(other, Array):
1231
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1232
+ result.add_event(
1233
+ self._axpbyz(result,
1234
+ self.dtype.type(1), self,
1235
+ other.dtype.type(1), other))
1236
+
1237
+ return result
1238
+ elif np.isscalar(other):
1239
+ if other == 0:
1240
+ return self.copy()
1241
+ else:
1242
+ common_dtype = _get_common_dtype(self, other, self.queue)
1243
+ result = self._new_like_me(common_dtype)
1244
+ result.add_event(
1245
+ self._axpbz(result, self.dtype.type(1),
1246
+ self, common_dtype.type(other)))
1247
+ return result
1248
+ else:
1249
+ return NotImplemented
1250
+
1251
+ __radd__ = __add__
1252
+
1253
+ def __sub__(self, other):
1254
+ """Subtract an array from an array or a scalar from an array."""
1255
+
1256
+ if isinstance(other, Array):
1257
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1258
+ result.add_event(
1259
+ self._axpbyz(result,
1260
+ self.dtype.type(1), self,
1261
+ result.dtype.type(-1), other))
1262
+
1263
+ return result
1264
+ elif np.isscalar(other):
1265
+ if other == 0:
1266
+ return self.copy()
1267
+ else:
1268
+ result = self._new_like_me(
1269
+ _get_common_dtype(self, other, self.queue))
1270
+ result.add_event(
1271
+ self._axpbz(result, self.dtype.type(1), self, -other))
1272
+ return result
1273
+ else:
1274
+ return NotImplemented
1275
+
1276
+ def __rsub__(self, other):
1277
+ """Subtracts an array by a scalar or an array::
1278
+
1279
+ x = n - self
1280
+ """
1281
+ if np.isscalar(other):
1282
+ common_dtype = _get_common_dtype(self, other, self.queue)
1283
+ result = self._new_like_me(common_dtype)
1284
+ result.add_event(
1285
+ self._axpbz(result, result.dtype.type(-1), self,
1286
+ common_dtype.type(other)))
1287
+
1288
+ return result
1289
+ else:
1290
+ return NotImplemented
1291
+
1292
+ def __iadd__(self, other):
1293
+ if isinstance(other, Array):
1294
+ if other.shape != self.shape and other.shape != ():
1295
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1296
+ f" {self.shape}, {other.shape}.")
1297
+ self.add_event(
1298
+ self._axpbyz(self,
1299
+ self.dtype.type(1), self,
1300
+ other.dtype.type(1), other))
1301
+
1302
+ return self
1303
+ elif np.isscalar(other):
1304
+ self.add_event(
1305
+ self._axpbz(self, self.dtype.type(1), self, other))
1306
+ return self
1307
+ else:
1308
+ return NotImplemented
1309
+
1310
+ def __isub__(self, other):
1311
+ if isinstance(other, Array):
1312
+ if other.shape != self.shape and other.shape != ():
1313
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1314
+ f" {self.shape}, {other.shape}.")
1315
+ self.add_event(
1316
+ self._axpbyz(self, self.dtype.type(1), self,
1317
+ other.dtype.type(-1), other))
1318
+ return self
1319
+ elif np.isscalar(other):
1320
+ self.add_event(self._axpbz(self, self.dtype.type(1), self, -other))
1321
+ return self
1322
+ else:
1323
+ return NotImplemented
1324
+
1325
+ def __pos__(self):
1326
+ return self
1327
+
1328
+ def __neg__(self):
1329
+ result = self._new_like_me()
1330
+ result.add_event(self._axpbz(result, -1, self, 0))
1331
+ return result
1332
+
1333
+ def __mul__(self, other):
1334
+ if isinstance(other, Array):
1335
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1336
+ result.add_event(
1337
+ self._elwise_multiply(result, self, other))
1338
+ return result
1339
+ elif np.isscalar(other):
1340
+ common_dtype = _get_common_dtype(self, other, self.queue)
1341
+ result = self._new_like_me(common_dtype)
1342
+ result.add_event(
1343
+ self._axpbz(result,
1344
+ common_dtype.type(other), self, self.dtype.type(0)))
1345
+ return result
1346
+ else:
1347
+ return NotImplemented
1348
+
1349
+ def __rmul__(self, other):
1350
+ if np.isscalar(other):
1351
+ common_dtype = _get_common_dtype(self, other, self.queue)
1352
+ result = self._new_like_me(common_dtype)
1353
+ result.add_event(
1354
+ self._axpbz(result,
1355
+ common_dtype.type(other), self, self.dtype.type(0)))
1356
+ return result
1357
+ else:
1358
+ return NotImplemented
1359
+
1360
+ def __imul__(self, other):
1361
+ if isinstance(other, Array):
1362
+ if other.shape != self.shape and other.shape != ():
1363
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1364
+ f" {self.shape}, {other.shape}.")
1365
+ self.add_event(
1366
+ self._elwise_multiply(self, self, other))
1367
+ return self
1368
+ elif np.isscalar(other):
1369
+ self.add_event(
1370
+ self._axpbz(self, other, self, self.dtype.type(0)))
1371
+ return self
1372
+ else:
1373
+ return NotImplemented
1374
+
1375
+ def __div__(self, other):
1376
+ """Divides an array by an array or a scalar, i.e. ``self / other``.
1377
+ """
1378
+ if isinstance(other, Array):
1379
+ result = _get_broadcasted_binary_op_result(
1380
+ self, other, self.queue,
1381
+ dtype_getter=_get_truedivide_dtype)
1382
+ result.add_event(self._div(result, self, other))
1383
+
1384
+ return result
1385
+ elif np.isscalar(other):
1386
+ if other == 1:
1387
+ return self.copy()
1388
+ else:
1389
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1390
+ result = self._new_like_me(common_dtype)
1391
+ result.add_event(
1392
+ self._axpbz(result,
1393
+ np.true_divide(common_dtype.type(1), other),
1394
+ self, self.dtype.type(0)))
1395
+ return result
1396
+ else:
1397
+ return NotImplemented
1398
+
1399
+ __truediv__ = __div__
1400
+
1401
+ def __rdiv__(self, other):
1402
+ """Divides an array by a scalar or an array, i.e. ``other / self``.
1403
+ """
1404
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1405
+
1406
+ if isinstance(other, Array):
1407
+ result = self._new_like_me(common_dtype)
1408
+ result.add_event(other._div(result, self))
1409
+ return result
1410
+ elif np.isscalar(other):
1411
+ result = self._new_like_me(common_dtype)
1412
+ result.add_event(
1413
+ self._rdiv_scalar(result, self, common_dtype.type(other)))
1414
+ return result
1415
+ else:
1416
+ return NotImplemented
1417
+
1418
+ __rtruediv__ = __rdiv__
1419
+
1420
+ def __itruediv__(self, other):
1421
+ # raise an error if the result cannot be cast to self
1422
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1423
+ if not np.can_cast(common_dtype, self.dtype.type, "same_kind"):
1424
+ raise TypeError(
1425
+ "Cannot cast {!r} to {!r}".format(self.dtype, common_dtype))
1426
+
1427
+ if isinstance(other, Array):
1428
+ if other.shape != self.shape and other.shape != ():
1429
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1430
+ f" {self.shape}, {other.shape}.")
1431
+ self.add_event(
1432
+ self._div(self, self, other))
1433
+ return self
1434
+ elif np.isscalar(other):
1435
+ if other == 1:
1436
+ return self
1437
+ else:
1438
+ self.add_event(
1439
+ self._axpbz(self, common_dtype.type(np.true_divide(1, other)),
1440
+ self, self.dtype.type(0)))
1441
+ return self
1442
+ else:
1443
+ return NotImplemented
1444
+
1445
+ def __and__(self, other):
1446
+ common_dtype = _get_common_dtype(self, other, self.queue)
1447
+
1448
+ if not np.issubdtype(common_dtype, np.integer):
1449
+ raise TypeError(f"Integral types only: {common_dtype}")
1450
+
1451
+ if isinstance(other, Array):
1452
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1453
+ result.add_event(self._array_binop(result, self, other, op="&"))
1454
+ return result
1455
+ elif np.isscalar(other):
1456
+ result = self._new_like_me(common_dtype)
1457
+ result.add_event(
1458
+ self._scalar_binop(result, self, other, op="&"))
1459
+ return result
1460
+ else:
1461
+ return NotImplemented
1462
+
1463
+ __rand__ = __and__ # commutes
1464
+
1465
+ def __or__(self, other):
1466
+ common_dtype = _get_common_dtype(self, other, self.queue)
1467
+
1468
+ if not np.issubdtype(common_dtype, np.integer):
1469
+ raise TypeError("Integral types only")
1470
+
1471
+ if isinstance(other, Array):
1472
+ result = _get_broadcasted_binary_op_result(self, other,
1473
+ self.queue)
1474
+ result.add_event(self._array_binop(result, self, other, op="|"))
1475
+ return result
1476
+ elif np.isscalar(other):
1477
+ result = self._new_like_me(common_dtype)
1478
+ result.add_event(
1479
+ self._scalar_binop(result, self, other, op="|"))
1480
+ return result
1481
+ else:
1482
+ return NotImplemented
1483
+
1484
+ __ror__ = __or__ # commutes
1485
+
1486
+ def __xor__(self, other):
1487
+ common_dtype = _get_common_dtype(self, other, self.queue)
1488
+
1489
+ if not np.issubdtype(common_dtype, np.integer):
1490
+ raise TypeError(f"Integral types only: {common_dtype}")
1491
+
1492
+ if isinstance(other, Array):
1493
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1494
+ result.add_event(self._array_binop(result, self, other, op="^"))
1495
+ return result
1496
+ elif np.isscalar(other):
1497
+ result = self._new_like_me(common_dtype)
1498
+ result.add_event(
1499
+ self._scalar_binop(result, self, other, op="^"))
1500
+ return result
1501
+ else:
1502
+ return NotImplemented
1503
+
1504
+ __rxor__ = __xor__ # commutes
1505
+
1506
+ def __iand__(self, other):
1507
+ common_dtype = _get_common_dtype(self, other, self.queue)
1508
+
1509
+ if not np.issubdtype(common_dtype, np.integer):
1510
+ raise TypeError(f"Integral types only: {common_dtype}")
1511
+
1512
+ if isinstance(other, Array):
1513
+ if other.shape != self.shape and other.shape != ():
1514
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1515
+ f" {self.shape}, {other.shape}.")
1516
+ self.add_event(self._array_binop(self, self, other, op="&"))
1517
+ return self
1518
+ elif np.isscalar(other):
1519
+ self.add_event(
1520
+ self._scalar_binop(self, self, other, op="&"))
1521
+ return self
1522
+ else:
1523
+ return NotImplemented
1524
+
1525
+ def __ior__(self, other):
1526
+ common_dtype = _get_common_dtype(self, other, self.queue)
1527
+
1528
+ if not np.issubdtype(common_dtype, np.integer):
1529
+ raise TypeError(f"Integral types only: {common_dtype}")
1530
+
1531
+ if isinstance(other, Array):
1532
+ if other.shape != self.shape and other.shape != ():
1533
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1534
+ f" {self.shape}, {other.shape}.")
1535
+ self.add_event(self._array_binop(self, self, other, op="|"))
1536
+ return self
1537
+ elif np.isscalar(other):
1538
+ self.add_event(
1539
+ self._scalar_binop(self, self, other, op="|"))
1540
+ return self
1541
+ else:
1542
+ return NotImplemented
1543
+
1544
+ def __ixor__(self, other):
1545
+ common_dtype = _get_common_dtype(self, other, self.queue)
1546
+
1547
+ if not np.issubdtype(common_dtype, np.integer):
1548
+ raise TypeError(f"Integral types only: {common_dtype}")
1549
+
1550
+ if isinstance(other, Array):
1551
+ if other.shape != self.shape and other.shape != ():
1552
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1553
+ f" {self.shape}, {other.shape}.")
1554
+ self.add_event(self._array_binop(self, self, other, op="^"))
1555
+ return self
1556
+ elif np.isscalar(other):
1557
+ self.add_event(
1558
+ self._scalar_binop(self, self, other, op="^"))
1559
+ return self
1560
+ else:
1561
+ return NotImplemented
1562
+
1563
+ def _zero_fill(self, queue=None, wait_for=None):
1564
+ queue = queue or self.queue
1565
+
1566
+ if not self.size:
1567
+ return
1568
+
1569
+ cl_version_gtr_1_2 = (
1570
+ queue._get_cl_version() >= (1, 2)
1571
+ and cl.get_cl_header_version() >= (1, 2)
1572
+ )
1573
+ on_nvidia = queue.device.vendor.startswith("NVIDIA")
1574
+
1575
+ # circumvent bug with large buffers on NVIDIA
1576
+ # https://github.com/inducer/pyopencl/issues/395
1577
+ if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
1578
+ self.add_event(
1579
+ cl.enqueue_fill(queue, self.base_data, np.int8(0),
1580
+ self.nbytes, offset=self.offset, wait_for=wait_for))
1581
+ else:
1582
+ zero = np.zeros((), self.dtype)
1583
+ self.fill(zero, queue=queue)
1584
+
1585
+ def fill(self, value, queue=None, wait_for=None):
1586
+ """Fill the array with *scalar*.
1587
+
1588
+ :returns: *self*.
1589
+ """
1590
+
1591
+ self.add_event(
1592
+ self._fill(self, value, queue=queue, wait_for=wait_for))
1593
+
1594
+ return self
1595
+
1596
+ def __len__(self):
1597
+ """Returns the size of the leading dimension of *self*."""
1598
+ if len(self.shape):
1599
+ return self.shape[0]
1600
+ else:
1601
+ raise TypeError("len() of unsized object")
1602
+
1603
+ def __abs__(self):
1604
+ """Return an ``Array`` of the absolute values of the elements
1605
+ of *self*.
1606
+ """
1607
+
1608
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1609
+ result.add_event(self._abs(result, self))
1610
+ return result
1611
+
1612
+ def __pow__(self, other):
1613
+ """Exponentiation by a scalar or elementwise by another
1614
+ :class:`Array`.
1615
+ """
1616
+
1617
+ if isinstance(other, Array):
1618
+ assert self.shape == other.shape
1619
+
1620
+ result = self._new_like_me(
1621
+ _get_common_dtype(self, other, self.queue))
1622
+ result.add_event(
1623
+ self._pow_array(result, self, other))
1624
+ return result
1625
+ elif np.isscalar(other):
1626
+ result = self._new_like_me(
1627
+ _get_common_dtype(self, other, self.queue))
1628
+ result.add_event(self._pow_scalar(result, self, other))
1629
+ return result
1630
+ else:
1631
+ return NotImplemented
1632
+
1633
+ def __rpow__(self, other):
1634
+ if np.isscalar(other):
1635
+ common_dtype = _get_common_dtype(self, other, self.queue)
1636
+ result = self._new_like_me(common_dtype)
1637
+ result.add_event(
1638
+ self._rpow_scalar(result, common_dtype.type(other), self))
1639
+ return result
1640
+ else:
1641
+ return NotImplemented
1642
+
1643
+ def __invert__(self):
1644
+ if not np.issubdtype(self.dtype, np.integer):
1645
+ raise TypeError(f"Integral types only: {self.dtype}")
1646
+
1647
+ result = self._new_like_me()
1648
+ result.add_event(self._unop(result, self, op="~"))
1649
+
1650
+ return result
1651
+
1652
+ # }}}
1653
+
1654
+ def reverse(self, queue=None):
1655
+ """Return this array in reversed order. The array is treated
1656
+ as one-dimensional.
1657
+ """
1658
+
1659
+ result = self._new_like_me()
1660
+ result.add_event(self._reverse(result, self))
1661
+ return result
1662
+
1663
+ def astype(self, dtype, queue=None):
1664
+ """Return a copy of *self*, cast to *dtype*."""
1665
+ if dtype == self.dtype:
1666
+ return self.copy()
1667
+
1668
+ result = self._new_like_me(dtype=dtype)
1669
+ result.add_event(self._copy(result, self, queue=queue))
1670
+ return result
1671
+
1672
+ # {{{ rich comparisons, any, all
1673
+
1674
+ def __bool__(self):
1675
+ if self.shape == ():
1676
+ return bool(self.get())
1677
+ else:
1678
+ raise ValueError("The truth value of an array with "
1679
+ "more than one element is ambiguous. Use a.any() or a.all()")
1680
+
1681
+ def any(self, queue=None, wait_for=None):
1682
+ from pyopencl.reduction import get_any_kernel
1683
+ krnl = get_any_kernel(self.context, self.dtype)
1684
+ if wait_for is None:
1685
+ wait_for = []
1686
+ result, event1 = krnl(self, queue=queue,
1687
+ wait_for=wait_for + self.events, return_event=True)
1688
+ result.add_event(event1)
1689
+ return result
1690
+
1691
+ def all(self, queue=None, wait_for=None):
1692
+ from pyopencl.reduction import get_all_kernel
1693
+ krnl = get_all_kernel(self.context, self.dtype)
1694
+ if wait_for is None:
1695
+ wait_for = []
1696
+ result, event1 = krnl(self, queue=queue,
1697
+ wait_for=wait_for + self.events, return_event=True)
1698
+ result.add_event(event1)
1699
+ return result
1700
+
1701
+ @staticmethod
1702
+ @elwise_kernel_runner
1703
+ def _scalar_comparison(out, a, b, queue=None, op=None):
1704
+ return elementwise.get_array_scalar_comparison_kernel(
1705
+ out.context, op, a.dtype)
1706
+
1707
+ @staticmethod
1708
+ @elwise_kernel_runner
1709
+ def _array_comparison(out, a, b, queue=None, op=None):
1710
+ if a.shape != b.shape:
1711
+ raise ValueError("shapes of comparison arguments do not match")
1712
+ return elementwise.get_array_comparison_kernel(
1713
+ out.context, op, a.dtype, b.dtype)
1714
+
1715
+ def __eq__(self, other):
1716
+ if isinstance(other, Array):
1717
+ result = self._new_like_me(_BOOL_DTYPE)
1718
+ result.add_event(
1719
+ self._array_comparison(result, self, other, op="=="))
1720
+ return result
1721
+ elif np.isscalar(other):
1722
+ result = self._new_like_me(_BOOL_DTYPE)
1723
+ result.add_event(
1724
+ self._scalar_comparison(result, self, other, op="=="))
1725
+ return result
1726
+ else:
1727
+ return NotImplemented
1728
+
1729
+ def __ne__(self, other):
1730
+ if isinstance(other, Array):
1731
+ result = self._new_like_me(_BOOL_DTYPE)
1732
+ result.add_event(
1733
+ self._array_comparison(result, self, other, op="!="))
1734
+ return result
1735
+ elif np.isscalar(other):
1736
+ result = self._new_like_me(_BOOL_DTYPE)
1737
+ result.add_event(
1738
+ self._scalar_comparison(result, self, other, op="!="))
1739
+ return result
1740
+ else:
1741
+ return NotImplemented
1742
+
1743
+ def __le__(self, other):
1744
+ if isinstance(other, Array):
1745
+ result = self._new_like_me(_BOOL_DTYPE)
1746
+ result.add_event(
1747
+ self._array_comparison(result, self, other, op="<="))
1748
+ return result
1749
+ elif np.isscalar(other):
1750
+ result = self._new_like_me(_BOOL_DTYPE)
1751
+             result.add_event(
+                 self._scalar_comparison(result, self, other, op="<="))
1752
+ return result
1753
+ else:
1754
+ return NotImplemented
1755
+
1756
+ def __ge__(self, other):
1757
+ if isinstance(other, Array):
1758
+ result = self._new_like_me(_BOOL_DTYPE)
1759
+ result.add_event(
1760
+ self._array_comparison(result, self, other, op=">="))
1761
+ return result
1762
+ elif np.isscalar(other):
1763
+ result = self._new_like_me(_BOOL_DTYPE)
1764
+ result.add_event(
1765
+ self._scalar_comparison(result, self, other, op=">="))
1766
+ return result
1767
+ else:
1768
+ return NotImplemented
1769
+
1770
+ def __lt__(self, other):
1771
+ if isinstance(other, Array):
1772
+ result = self._new_like_me(_BOOL_DTYPE)
1773
+ result.add_event(
1774
+ self._array_comparison(result, self, other, op="<"))
1775
+ return result
1776
+ elif np.isscalar(other):
1777
+ result = self._new_like_me(_BOOL_DTYPE)
1778
+ result.add_event(
1779
+ self._scalar_comparison(result, self, other, op="<"))
1780
+ return result
1781
+ else:
1782
+ return NotImplemented
1783
+
1784
+ def __gt__(self, other):
1785
+ if isinstance(other, Array):
1786
+ result = self._new_like_me(_BOOL_DTYPE)
1787
+ result.add_event(
1788
+ self._array_comparison(result, self, other, op=">"))
1789
+ return result
1790
+ elif np.isscalar(other):
1791
+ result = self._new_like_me(_BOOL_DTYPE)
1792
+ result.add_event(
1793
+ self._scalar_comparison(result, self, other, op=">"))
1794
+ return result
1795
+ else:
1796
+ return NotImplemented
1797
+
1798
+ # }}}
1799
+
1800
+ # {{{ complex-valued business
1801
+
1802
+ @property
1803
+ def real(self):
1804
+ """
1805
+ .. versionadded:: 2012.1
1806
+ """
1807
+ if self.dtype.kind == "c":
1808
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1809
+ result.add_event(
1810
+ self._real(result, self))
1811
+ return result
1812
+ else:
1813
+ return self
1814
+
1815
+ @property
1816
+ def imag(self):
1817
+ """
1818
+ .. versionadded:: 2012.1
1819
+ """
1820
+ if self.dtype.kind == "c":
1821
+ result = self._new_like_me(self.dtype.type(0).real.dtype)
1822
+ result.add_event(
1823
+ self._imag(result, self))
1824
+ return result
1825
+ else:
1826
+ return zeros_like(self)
1827
+
1828
+ def conj(self):
1829
+ """
1830
+ .. versionadded:: 2012.1
1831
+ """
1832
+ if self.dtype.kind == "c":
1833
+ result = self._new_like_me()
1834
+ result.add_event(self._conj(result, self))
1835
+ return result
1836
+ else:
1837
+ return self
1838
+
1839
+ conjugate = conj
1840
+
1841
+ # }}}
1842
+
1843
+ # {{{ event management
1844
+
1845
+ def add_event(self, evt):
1846
+ """Add *evt* to :attr:`events`. If :attr:`events` is too long, this method
1847
+ may implicitly wait for a subset of :attr:`events` and clear them from the
1848
+ list.
1849
+ """
1850
+ n_wait = 4
1851
+
1852
+ self.events.append(evt)
1853
+
1854
+ if len(self.events) > 3*n_wait:
1855
+ wait_events = self.events[:n_wait]
1856
+ cl.wait_for_events(wait_events)
1857
+ del self.events[:n_wait]
1858
+
1859
+ def finish(self):
1860
+ """Wait for the entire contents of :attr:`events`, clear it."""
1861
+
1862
+ if self.events:
1863
+ cl.wait_for_events(self.events)
1864
+ del self.events[:]
1865
+
1866
+ # }}}
1867
+
1868
+ # {{{ views
1869
+
1870
+ def reshape(self, *shape, **kwargs):
1871
+ """Returns an array containing the same data with a new shape."""
1872
+
1873
+ order = kwargs.pop("order", "C")
1874
+ if kwargs:
1875
+ raise TypeError("unexpected keyword arguments: %s"
1876
+ % list(kwargs.keys()))
1877
+
1878
+ if order not in "CF":
1879
+ raise ValueError("order must be either 'C' or 'F'")
1880
+
1881
+ # TODO: add more error-checking, perhaps
1882
+
1883
+ # FIXME: The following is overly conservative. As long as we don't change
1884
+ # our memory footprint, we're good.
1885
+
1886
+ # if not self.flags.forc:
1887
+ # raise RuntimeError("only contiguous arrays may "
1888
+ # "be used as arguments to this operation")
1889
+
1890
+         if isinstance(shape[0], (tuple, list)):
1891
+ shape = tuple(shape[0])
1892
+
1893
+ if -1 in shape:
1894
+ shape = list(shape)
1895
+ idx = shape.index(-1)
1896
+ size = -reduce(lambda x, y: x * y, shape, 1)
1897
+ if size == 0:
1898
+ shape[idx] = 0
1899
+ else:
1900
+ shape[idx] = self.size // size
1901
+ if builtins.any(s < 0 for s in shape):
1902
+ raise ValueError("can only specify one unknown dimension")
1903
+ shape = tuple(shape)
1904
+
1905
+ if shape == self.shape:
1906
+ return self._new_with_changes(
1907
+ data=self.base_data, offset=self.offset, shape=shape,
1908
+ strides=self.strides)
1909
+
1910
+ import operator
1911
+ size = reduce(operator.mul, shape, 1)
1912
+ if size != self.size:
1913
+ raise ValueError("total size of new array must be unchanged")
1914
+
1915
+ if self.size == 0:
1916
+ return self._new_with_changes(
1917
+ data=None, offset=0, shape=shape,
1918
+ strides=(
1919
+ _f_contiguous_strides(self.dtype.itemsize, shape)
1920
+ if order == "F" else
1921
+ _c_contiguous_strides(self.dtype.itemsize, shape)
1922
+ ))
1923
+
1924
+ # {{{ determine reshaped strides
1925
+
1926
+ # copied and translated from
1927
+ # https://github.com/numpy/numpy/blob/4083883228d61a3b571dec640185b5a5d983bf59/numpy/core/src/multiarray/shape.c # noqa: E501
1928
+
1929
+ newdims = shape
1930
+ newnd = len(newdims)
1931
+
1932
+ # Remove axes with dimension 1 from the old array. They have no effect
1933
+ # but would need special cases since their strides do not matter.
1934
+
1935
+ olddims = []
1936
+ oldstrides = []
1937
+ for oi in range(len(self.shape)):
1938
+ s = self.shape[oi]
1939
+ if s != 1:
1940
+ olddims.append(s)
1941
+ oldstrides.append(self.strides[oi])
1942
+
1943
+ oldnd = len(olddims)
1944
+
1945
+ newstrides = [-1]*len(newdims)
1946
+
1947
+ # oi to oj and ni to nj give the axis ranges currently worked with
1948
+ oi = 0
1949
+ oj = 1
1950
+ ni = 0
1951
+ nj = 1
1952
+ while ni < newnd and oi < oldnd:
1953
+ np = newdims[ni]
1954
+ op = olddims[oi]
1955
+
1956
+ while np != op:
1957
+ if np < op:
1958
+ # Misses trailing 1s, these are handled later
1959
+ np *= newdims[nj]
1960
+ nj += 1
1961
+ else:
1962
+ op *= olddims[oj]
1963
+ oj += 1
1964
+
1965
+ # Check whether the original axes can be combined
1966
+ for ok in range(oi, oj-1):
1967
+ if order == "F":
1968
+ if oldstrides[ok+1] != olddims[ok]*oldstrides[ok]:
1969
+ raise ValueError("cannot reshape without copy")
1970
+ else:
1971
+ # C order
1972
+ if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]):
1973
+ raise ValueError("cannot reshape without copy")
1974
+
1975
+ # Calculate new strides for all axes currently worked with
1976
+ if order == "F":
1977
+ newstrides[ni] = oldstrides[oi]
1978
+ for nk in range(ni+1, nj):
1979
+ newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1]
1980
+ else:
1981
+ # C order
1982
+ newstrides[nj - 1] = oldstrides[oj - 1]
1983
+ for nk in range(nj-1, ni, -1):
1984
+ newstrides[nk - 1] = newstrides[nk]*newdims[nk]
1985
+
1986
+ ni = nj
1987
+ nj += 1
1988
+
1989
+ oi = oj
1990
+ oj += 1
1991
+
1992
+ # Set strides corresponding to trailing 1s of the new shape.
1993
+ if ni >= 1:
1994
+ last_stride = newstrides[ni - 1]
1995
+ else:
1996
+ last_stride = self.dtype.itemsize
1997
+
1998
+ if order == "F":
1999
+ last_stride *= newdims[ni - 1]
2000
+
2001
+ for nk in range(ni, len(shape)):
2002
+ newstrides[nk] = last_stride
2003
+
2004
+ # }}}
2005
+
2006
+ return self._new_with_changes(
2007
+ data=self.base_data, offset=self.offset, shape=shape,
2008
+ strides=tuple(newstrides))
2009
+
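+     # A sketch of typical ``reshape`` use, assuming ``queue``, ``np`` and
+     # ``cl_array`` as above. Reshaping returns a view on the same buffer; a
+     # layout that cannot be expressed by new strides raises
+     # "cannot reshape without copy".
+     #
+     #   a = cl_array.arange(queue, 12, dtype=np.float32)
+     #   b = a.reshape(3, 4)       # same data, shape (3, 4)
+     #   c = a.reshape(-1, 2)      # the -1 is inferred as 6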
2010
+ def ravel(self, order="C"):
2011
+ """Returns flattened array containing the same data."""
2012
+ return self.reshape(self.size, order=order)
2013
+
2014
+ def view(self, dtype=None):
2015
+ """Returns view of array with the same data. If *dtype* is different
2016
+ from current dtype, the actual bytes of memory will be reinterpreted.
2017
+ """
2018
+
2019
+ if dtype is None:
2020
+ dtype = self.dtype
2021
+
2022
+ old_itemsize = self.dtype.itemsize
2023
+ itemsize = np.dtype(dtype).itemsize
2024
+
2025
+ from pytools import argmin2
2026
+ min_stride_axis = argmin2(
2027
+ (axis, abs(stride))
2028
+ for axis, stride in enumerate(self.strides))
2029
+
2030
+ if self.shape[min_stride_axis] * old_itemsize % itemsize != 0:
2031
+ raise ValueError("new type not compatible with array")
2032
+
2033
+ new_shape = (
2034
+ self.shape[:min_stride_axis]
2035
+ + (self.shape[min_stride_axis] * old_itemsize // itemsize,)
2036
+ + self.shape[min_stride_axis+1:])
2037
+ new_strides = (
2038
+ self.strides[:min_stride_axis]
2039
+ + (self.strides[min_stride_axis] * itemsize // old_itemsize,)
2040
+ + self.strides[min_stride_axis+1:])
2041
+
2042
+ return self._new_with_changes(
2043
+ self.base_data, self.offset,
2044
+ shape=new_shape, dtype=dtype,
2045
+ strides=new_strides)
2046
+
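+     # A sketch of ``view``, assuming ``queue``, ``np`` and ``cl_array``: the
+     # bytes are reinterpreted in place, so a float32 array can be viewed as
+     # uint32 without copying.
+     #
+     #   a = cl_array.arange(queue, 4, dtype=np.float32)
+     #   bits = a.view(np.uint32)      # same buffer, dtype uint32, shape (4,)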
2047
+ def squeeze(self):
2048
+ """Returns a view of the array with dimensions of
2049
+ length 1 removed.
2050
+
2051
+ .. versionadded:: 2015.2
2052
+ """
2053
+ new_shape = tuple(dim for dim in self.shape if dim > 1)
2054
+ new_strides = tuple(
2055
+ self.strides[i] for i, dim in enumerate(self.shape)
2056
+ if dim > 1)
2057
+
2058
+ return self._new_with_changes(
2059
+ self.base_data, self.offset,
2060
+ shape=new_shape, strides=new_strides)
2061
+
2062
+ def transpose(self, axes=None):
2063
+ """Permute the dimensions of an array.
2064
+
2065
+ :arg axes: list of ints, optional.
2066
+ By default, reverse the dimensions, otherwise permute the axes
2067
+ according to the values given.
2068
+
2069
+ :returns: :class:`Array` A view of the array with its axes permuted.
2070
+
2071
+ .. versionadded:: 2015.2
2072
+ """
2073
+
2074
+ if axes is None:
2075
+ axes = range(self.ndim-1, -1, -1)
2076
+
2077
+ if len(axes) != len(self.shape):
2078
+ raise ValueError("axes don't match array")
2079
+
2080
+ new_shape = [self.shape[axes[i]] for i in range(len(axes))]
2081
+ new_strides = [self.strides[axes[i]] for i in range(len(axes))]
2082
+
2083
+ return self._new_with_changes(
2084
+ self.base_data, self.offset,
2085
+ shape=tuple(new_shape),
2086
+ strides=tuple(new_strides))
2087
+
2088
+ @property
2089
+ def T(self): # noqa: N802
2090
+ """
2091
+ .. versionadded:: 2015.2
2092
+ """
2093
+ return self.transpose()
2094
+
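+     # A sketch of the view-returning shape methods above, assuming ``queue``,
+     # ``np`` and ``cl_array``; none of these copy the underlying buffer.
+     #
+     #   a = cl_array.zeros(queue, (2, 1, 3), np.float32)
+     #   a.squeeze().shape             # (2, 3)
+     #   a.T.shape                     # (3, 1, 2)
+     #   a.transpose((2, 0, 1)).shape  # (3, 2, 1)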
2095
+ # }}}
2096
+
2097
+ def map_to_host(self, queue=None, flags=None, is_blocking=True, wait_for=None):
2098
+ """If *is_blocking*, return a :class:`numpy.ndarray` corresponding to the
2099
+ same memory as *self*.
2100
+
2101
+ If *is_blocking* is not true, return a tuple ``(ary, evt)``, where
2102
+ *ary* is the above-mentioned array.
2103
+
2104
+ The host array is obtained using :func:`pyopencl.enqueue_map_buffer`.
2105
+ See there for further details.
2106
+
2107
+ :arg flags: A combination of :class:`pyopencl.map_flags`.
2108
+ Defaults to read-write.
2109
+
2110
+         .. versionadded:: 2013.2
2111
+ """
2112
+
2113
+ if flags is None:
2114
+ flags = cl.map_flags.READ | cl.map_flags.WRITE
2115
+ if wait_for is None:
2116
+ wait_for = []
2117
+
2118
+ ary, evt = cl.enqueue_map_buffer(
2119
+ queue or self.queue, self.base_data, flags, self.offset,
2120
+ self.shape, self.dtype, strides=self.strides,
2121
+ wait_for=wait_for + self.events, is_blocking=is_blocking)
2122
+
2123
+ if is_blocking:
2124
+ return ary
2125
+ else:
2126
+ return ary, evt
2127
+
2128
+ # {{{ getitem/setitem
2129
+
2130
+ def __getitem__(self, index):
2131
+ """
2132
+ .. versionadded:: 2013.1
2133
+ """
2134
+
2135
+ if isinstance(index, Array):
2136
+ if index.dtype.kind not in ("i", "u"):
2137
+ raise TypeError(
2138
+ "fancy indexing is only allowed with integers")
2139
+ if len(index.shape) != 1:
2140
+ raise NotImplementedError(
2141
+ "multidimensional fancy indexing is not supported")
2142
+ if len(self.shape) != 1:
2143
+ raise NotImplementedError(
2144
+ "fancy indexing into a multi-d array is not supported")
2145
+
2146
+ return take(self, index)
2147
+
2148
+ if not isinstance(index, tuple):
2149
+ index = (index,)
2150
+
2151
+ new_shape = []
2152
+ new_offset = self.offset
2153
+ new_strides = []
2154
+
2155
+ seen_ellipsis = False
2156
+
2157
+ index_axis = 0
2158
+ array_axis = 0
2159
+ while index_axis < len(index):
2160
+ index_entry = index[index_axis]
2161
+
2162
+ if array_axis > len(self.shape):
2163
+ raise IndexError("too many axes in index")
2164
+
2165
+ if isinstance(index_entry, slice):
2166
+ start, stop, idx_stride = index_entry.indices(
2167
+ self.shape[array_axis])
2168
+
2169
+ array_stride = self.strides[array_axis]
2170
+
2171
+ new_shape.append((abs(stop-start)-1)//abs(idx_stride)+1)
2172
+ new_strides.append(idx_stride*array_stride)
2173
+ new_offset += array_stride*start
2174
+
2175
+ index_axis += 1
2176
+ array_axis += 1
2177
+
2178
+ elif isinstance(index_entry, (int, np.integer)):
2179
+ array_shape = self.shape[array_axis]
2180
+ if index_entry < 0:
2181
+ index_entry += array_shape
2182
+
2183
+ if not (0 <= index_entry < array_shape):
2184
+ raise IndexError(
2185
+ "subindex in axis %d out of range" % index_axis)
2186
+
2187
+ new_offset += self.strides[array_axis]*index_entry
2188
+
2189
+ index_axis += 1
2190
+ array_axis += 1
2191
+
2192
+ elif index_entry is Ellipsis:
2193
+ index_axis += 1
2194
+
2195
+ remaining_index_count = len(index) - index_axis
2196
+ new_array_axis = len(self.shape) - remaining_index_count
2197
+ if new_array_axis < array_axis:
2198
+ raise IndexError("invalid use of ellipsis in index")
2199
+ while array_axis < new_array_axis:
2200
+ new_shape.append(self.shape[array_axis])
2201
+ new_strides.append(self.strides[array_axis])
2202
+ array_axis += 1
2203
+
2204
+ if seen_ellipsis:
2205
+ raise IndexError(
2206
+ "more than one ellipsis not allowed in index")
2207
+ seen_ellipsis = True
2208
+
2209
+ elif index_entry is np.newaxis:
2210
+ new_shape.append(1)
2211
+ new_strides.append(0)
2212
+ index_axis += 1
2213
+
2214
+ else:
2215
+ raise IndexError("invalid subindex in axis %d" % index_axis)
2216
+
2217
+ while array_axis < len(self.shape):
2218
+ new_shape.append(self.shape[array_axis])
2219
+ new_strides.append(self.strides[array_axis])
2220
+
2221
+ array_axis += 1
2222
+
2223
+ return self._new_with_changes(
2224
+ self.base_data, offset=new_offset,
2225
+ shape=tuple(new_shape),
2226
+ strides=tuple(new_strides))
2227
+
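+     # A sketch of indexing, assuming ``queue``, ``np`` and ``cl_array``:
+     # basic slicing returns a view; indexing with an integer Array performs a
+     # gather (via :func:`take`) and returns a new array.
+     #
+     #   a = cl_array.arange(queue, 10, dtype=np.float32)
+     #   a[2:8:2]                                      # strided view, no copy
+     #   idx = cl_array.to_device(queue, np.array([0, 3, 7], dtype=np.int32))
+     #   a[idx]                                        # [a[0], a[3], a[7]]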
2228
+ def setitem(self, subscript, value, queue=None, wait_for=None):
2229
+ """Like :meth:`__setitem__`, but with the ability to specify
2230
+ a *queue* and *wait_for*.
2231
+
2232
+ .. versionadded:: 2013.1
2233
+
2234
+ .. versionchanged:: 2013.2
2235
+
2236
+ Added *wait_for*.
2237
+ """
2238
+
2239
+ queue = queue or self.queue or value.queue
2240
+ if wait_for is None:
2241
+ wait_for = []
2242
+ wait_for = wait_for + self.events
2243
+
2244
+ if isinstance(subscript, Array):
2245
+ if subscript.dtype.kind not in ("i", "u"):
2246
+ raise TypeError(
2247
+ "fancy indexing is only allowed with integers")
2248
+ if len(subscript.shape) != 1:
2249
+ raise NotImplementedError(
2250
+ "multidimensional fancy indexing is not supported")
2251
+ if len(self.shape) != 1:
2252
+ raise NotImplementedError(
2253
+ "fancy indexing into a multi-d array is not supported")
2254
+
2255
+ multi_put([value], subscript, out=[self], queue=queue,
2256
+ wait_for=wait_for)
2257
+ return
2258
+
2259
+ subarray = self[subscript]
2260
+
2261
+ if not subarray.size:
2262
+ # This prevents errors about mismatched strides that neither we
2263
+ # nor numpy worry about in the empty case.
2264
+ return
2265
+
2266
+ if isinstance(value, np.ndarray):
2267
+ if subarray.shape == value.shape and subarray.strides == value.strides:
2268
+ self.add_event(
2269
+ cl.enqueue_copy(queue, subarray.base_data,
2270
+ value, dst_offset=subarray.offset, wait_for=wait_for))
2271
+ return
2272
+ else:
2273
+ value = to_device(queue, value, self.allocator)
2274
+
2275
+ if isinstance(value, Array):
2276
+ if len(subarray.shape) != len(value.shape):
2277
+ raise NotImplementedError("broadcasting is not "
2278
+ "supported in __setitem__")
2279
+ if subarray.shape != value.shape:
2280
+ raise ValueError("cannot assign between arrays of "
2281
+ "differing shapes")
2282
+ if subarray.strides != value.strides:
2283
+ raise NotImplementedError("cannot assign between arrays of "
2284
+ "differing strides")
2285
+
2286
+ self.add_event(
2287
+ self._copy(subarray, value, queue=queue, wait_for=wait_for))
2288
+
2289
+ else:
2290
+ # Let's assume it's a scalar
2291
+ subarray.fill(value, queue=queue, wait_for=wait_for)
2292
+
2293
+ def __setitem__(self, subscript, value):
2294
+         """Set the slice of *self* identified by *subscript* to *value*.
2295
+
2296
+ *value* is allowed to be:
2297
+
2298
+         * An :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
2299
+ but with potentially different :attr:`dtype`.
2300
+ * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
2301
+ :attr:`strides`, but with potentially different :attr:`dtype`.
2302
+ * A scalar.
2303
+
2304
+ Non-scalar broadcasting is not currently supported.
2305
+
2306
+ .. versionadded:: 2013.1
2307
+ """
2308
+ self.setitem(subscript, value)
2309
+
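+     # A sketch of ``__setitem__``, assuming ``a`` is a 1D contiguous Array on
+     # ``queue`` (hypothetical names):
+     #
+     #   a[::2] = 0                               # scalar filled into a slice
+     #   a[:4] = np.arange(4, dtype=a.dtype)      # matching-shape host ndarray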
2310
+ # }}}
2311
+
2312
+ # }}}
2313
+
2314
+
2315
+ # {{{ creation helpers
2316
+
2317
+ def as_strided(ary, shape=None, strides=None):
2318
+ """Make an :class:`Array` from the given array with the given
2319
+ shape and strides.
2320
+ """
2321
+
2322
+ # undocumented for the moment
2323
+
2324
+ if shape is None:
2325
+ shape = ary.shape
2326
+ if strides is None:
2327
+ strides = ary.strides
2328
+
2329
+ return Array(ary.queue, shape, ary.dtype, allocator=ary.allocator,
2330
+ data=ary.data, strides=strides)
2331
+
2332
+
2333
+ class _same_as_transfer: # noqa: N801
2334
+ pass
2335
+
2336
+
2337
+ def to_device(queue, ary, allocator=None, async_=None,
2338
+ array_queue=_same_as_transfer, **kwargs):
2339
+     """Return an :class:`Array` that is an exact copy of the
2340
+ :class:`numpy.ndarray` instance *ary*.
2341
+
2342
+ :arg array_queue: The :class:`~pyopencl.CommandQueue` which will
2343
+ be stored in the resulting array. Useful
2344
+ to make sure there is no implicit queue associated
2345
+ with the array by passing *None*.
2346
+
2347
+ See :class:`Array` for the meaning of *allocator*.
2348
+
2349
+ .. versionchanged:: 2015.2
2350
+ *array_queue* argument was added.
2351
+
2352
+ .. versionchanged:: 2017.2.1
2353
+
2354
+ Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
2355
+     we will continue to accept *async* as a parameter; however, this
2356
+ should be considered deprecated. *async_* is the new, official
2357
+ spelling.
2358
+ """
2359
+
2360
+ # {{{ handle 'async' deprecation
2361
+
2362
+ async_arg = kwargs.pop("async", None)
2363
+ if async_arg is not None:
2364
+ if async_ is not None:
2365
+ raise TypeError("may not specify both 'async' and 'async_'")
2366
+ async_ = async_arg
2367
+
2368
+ if async_ is None:
2369
+ async_ = False
2370
+
2371
+ if kwargs:
2372
+ raise TypeError("extra keyword arguments specified: %s"
2373
+ % ", ".join(kwargs))
2374
+
2375
+ # }}}
2376
+
2377
+ if ary.dtype == object:
2378
+ raise RuntimeError("to_device does not work on object arrays.")
2379
+
2380
+ if array_queue is _same_as_transfer:
2381
+ first_arg = queue
2382
+ else:
2383
+ first_arg = queue.context
2384
+
2385
+ result = Array(first_arg, ary.shape, ary.dtype,
2386
+ allocator=allocator, strides=ary.strides)
2387
+ result.set(ary, async_=async_, queue=queue)
2388
+ return result
2389
+
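+ # A minimal ``to_device`` round trip, assuming an existing CommandQueue
+ # ``queue`` plus ``import numpy as np`` and
+ # ``import pyopencl.array as cl_array``:
+ #
+ #   host = np.linspace(0, 1, 8, dtype=np.float32)
+ #   dev = cl_array.to_device(queue, host)
+ #   assert np.allclose(dev.get(), host)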
2390
+
2391
+ empty = Array
2392
+
2393
+
2394
+ def zeros(queue, shape, dtype, order="C", allocator=None):
2395
+ """Same as :func:`empty`, but the :class:`Array` is zero-initialized before
2396
+ being returned.
2397
+
2398
+ .. versionchanged:: 2011.1
2399
+ *context* argument was deprecated.
2400
+ """
2401
+
2402
+ result = Array(None, shape, dtype,
2403
+ order=order, allocator=allocator,
2404
+ _context=queue.context, _queue=queue)
2405
+ result._zero_fill()
2406
+ return result
2407
+
2408
+
2409
+ def empty_like(ary, queue=_copy_queue, allocator=None):
2410
+ """Make a new, uninitialized :class:`Array` having the same properties
2411
+     as *ary*.
2412
+ """
2413
+
2414
+ return ary._new_with_changes(data=None, offset=0, queue=queue,
2415
+ allocator=allocator)
2416
+
2417
+
2418
+ def zeros_like(ary):
2419
+ """Make a new, zero-initialized :class:`Array` having the same properties
2420
+     as *ary*.
2421
+ """
2422
+
2423
+ result = ary._new_like_me()
2424
+ result._zero_fill()
2425
+ return result
2426
+
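+ # A sketch of the creation helpers above, assuming ``queue``, ``np`` and
+ # ``cl_array``:
+ #
+ #   a = cl_array.empty(queue, (4, 4), np.float32)   # uninitialized
+ #   z = cl_array.zeros(queue, (4, 4), np.float32)   # zero-filled
+ #   b = cl_array.empty_like(a)                      # same shape/dtype/strides
+ #   c = cl_array.zeros_like(a)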
2427
+
2428
+ @dataclass
2429
+ class _ArangeInfo:
2430
+ start: Optional[int] = None
2431
+ stop: Optional[int] = None
2432
+ step: Optional[int] = None
2433
+ dtype: Optional["np.dtype"] = None
2434
+ allocator: Optional[Any] = None
2435
+
2436
+
2437
+ @elwise_kernel_runner
2438
+ def _arange_knl(result, start, step):
2439
+ return elementwise.get_arange_kernel(
2440
+ result.context, result.dtype)
2441
+
2442
+
2443
+ def arange(queue, *args, **kwargs):
2444
+ """arange(queue, [start, ] stop [, step], **kwargs)
2445
+     Create an :class:`Array` filled with numbers spaced *step* apart,
2446
+ starting from *start* and ending at *stop*. If not given, *start*
2447
+ defaults to 0, *step* defaults to 1.
2448
+
2449
+ For floating point arguments, the length of the result is
2450
+ ``ceil((stop - start)/step)``. This rule may result in the last
2451
+ element of the result being greater than *stop*.
2452
+
2453
+ *dtype* is a required keyword argument.
2454
+
2455
+ .. versionchanged:: 2011.1
2456
+ *context* argument was deprecated.
2457
+
2458
+ .. versionchanged:: 2011.2
2459
+ *allocator* keyword argument was added.
2460
+ """
2461
+
2462
+ # {{{ argument processing
2463
+
2464
+ # Yuck. Thanks, numpy developers. ;)
2465
+
2466
+ explicit_dtype = False
2467
+ inf = _ArangeInfo()
2468
+
2469
+ if isinstance(args[-1], np.dtype):
2470
+ inf.dtype = args[-1]
2471
+ args = args[:-1]
2472
+ explicit_dtype = True
2473
+
2474
+ argc = len(args)
2475
+ if argc == 0:
2476
+ raise ValueError("stop argument required")
2477
+ elif argc == 1:
2478
+ inf.stop = args[0]
2479
+ elif argc == 2:
2480
+ inf.start = args[0]
2481
+ inf.stop = args[1]
2482
+ elif argc == 3:
2483
+ inf.start = args[0]
2484
+ inf.stop = args[1]
2485
+ inf.step = args[2]
2486
+ else:
2487
+ raise ValueError("too many arguments")
2488
+
2489
+ admissible_names = ["start", "stop", "step", "dtype", "allocator"]
2490
+ for k, v in kwargs.items():
2491
+ if k in admissible_names:
2492
+ if getattr(inf, k) is None:
2493
+ setattr(inf, k, v)
2494
+ if k == "dtype":
2495
+ explicit_dtype = True
2496
+ else:
2497
+ raise ValueError(f"may not specify '{k}' by position and keyword")
2498
+ else:
2499
+ raise ValueError(f"unexpected keyword argument '{k}'")
2500
+
2501
+ if inf.start is None:
2502
+ inf.start = 0
2503
+ if inf.step is None:
2504
+ inf.step = 1
2505
+ if inf.dtype is None:
2506
+ inf.dtype = np.array([inf.start, inf.stop, inf.step]).dtype
2507
+
2508
+ # }}}
2509
+
2510
+ # {{{ actual functionality
2511
+
2512
+ dtype = np.dtype(inf.dtype)
2513
+ start = dtype.type(inf.start)
2514
+ step = dtype.type(inf.step)
2515
+ stop = dtype.type(inf.stop)
2516
+
2517
+ if not explicit_dtype:
2518
+ raise TypeError("arange requires a dtype argument")
2519
+
2520
+ from math import ceil
2521
+ size = int(ceil((stop-start)/step))
2522
+
2523
+ result = Array(queue, (size,), dtype, allocator=inf.allocator)
2524
+ result.add_event(_arange_knl(result, start, step, queue=queue))
2525
+
2526
+ # }}}
2527
+
2528
+ return result
2529
+
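+ # A sketch of ``arange``, assuming ``queue``, ``np`` and ``cl_array``; note
+ # that, unlike :func:`numpy.arange`, *dtype* is required here.
+ #
+ #   cl_array.arange(queue, 5, dtype=np.int32)              # 0, 1, 2, 3, 4
+ #   cl_array.arange(queue, 1, 2, 0.25, dtype=np.float32)   # 1.0, 1.25, 1.5, 1.75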
2530
+ # }}}
2531
+
2532
+
2533
+ # {{{ take/put/concatenate/diff/(h?stack)
2534
+
2535
+ @elwise_kernel_runner
2536
+ def _take(result, ary, indices):
2537
+ return elementwise.get_take_kernel(
2538
+ result.context, result.dtype, indices.dtype)
2539
+
2540
+
2541
+ def take(a, indices, out=None, queue=None, wait_for=None):
2542
+ """Return the :class:`Array` ``[a[indices[0]], ..., a[indices[n]]]``.
2543
+ For the moment, *a* must be a type that can be bound to a texture.
2544
+ """
2545
+
2546
+ queue = queue or a.queue
2547
+ if out is None:
2548
+ out = type(a)(queue, indices.shape, a.dtype, allocator=a.allocator)
2549
+
2550
+ assert len(indices.shape) == 1
2551
+ out.add_event(
2552
+ _take(out, a, indices, queue=queue, wait_for=wait_for))
2553
+ return out
2554
+
2555
+
2556
+ def multi_take(arrays, indices, out=None, queue=None):
2557
+ if not len(arrays):
2558
+ return []
2559
+
2560
+ assert len(indices.shape) == 1
2561
+
2562
+ from pytools import single_valued
2563
+ a_dtype = single_valued(a.dtype for a in arrays)
2564
+     a_allocator = arrays[0].allocator
2565
+ context = indices.context
2566
+ queue = queue or indices.queue
2567
+
2568
+ vec_count = len(arrays)
2569
+
2570
+ if out is None:
2571
+ out = [
2572
+ type(arrays[i])(
2573
+ context, queue, indices.shape, a_dtype,
2574
+ allocator=a_allocator)
2575
+ for i in range(vec_count)]
2576
+ else:
2577
+ if len(out) != len(arrays):
2578
+ raise ValueError("out and arrays must have the same length")
2579
+
2580
+ chunk_size = builtins.min(vec_count, 10)
2581
+
2582
+ def make_func_for_chunk_size(chunk_size):
2583
+ knl = elementwise.get_take_kernel(
2584
+ indices.context, a_dtype, indices.dtype,
2585
+ vec_count=chunk_size)
2586
+ knl.set_block_shape(*indices._block)
2587
+ return knl
2588
+
2589
+ knl = make_func_for_chunk_size(chunk_size)
2590
+
2591
+ for start_i in range(0, len(arrays), chunk_size):
2592
+ chunk_slice = slice(start_i, start_i+chunk_size)
2593
+
2594
+ if start_i + chunk_size > vec_count:
2595
+ knl = make_func_for_chunk_size(vec_count-start_i)
2596
+
2597
+ gs, ls = indices._get_sizes(queue,
2598
+ knl.get_work_group_info(
2599
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2600
+ queue.device))
2601
+
2602
+ wait_for_this = (
2603
+ *indices.events,
2604
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2605
+ *[evt for o in out[chunk_slice] for evt in o.events])
2606
+ evt = knl(queue, gs, ls,
2607
+ indices.data,
2608
+ *[o.data for o in out[chunk_slice]],
2609
+ *[i.data for i in arrays[chunk_slice]],
2610
+ *[indices.size],
2611
+ wait_for=wait_for_this)
2612
+ for o in out[chunk_slice]:
2613
+ o.add_event(evt)
2614
+
2615
+ return out
2616
+
2617
+
2618
+ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None,
2619
+ out=None, queue=None, src_offsets=None):
2620
+ if not len(arrays):
2621
+ return []
2622
+
2623
+ from pytools import single_valued
2624
+ a_dtype = single_valued(a.dtype for a in arrays)
2625
+ a_allocator = arrays[0].allocator
2626
+ context = src_indices.context
2627
+ queue = queue or src_indices.queue
2628
+
2629
+ vec_count = len(arrays)
2630
+
2631
+ if out is None:
2632
+ out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
2633
+ for i in range(vec_count)]
2634
+ else:
2635
+ if a_dtype != single_valued(o.dtype for o in out):
2636
+ raise TypeError("arrays and out must have the same dtype")
2637
+ if len(out) != vec_count:
2638
+ raise ValueError("out and arrays must have the same length")
2639
+
2640
+ if src_indices.dtype != dest_indices.dtype:
2641
+ raise TypeError(
2642
+ "src_indices and dest_indices must have the same dtype")
2643
+
2644
+ if len(src_indices.shape) != 1:
2645
+ raise ValueError("src_indices must be 1D")
2646
+
2647
+ if src_indices.shape != dest_indices.shape:
2648
+ raise ValueError(
2649
+ "src_indices and dest_indices must have the same shape")
2650
+
2651
+ if src_offsets is None:
2652
+ src_offsets_list = []
2653
+ else:
2654
+ src_offsets_list = src_offsets
2655
+ if len(src_offsets) != vec_count:
2656
+ raise ValueError(
2657
+ "src_indices and src_offsets must have the same length")
2658
+
2659
+ max_chunk_size = 10
2660
+
2661
+ chunk_size = builtins.min(vec_count, max_chunk_size)
2662
+
2663
+ def make_func_for_chunk_size(chunk_size):
2664
+ return elementwise.get_take_put_kernel(context,
2665
+ a_dtype, src_indices.dtype,
2666
+ with_offsets=src_offsets is not None,
2667
+ vec_count=chunk_size)
2668
+
2669
+ knl = make_func_for_chunk_size(chunk_size)
2670
+
2671
+ for start_i in range(0, len(arrays), chunk_size):
2672
+ chunk_slice = slice(start_i, start_i+chunk_size)
2673
+
2674
+ if start_i + chunk_size > vec_count:
2675
+ knl = make_func_for_chunk_size(vec_count-start_i)
2676
+
2677
+ gs, ls = src_indices._get_sizes(queue,
2678
+ knl.get_work_group_info(
2679
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2680
+ queue.device))
2681
+
2682
+ wait_for_this = (
2683
+ *dest_indices.events,
2684
+ *src_indices.events,
2685
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2686
+ *[evt for o in out[chunk_slice] for evt in o.events])
2687
+ evt = knl(queue, gs, ls,
2688
+ *out[chunk_slice],
2689
+ dest_indices,
2690
+ src_indices,
2691
+ *arrays[chunk_slice],
2692
+ *src_offsets_list[chunk_slice],
2693
+ src_indices.size,
2694
+ wait_for=wait_for_this)
2695
+ for o in out[chunk_slice]:
2696
+ o.add_event(evt)
2697
+
2698
+ return out
2699
+
2700
+
2701
+ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None,
2702
+ wait_for=None):
2703
+ if not len(arrays):
2704
+ return []
2705
+
2706
+ from pytools import single_valued
2707
+ a_dtype = single_valued(a.dtype for a in arrays)
2708
+ a_allocator = arrays[0].allocator
2709
+ context = dest_indices.context
2710
+ queue = queue or dest_indices.queue
2711
+ if wait_for is None:
2712
+ wait_for = []
2713
+ wait_for = wait_for + dest_indices.events
2714
+
2715
+ vec_count = len(arrays)
2716
+
2717
+ if out is None:
2718
+ out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
2719
+ for i in range(vec_count)]
2720
+ else:
2721
+ if a_dtype != single_valued(o.dtype for o in out):
2722
+ raise TypeError("arrays and out must have the same dtype")
2723
+ if len(out) != vec_count:
2724
+ raise ValueError("out and arrays must have the same length")
2725
+
2726
+ if len(dest_indices.shape) != 1:
2727
+ raise ValueError("dest_indices must be 1D")
2728
+
2729
+ chunk_size = builtins.min(vec_count, 10)
2730
+
2731
+ # array of bools to specify whether the array of same index in this chunk
2732
+ # will be filled with a single value.
2733
+ use_fill = np.ndarray((chunk_size,), dtype=np.uint8)
2734
+ array_lengths = np.ndarray((chunk_size,), dtype=np.int64)
2735
+
2736
+ def make_func_for_chunk_size(chunk_size):
2737
+ knl = elementwise.get_put_kernel(
2738
+ context, a_dtype, dest_indices.dtype,
2739
+ vec_count=chunk_size)
2740
+ return knl
2741
+
2742
+ knl = make_func_for_chunk_size(chunk_size)
2743
+
2744
+ for start_i in range(0, len(arrays), chunk_size):
2745
+ chunk_slice = slice(start_i, start_i+chunk_size)
2746
+ for fill_idx, ary in enumerate(arrays[chunk_slice]):
2747
+ # If there is only one value in the values array for this src array
2748
+ # in the chunk then fill every index in `dest_idx` array with it.
2749
+ use_fill[fill_idx] = 1 if ary.size == 1 else 0
2750
+ array_lengths[fill_idx] = len(ary)
2751
+ # Copy the populated `use_fill` array to a buffer on the device.
2752
+ use_fill_cla = to_device(queue, use_fill)
2753
+ array_lengths_cla = to_device(queue, array_lengths)
2754
+
2755
+ if start_i + chunk_size > vec_count:
2756
+ knl = make_func_for_chunk_size(vec_count-start_i)
2757
+
2758
+ gs, ls = dest_indices._get_sizes(queue,
2759
+ knl.get_work_group_info(
2760
+ cl.kernel_work_group_info.WORK_GROUP_SIZE,
2761
+ queue.device))
2762
+
2763
+ wait_for_this = (
2764
+ *wait_for,
2765
+ *[evt for i in arrays[chunk_slice] for evt in i.events],
2766
+ *[evt for o in out[chunk_slice] for evt in o.events])
2767
+ evt = knl(queue, gs, ls,
2768
+ *out[chunk_slice],
2769
+ dest_indices,
2770
+ *arrays[chunk_slice],
2771
+ use_fill_cla, array_lengths_cla, dest_indices.size,
2772
+ wait_for=wait_for_this)
2773
+
2774
+ for o in out[chunk_slice]:
2775
+ o.add_event(evt)
2776
+
2777
+ return out
2778
+
2779
+
2780
+ def concatenate(arrays, axis=0, queue=None, allocator=None):
2781
+ """
2782
+ .. versionadded:: 2013.1
2783
+
2784
+ .. note::
2785
+
2786
+ The returned array is of the same type as the first array in the list.
2787
+ """
2788
+ if not arrays:
2789
+ raise ValueError("need at least one array to concatenate")
2790
+
2791
+ # {{{ find properties of result array
2792
+
2793
+ shape = None
2794
+
2795
+ for i_ary, ary in enumerate(arrays):
2796
+ queue = queue or ary.queue
2797
+ allocator = allocator or ary.allocator
2798
+
2799
+ if shape is None:
2800
+ # first array
2801
+ shape = list(ary.shape)
2802
+ else:
2803
+ if len(ary.shape) != len(shape):
2804
+ raise ValueError(
2805
+ f"{i_ary}-th array has different number of axes: "
2806
+                     f"expected {len(shape)}, got {len(ary.shape)}")
2807
+
2808
+ ary_shape_list = list(ary.shape)
2809
+ if (ary_shape_list[:axis] != shape[:axis]
2810
+ or ary_shape_list[axis+1:] != shape[axis+1:]):
2811
+ raise ValueError(
2812
+                     f"{i_ary}-th array has a residual shape not matching the other arrays")
2813
+
2814
+ # pylint: disable=unsupported-assignment-operation
2815
+ shape[axis] += ary.shape[axis]
2816
+
2817
+ # }}}
2818
+
2819
+ shape = tuple(shape)
2820
+ dtype = np.result_type(*[ary.dtype for ary in arrays])
2821
+
2822
+ if __debug__:
2823
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2824
+ for ary in arrays[1:]):
2825
+ warn("Elements of 'arrays' not of the same type, returning "
2826
+ "an instance of the type of arrays[0]",
2827
+ stacklevel=2)
2828
+
2829
+ result = arrays[0].__class__(queue, shape, dtype, allocator=allocator)
2830
+
2831
+ full_slice = (slice(None),) * len(shape)
2832
+
2833
+ base_idx = 0
2834
+ for ary in arrays:
2835
+ my_len = ary.shape[axis]
2836
+ result.setitem(
2837
+ full_slice[:axis]
2838
+ + (slice(base_idx, base_idx+my_len),)
2839
+ + full_slice[axis+1:],
2840
+ ary)
2841
+
2842
+ base_idx += my_len
2843
+
2844
+ return result
2845
+
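+ # A sketch of ``concatenate``, assuming ``queue``, ``np`` and ``cl_array``;
+ # shapes must agree outside the concatenation axis.
+ #
+ #   x = cl_array.zeros(queue, (2, 3), np.float32)
+ #   y = cl_array.zeros(queue, (4, 3), np.float32)
+ #   cl_array.concatenate((x, y), axis=0).shape      # (6, 3)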
2846
+
2847
+ @elwise_kernel_runner
2848
+ def _diff(result, array):
2849
+ return elementwise.get_diff_kernel(array.context, array.dtype)
2850
+
2851
+
2852
+ def diff(array, queue=None, allocator=None):
2853
+ """
2854
+ .. versionadded:: 2013.2
2855
+ """
2856
+
2857
+ if len(array.shape) != 1:
2858
+ raise ValueError("multi-D arrays are not supported")
2859
+
2860
+ n, = array.shape
2861
+
2862
+ queue = queue or array.queue
2863
+ allocator = allocator or array.allocator
2864
+
2865
+ result = array.__class__(queue, (n-1,), array.dtype, allocator=allocator)
2866
+ event1 = _diff(result, array, queue=queue)
2867
+ result.add_event(event1)
2868
+ return result
2869
+
2870
+
2871
+ def hstack(arrays, queue=None):
2872
+ if len(arrays) == 0:
2873
+ raise ValueError("need at least one array to hstack")
2874
+
2875
+ if queue is None:
2876
+ for ary in arrays:
2877
+ if ary.queue is not None:
2878
+ queue = ary.queue
2879
+ break
2880
+
2881
+ from pytools import all_equal, single_valued
2882
+ if not all_equal(len(ary.shape) for ary in arrays):
2883
+ raise ValueError("arguments must all have the same number of axes")
2884
+
2885
+ lead_shape = single_valued(ary.shape[:-1] for ary in arrays)
2886
+
2887
+ w = builtins.sum(ary.shape[-1] for ary in arrays)
2888
+
2889
+ if __debug__:
2890
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2891
+ for ary in arrays[1:]):
2892
+ warn("Elements of 'arrays' not of the same type, returning "
2893
+ "an instance of the type of arrays[0]",
2894
+ stacklevel=2)
2895
+
2896
+ result = arrays[0].__class__(queue, (*lead_shape, w), arrays[0].dtype,
2897
+ allocator=arrays[0].allocator)
2898
+ index = 0
2899
+ for ary in arrays:
2900
+ result[..., index:index+ary.shape[-1]] = ary
2901
+ index += ary.shape[-1]
2902
+
2903
+ return result
2904
+
2905
+
2906
+ def stack(arrays, axis=0, queue=None):
2907
+ """
2908
+ Join a sequence of arrays along a new axis.
2909
+
2910
+ :arg arrays: A sequence of :class:`Array`.
2911
+ :arg axis: Index of the dimension of the new axis in the result array.
2912
+         Can be -1, for the new axis to be the last dimension.
2913
+
2914
+ :returns: :class:`Array`
2915
+ """
2916
+ if not arrays:
2917
+ raise ValueError("need at least one array to stack")
2918
+
2919
+ input_shape = arrays[0].shape
2920
+ input_ndim = arrays[0].ndim
2921
+ axis = input_ndim if axis == -1 else axis
2922
+
2923
+ if queue is None:
2924
+ for ary in arrays:
2925
+ if ary.queue is not None:
2926
+ queue = ary.queue
2927
+ break
2928
+
2929
+ if not builtins.all(ary.shape == input_shape for ary in arrays[1:]):
2930
+ raise ValueError("arrays must have the same shape")
2931
+
2932
+ if not (0 <= axis <= input_ndim):
2933
+ raise ValueError("invalid axis")
2934
+
2935
+ if (axis == 0 and not builtins.all(
2936
+ ary.flags.c_contiguous for ary in arrays)):
2937
+ # pyopencl.Array.__setitem__ does not support non-contiguous assignments
2938
+ raise NotImplementedError
2939
+
2940
+ if (axis == input_ndim and not builtins.all(
2941
+ ary.flags.f_contiguous for ary in arrays)):
2942
+ # pyopencl.Array.__setitem__ does not support non-contiguous assignments
2943
+ raise NotImplementedError
2944
+
2945
+ result_shape = input_shape[:axis] + (len(arrays),) + input_shape[axis:]
2946
+
2947
+ if __debug__:
2948
+ if builtins.any(type(ary) != type(arrays[0]) # noqa: E721
2949
+ for ary in arrays[1:]):
2950
+ warn("Elements of 'arrays' not of the same type, returning "
2951
+ "an instance of the type of arrays[0]",
2952
+ stacklevel=2)
2953
+
2954
+ result = arrays[0].__class__(queue, result_shape,
2955
+ np.result_type(*(ary.dtype
2956
+ for ary in arrays)),
2957
+ # TODO: reconsider once arrays support
2958
+ # non-contiguous assignments
2959
+ order="C" if axis == 0 else "F",
2960
+ allocator=arrays[0].allocator)
2961
+ for i, ary in enumerate(arrays):
2962
+ idx = (slice(None),)*axis + (i,) + (slice(None),)*(input_ndim-axis)
2963
+ result[idx] = ary
2964
+
2965
+ return result
2966
+
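+ # A sketch of ``stack``, assuming ``queue``, ``np`` and ``cl_array``; a new
+ # axis of length ``len(arrays)`` is inserted at *axis*.
+ #
+ #   a = cl_array.zeros(queue, (3,), np.float32)
+ #   b = cl_array.zeros(queue, (3,), np.float32)
+ #   cl_array.stack((a, b)).shape             # (2, 3)
+ #   cl_array.stack((a, b), axis=-1).shape    # (3, 2)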
2967
+ # }}}
2968
+
2969
+
2970
+ # {{{ shape manipulation
2971
+
2972
+ def transpose(a, axes=None):
2973
+ """Permute the dimensions of an array.
2974
+
2975
+ :arg a: :class:`Array`
2976
+ :arg axes: list of ints, optional.
2977
+ By default, reverse the dimensions, otherwise permute the axes
2978
+ according to the values given.
2979
+
2980
+ :returns: :class:`Array` A view of the array with its axes permuted.
2981
+ """
2982
+ return a.transpose(axes)
2983
+
2984
+
2985
+ def reshape(a, shape):
2986
+ """Gives a new shape to an array without changing its data.
2987
+
2988
+ .. versionadded:: 2015.2
2989
+ """
2990
+
2991
+ return a.reshape(shape)
2992
+
2993
+ # }}}
2994
+
2995
+
2996
+ # {{{ conditionals
2997
+
2998
+ @elwise_kernel_runner
2999
+ def _if_positive(result, criterion, then_, else_):
3000
+ return elementwise.get_if_positive_kernel(
3001
+ result.context, criterion.dtype, then_.dtype,
3002
+ is_then_array=isinstance(then_, Array),
3003
+ is_else_array=isinstance(else_, Array),
3004
+ is_then_scalar=then_.shape == (),
3005
+ is_else_scalar=else_.shape == (),
3006
+ )
3007
+
3008
+
3009
+ def if_positive(criterion, then_, else_, out=None, queue=None):
3010
+ """Return an array like *then_*, which, for the element at index *i*,
3011
+ contains *then_[i]* if *criterion[i]>0*, else *else_[i]*.
3012
+ """
3013
+
3014
+ is_then_scalar = isinstance(then_, SCALAR_CLASSES)
3015
+ is_else_scalar = isinstance(else_, SCALAR_CLASSES)
3016
+ if isinstance(criterion, SCALAR_CLASSES) and is_then_scalar and is_else_scalar:
3017
+ result = np.where(criterion, then_, else_)
3018
+
3019
+ if out is not None:
3020
+ out[...] = result
3021
+ return out
3022
+
3023
+ return result
3024
+
3025
+ if is_then_scalar:
3026
+ then_ = np.array(then_)
3027
+
3028
+ if is_else_scalar:
3029
+ else_ = np.array(else_)
3030
+
3031
+ if then_.dtype != else_.dtype:
3032
+ raise ValueError(
3033
+ f"dtypes do not match: then_ is '{then_.dtype}' and "
3034
+ f"else_ is '{else_.dtype}'")
3035
+
3036
+ if then_.shape == () and else_.shape == ():
3037
+ pass
3038
+ elif then_.shape != () and else_.shape != ():
3039
+ if not (criterion.shape == then_.shape == else_.shape):
3040
+ raise ValueError(
3041
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3042
+ f", 'then_' has shape {then_.shape} and 'else_' has shape "
3043
+ f"{else_.shape}")
3044
+ elif then_.shape == ():
3045
+ if criterion.shape != else_.shape:
3046
+ raise ValueError(
3047
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3048
+ f" and 'else_' has shape {else_.shape}")
3049
+ elif else_.shape == ():
3050
+ if criterion.shape != then_.shape:
3051
+ raise ValueError(
3052
+ f"shapes do not match: 'criterion' has shape {criterion.shape}"
3053
+ f" and 'then_' has shape {then_.shape}")
3054
+ else:
3055
+ raise AssertionError()
3056
+
3057
+ if out is None:
3058
+ if then_.shape != ():
3059
+ out = empty_like(
3060
+ then_, criterion.queue, allocator=criterion.allocator)
3061
+ else:
3062
+ # Use same strides as criterion
3063
+ cr_byte_strides = np.array(criterion.strides, dtype=np.int64)
3064
+ cr_item_strides = cr_byte_strides // criterion.dtype.itemsize
3065
+ out_strides = tuple(cr_item_strides*then_.dtype.itemsize)
3066
+
3067
+ out = type(criterion)(
3068
+ criterion.queue, criterion.shape, then_.dtype,
3069
+ allocator=criterion.allocator,
3070
+ strides=out_strides)
3071
+
3072
+ event1 = _if_positive(out, criterion, then_, else_, queue=queue)
3073
+ out.add_event(event1)
3074
+
3075
+ return out
3076
+
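+ # A sketch of ``if_positive``, assuming ``queue``, ``np`` and ``cl_array``:
+ #
+ #   crit = cl_array.to_device(queue, np.array([-1., 0., 2.], dtype=np.float32))
+ #   hi = cl_array.to_device(queue, np.ones(3, dtype=np.float32))
+ #   lo = cl_array.zeros(queue, (3,), np.float32)
+ #   cl_array.if_positive(crit, hi, lo)       # -> [0., 0., 1.]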
3077
+ # }}}
3078
+
3079
+
3080
+ # {{{ minimum/maximum
3081
+
3082
+ @elwise_kernel_runner
3083
+ def _minimum_maximum_backend(out, a, b, minmax):
3084
+ from pyopencl.elementwise import get_minmaximum_kernel
3085
+ return get_minmaximum_kernel(out.context, minmax,
3086
+ out.dtype,
3087
+ a.dtype if isinstance(a, Array) else np.dtype(type(a)),
3088
+ b.dtype if isinstance(b, Array) else np.dtype(type(b)),
3089
+ elementwise.get_argument_kind(a),
3090
+ elementwise.get_argument_kind(b))
3091
+
3092
+
3093
+ def maximum(a, b, out=None, queue=None):
3094
+ """Return the elementwise maximum of *a* and *b*."""
3095
+
3096
+ a_is_scalar = np.isscalar(a)
3097
+ b_is_scalar = np.isscalar(b)
3098
+ if a_is_scalar and b_is_scalar:
3099
+ result = np.maximum(a, b)
3100
+ if out is not None:
3101
+ out[...] = result
3102
+ return out
3103
+
3104
+ return result
3105
+
3106
+ queue = queue or a.queue or b.queue
3107
+
3108
+ if out is None:
3109
+ out_dtype = _get_common_dtype(a, b, queue)
3110
+ if not a_is_scalar:
3111
+ out = a._new_like_me(out_dtype, queue)
3112
+ elif not b_is_scalar:
3113
+ out = b._new_like_me(out_dtype, queue)
3114
+
3115
+ out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="max"))
3116
+
3117
+ return out
3118
+
3119
+
3120
+ def minimum(a, b, out=None, queue=None):
3121
+ """Return the elementwise minimum of *a* and *b*."""
3122
+ a_is_scalar = np.isscalar(a)
3123
+ b_is_scalar = np.isscalar(b)
3124
+ if a_is_scalar and b_is_scalar:
3125
+ result = np.minimum(a, b)
3126
+ if out is not None:
3127
+ out[...] = result
3128
+ return out
3129
+
3130
+ return result
3131
+
3132
+ queue = queue or a.queue or b.queue
3133
+
3134
+ if out is None:
3135
+ out_dtype = _get_common_dtype(a, b, queue)
3136
+ if not a_is_scalar:
3137
+ out = a._new_like_me(out_dtype, queue)
3138
+ elif not b_is_scalar:
3139
+ out = b._new_like_me(out_dtype, queue)
3140
+
3141
+ out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="min"))
3142
+
3143
+ return out
3144
+
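+ # A sketch of ``maximum``/``minimum``, assuming ``a`` and ``b`` are Arrays of
+ # matching shape on ``queue`` (hypothetical names); either argument may also
+ # be a scalar.
+ #
+ #   cl_array.maximum(a, 0)        # elementwise clamp from below at 0
+ #   cl_array.minimum(a, b)        # elementwise minimum of two arrays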
3145
+ # }}}
3146
+
3147
+
3148
+ # {{{ logical ops
3149
+
3150
+ def _logical_op(x1, x2, out, operator, queue=None):
3151
+ # NOTE: Copied from pycuda.gpuarray
3152
+ assert operator in ["&&", "||"]
3153
+
3154
+ if np.isscalar(x1) and np.isscalar(x2):
3155
+ if out is None:
3156
+ out = empty(queue, shape=(), dtype=np.int8)
3157
+
3158
+ if operator == "&&":
3159
+ out[:] = np.logical_and(x1, x2)
3160
+ else:
3161
+ out[:] = np.logical_or(x1, x2)
3162
+ elif np.isscalar(x1) or np.isscalar(x2):
3163
+ scalar_arg, = (x for x in (x1, x2) if np.isscalar(x))
3164
+ ary_arg, = (x for x in (x1, x2) if not np.isscalar(x))
3165
+ queue = queue or ary_arg.queue
3166
+ allocator = ary_arg.allocator
3167
+
3168
+ if not isinstance(ary_arg, Array):
3169
+ raise ValueError("logical_and can take either scalar or Array"
3170
+ " as inputs")
3171
+
3172
+ out = out or ary_arg._new_like_me(dtype=np.int8)
3173
+
3174
+ assert out.shape == ary_arg.shape and out.dtype == np.int8
3175
+
3176
+ knl = elementwise.get_array_scalar_binop_kernel(
3177
+ queue.context,
3178
+ operator,
3179
+ out.dtype,
3180
+ ary_arg.dtype,
3181
+ np.dtype(type(scalar_arg))
3182
+ )
3183
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, ary_arg, scalar_arg)
3184
+ else:
3185
+ if not (isinstance(x1, Array) and isinstance(x2, Array)):
3186
+ raise ValueError("logical_or/logical_and can take either scalar"
3187
+ " or Arrays as inputs")
3188
+ if x1.shape != x2.shape:
3189
+ raise NotImplementedError("Broadcasting not supported")
3190
+
3191
+ queue = queue or x1.queue or x2.queue
3192
+ allocator = x1.allocator or x2.allocator
3193
+
3194
+ if out is None:
3195
+ out = empty(queue, allocator=allocator,
3196
+ shape=x1.shape, dtype=np.int8)
3197
+
3198
+ assert out.shape == x1.shape and out.dtype == np.int8
3199
+
3200
+ knl = elementwise.get_array_binop_kernel(
3201
+ queue.context,
3202
+ operator,
3203
+ out.dtype,
3204
+ x1.dtype, x2.dtype)
3205
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x1, x2)
3206
+
3207
+ return out
3208
+
3209
+
3210
+ def logical_and(x1, x2, /, out=None, queue=None):
3211
+ """
3212
+ Returns the element-wise logical AND of *x1* and *x2*.
3213
+ """
3214
+ return _logical_op(x1, x2, out, "&&", queue=queue)
3215
+
3216
+
3217
+ def logical_or(x1, x2, /, out=None, queue=None):
3218
+ """
3219
+ Returns the element-wise logical OR of *x1* and *x2*.
3220
+ """
3221
+ return _logical_op(x1, x2, out, "||", queue=queue)
3222
+
3223
+
3224
+ def logical_not(x, /, out=None, queue=None):
3225
+ """
3226
+ Returns the element-wise logical NOT of *x*.
3227
+ """
3228
+ if np.isscalar(x):
3229
+ out = out or empty(queue, shape=(), dtype=np.int8)
3230
+ out[:] = np.logical_not(x)
3231
+ else:
3232
+ queue = queue or x.queue
3233
+ out = out or empty(queue, shape=x.shape, dtype=np.int8,
3234
+ allocator=x.allocator)
3235
+ knl = elementwise.get_logical_not_kernel(queue.context,
3236
+ x.dtype)
3237
+ elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x)
3238
+
3239
+ return out
3240
+
3241
+ # }}}
3242
+
3243
+
3244
+ # {{{ reductions
3245
+
3246
+ def sum(a, dtype=None, queue=None, slice=None, initial=np._NoValue):
3247
+ """
3248
+ .. versionadded:: 2011.1
3249
+ """
3250
+ if initial is not np._NoValue and not isinstance(initial, SCALAR_CLASSES):
3251
+ raise ValueError("'initial' is not a scalar")
3252
+
3253
+ if dtype is not None:
3254
+ dtype = np.dtype(dtype)
3255
+
3256
+ from pyopencl.reduction import get_sum_kernel
3257
+ krnl = get_sum_kernel(a.context, dtype, a.dtype)
3258
+ result, event1 = krnl(a, queue=queue, slice=slice, wait_for=a.events,
3259
+ return_event=True)
3260
+ result.add_event(event1)
3261
+
3262
+ # NOTE: neutral element in `get_sum_kernel` is 0 by default
3263
+ if initial is not np._NoValue:
3264
+ result += a.dtype.type(initial)
3265
+
3266
+ return result
3267
+
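+ # A sketch of ``sum``, assuming ``queue``, ``np`` and ``cl_array``: the
+ # reduction returns a zero-dimensional device Array; call ``.get()`` for a
+ # host scalar.
+ #
+ #   a = cl_array.arange(queue, 10, dtype=np.float32)
+ #   total = cl_array.sum(a)       # device-side scalar Array
+ #   float(total.get())            # -> 45.0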
3268
+
3269
+ def any(a, queue=None, wait_for=None):
3270
+ if len(a) == 0:
3271
+ return _BOOL_DTYPE.type(False)
3272
+
3273
+ return a.any(queue=queue, wait_for=wait_for)
3274
+
3275
+
3276
+ def all(a, queue=None, wait_for=None):
3277
+ if len(a) == 0:
3278
+ return _BOOL_DTYPE.type(True)
3279
+
3280
+ return a.all(queue=queue, wait_for=wait_for)
3281
+
3282
+
3283
+ def dot(a, b, dtype=None, queue=None, slice=None):
3284
+ """
3285
+ .. versionadded:: 2011.1
3286
+ """
3287
+ if dtype is not None:
3288
+ dtype = np.dtype(dtype)
3289
+
3290
+ from pyopencl.reduction import get_dot_kernel
3291
+ krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype)
3292
+
3293
+ result, event1 = krnl(a, b, queue=queue, slice=slice,
3294
+ wait_for=a.events + b.events, return_event=True)
3295
+ result.add_event(event1)
3296
+
3297
+ return result
3298
+
3299
+
3300
+ def vdot(a, b, dtype=None, queue=None, slice=None):
3301
+ """Like :func:`numpy.vdot`.
3302
+
3303
+ .. versionadded:: 2013.1
3304
+ """
3305
+ if dtype is not None:
3306
+ dtype = np.dtype(dtype)
3307
+
3308
+ from pyopencl.reduction import get_dot_kernel
3309
+ krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype,
3310
+ conjugate_first=True)
3311
+
3312
+ result, event1 = krnl(a, b, queue=queue, slice=slice,
3313
+ wait_for=a.events + b.events, return_event=True)
3314
+ result.add_event(event1)
3315
+
3316
+ return result
3317
+
3318
+
3319
+ def subset_dot(subset, a, b, dtype=None, queue=None, slice=None):
3320
+ """
3321
+ .. versionadded:: 2011.1
3322
+ """
3323
+ if dtype is not None:
3324
+ dtype = np.dtype(dtype)
3325
+
3326
+ from pyopencl.reduction import get_subset_dot_kernel
3327
+ krnl = get_subset_dot_kernel(
3328
+ a.context, dtype, subset.dtype, a.dtype, b.dtype)
3329
+
3330
+ result, event1 = krnl(subset, a, b, queue=queue, slice=slice,
3331
+ wait_for=subset.events + a.events + b.events, return_event=True)
3332
+ result.add_event(event1)
3333
+
3334
+ return result
3335
+
3336
+
3337
+ def _make_minmax_kernel(what):
3338
+ def f(a, queue=None, initial=np._NoValue):
3339
+ if isinstance(a, SCALAR_CLASSES):
3340
+ return np.array(a).dtype.type(a)
3341
+
3342
+ if len(a) == 0:
3343
+ if initial is np._NoValue:
3344
+ raise ValueError(
3345
+ f"zero-size array to reduction '{what}' "
3346
+ "which has no identity")
3347
+ else:
3348
+ return initial
3349
+
3350
+ if initial is not np._NoValue and not isinstance(initial, SCALAR_CLASSES):
3351
+ raise ValueError("'initial' is not a scalar")
3352
+
3353
+ from pyopencl.reduction import get_minmax_kernel
3354
+ krnl = get_minmax_kernel(a.context, what, a.dtype)
3355
+ result, event1 = krnl(a, queue=queue, wait_for=a.events,
3356
+ return_event=True)
3357
+ result.add_event(event1)
3358
+
3359
+ if initial is not np._NoValue:
3360
+ initial = a.dtype.type(initial)
3361
+ if what == "min":
3362
+ result = minimum(result, initial, queue=queue)
3363
+ elif what == "max":
3364
+ result = maximum(result, initial, queue=queue)
3365
+ else:
3366
+ raise ValueError(f"unknown minmax reduction type: '{what}'")
3367
+
3368
+ return result
3369
+
3370
+ return f
3371
+
3372
+
3373
+ min = _make_minmax_kernel("min")
3374
+ min.__name__ = "min"
3375
+ min.__doc__ = """
3376
+ .. versionadded:: 2011.1
3377
+ """
3378
+
3379
+ max = _make_minmax_kernel("max")
3380
+ max.__name__ = "max"
3381
+ max.__doc__ = """
3382
+ .. versionadded:: 2011.1
3383
+ """
3384
+
3385
+
3386
+ def _make_subset_minmax_kernel(what):
3387
+ def f(subset, a, queue=None, slice=None):
3388
+ from pyopencl.reduction import get_subset_minmax_kernel
3389
+ krnl = get_subset_minmax_kernel(a.context, what, a.dtype, subset.dtype)
3390
+ result, event1 = krnl(subset, a, queue=queue, slice=slice,
3391
+ wait_for=a.events + subset.events, return_event=True)
3392
+ result.add_event(event1)
3393
+ return result
3394
+ return f
3395
+
3396
+
3397
+ subset_min = _make_subset_minmax_kernel("min")
3398
+ subset_min.__doc__ = """.. versionadded:: 2011.1"""
3399
+ subset_max = _make_subset_minmax_kernel("max")
3400
+ subset_max.__doc__ = """.. versionadded:: 2011.1"""
3401
+
3402
+ # }}}
3403
+
3404
+
3405
+ # {{{ scans
3406
+
3407
+ def cumsum(a, output_dtype=None, queue=None,
3408
+ wait_for=None, return_event=False):
3409
+ # undocumented for now
3410
+
3411
+ """
3412
+ .. versionadded:: 2013.1
3413
+ """
3414
+
3415
+ if output_dtype is None:
3416
+ output_dtype = a.dtype
3417
+ else:
3418
+ output_dtype = np.dtype(output_dtype)
3419
+
3420
+ if wait_for is None:
3421
+ wait_for = []
3422
+
3423
+ result = a._new_like_me(output_dtype)
3424
+
3425
+ from pyopencl.scan import get_cumsum_kernel
3426
+ krnl = get_cumsum_kernel(a.context, a.dtype, output_dtype)
3427
+ evt = krnl(a, result, queue=queue, wait_for=wait_for + a.events)
3428
+ result.add_event(evt)
3429
+
3430
+ if return_event:
3431
+ return evt, result
3432
+ else:
3433
+ return result
3434
+
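+ # A sketch of ``cumsum`` (inclusive prefix sum), assuming ``queue``, ``np``
+ # and ``cl_array``:
+ #
+ #   x = cl_array.arange(queue, 5, dtype=np.int32)   # 0 1 2 3 4
+ #   cl_array.cumsum(x).get()                        # [0 1 3 6 10]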
3435
+ # }}}
3436
+
3437
+ # vim: foldmethod=marker