pyopencl-2024.2-cp312-cp312-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pyopencl might be problematic.

Files changed (122)
  1. pyopencl/__init__.py +2393 -0
  2. pyopencl/_cl.cpython-312-darwin.so +0 -0
  3. pyopencl/_cluda.py +54 -0
  4. pyopencl/_mymako.py +14 -0
  5. pyopencl/algorithm.py +1444 -0
  6. pyopencl/array.py +3427 -0
  7. pyopencl/bitonic_sort.py +238 -0
  8. pyopencl/bitonic_sort_templates.py +594 -0
  9. pyopencl/cache.py +534 -0
  10. pyopencl/capture_call.py +176 -0
  11. pyopencl/characterize/__init__.py +433 -0
  12. pyopencl/characterize/performance.py +237 -0
  13. pyopencl/cl/pyopencl-airy.cl +324 -0
  14. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  15. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  16. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  17. pyopencl/cl/pyopencl-complex.h +303 -0
  18. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  19. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  20. pyopencl/cl/pyopencl-random123/array.h +325 -0
  21. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  22. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  23. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  24. pyopencl/clmath.py +280 -0
  25. pyopencl/clrandom.py +408 -0
  26. pyopencl/cltypes.py +137 -0
  27. pyopencl/compyte/__init__.py +0 -0
  28. pyopencl/compyte/array.py +214 -0
  29. pyopencl/compyte/dtypes.py +290 -0
  30. pyopencl/compyte/ndarray/__init__.py +0 -0
  31. pyopencl/compyte/ndarray/gen_elemwise.py +1907 -0
  32. pyopencl/compyte/ndarray/gen_reduction.py +1511 -0
  33. pyopencl/compyte/ndarray/setup_opencl.py +101 -0
  34. pyopencl/compyte/ndarray/test_gpu_elemwise.py +411 -0
  35. pyopencl/compyte/ndarray/test_gpu_ndarray.py +487 -0
  36. pyopencl/elementwise.py +1164 -0
  37. pyopencl/invoker.py +418 -0
  38. pyopencl/ipython_ext.py +68 -0
  39. pyopencl/reduction.py +780 -0
  40. pyopencl/scan.py +1898 -0
  41. pyopencl/tools.py +1513 -0
  42. pyopencl/version.py +3 -0
  43. pyopencl-2024.2.data/data/CITATION.cff +74 -0
  44. pyopencl-2024.2.data/data/LICENSE +282 -0
  45. pyopencl-2024.2.data/data/Makefile.in +21 -0
  46. pyopencl-2024.2.data/data/README.rst +70 -0
  47. pyopencl-2024.2.data/data/README_SETUP.txt +34 -0
  48. pyopencl-2024.2.data/data/aksetup_helper.py +1013 -0
  49. pyopencl-2024.2.data/data/configure.py +6 -0
  50. pyopencl-2024.2.data/data/contrib/cldis.py +91 -0
  51. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/README +29 -0
  52. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/translate.py +1441 -0
  53. pyopencl-2024.2.data/data/contrib/pyopencl.vim +84 -0
  54. pyopencl-2024.2.data/data/doc/Makefile +23 -0
  55. pyopencl-2024.2.data/data/doc/algorithm.rst +214 -0
  56. pyopencl-2024.2.data/data/doc/array.rst +305 -0
  57. pyopencl-2024.2.data/data/doc/conf.py +26 -0
  58. pyopencl-2024.2.data/data/doc/howto.rst +105 -0
  59. pyopencl-2024.2.data/data/doc/index.rst +137 -0
  60. pyopencl-2024.2.data/data/doc/make_constants.py +561 -0
  61. pyopencl-2024.2.data/data/doc/misc.rst +885 -0
  62. pyopencl-2024.2.data/data/doc/runtime.rst +51 -0
  63. pyopencl-2024.2.data/data/doc/runtime_const.rst +30 -0
  64. pyopencl-2024.2.data/data/doc/runtime_gl.rst +78 -0
  65. pyopencl-2024.2.data/data/doc/runtime_memory.rst +527 -0
  66. pyopencl-2024.2.data/data/doc/runtime_platform.rst +184 -0
  67. pyopencl-2024.2.data/data/doc/runtime_program.rst +364 -0
  68. pyopencl-2024.2.data/data/doc/runtime_queue.rst +182 -0
  69. pyopencl-2024.2.data/data/doc/subst.rst +36 -0
  70. pyopencl-2024.2.data/data/doc/tools.rst +4 -0
  71. pyopencl-2024.2.data/data/doc/types.rst +42 -0
  72. pyopencl-2024.2.data/data/examples/black-hole-accretion.py +2227 -0
  73. pyopencl-2024.2.data/data/examples/demo-struct-reduce.py +75 -0
  74. pyopencl-2024.2.data/data/examples/demo.py +39 -0
  75. pyopencl-2024.2.data/data/examples/demo_array.py +32 -0
  76. pyopencl-2024.2.data/data/examples/demo_array_svm.py +37 -0
  77. pyopencl-2024.2.data/data/examples/demo_elementwise.py +34 -0
  78. pyopencl-2024.2.data/data/examples/demo_elementwise_complex.py +53 -0
  79. pyopencl-2024.2.data/data/examples/demo_mandelbrot.py +183 -0
  80. pyopencl-2024.2.data/data/examples/demo_meta_codepy.py +56 -0
  81. pyopencl-2024.2.data/data/examples/demo_meta_template.py +55 -0
  82. pyopencl-2024.2.data/data/examples/dump-performance.py +38 -0
  83. pyopencl-2024.2.data/data/examples/dump-properties.py +86 -0
  84. pyopencl-2024.2.data/data/examples/gl_interop_demo.py +84 -0
  85. pyopencl-2024.2.data/data/examples/gl_particle_animation.py +218 -0
  86. pyopencl-2024.2.data/data/examples/ipython-demo.ipynb +203 -0
  87. pyopencl-2024.2.data/data/examples/median-filter.py +99 -0
  88. pyopencl-2024.2.data/data/examples/n-body.py +1070 -0
  89. pyopencl-2024.2.data/data/examples/narray.py +37 -0
  90. pyopencl-2024.2.data/data/examples/noisyImage.jpg +0 -0
  91. pyopencl-2024.2.data/data/examples/pi-monte-carlo.py +1166 -0
  92. pyopencl-2024.2.data/data/examples/svm.py +82 -0
  93. pyopencl-2024.2.data/data/examples/transpose.py +229 -0
  94. pyopencl-2024.2.data/data/pytest.ini +3 -0
  95. pyopencl-2024.2.data/data/src/bitlog.cpp +51 -0
  96. pyopencl-2024.2.data/data/src/bitlog.hpp +83 -0
  97. pyopencl-2024.2.data/data/src/clinfo_ext.h +134 -0
  98. pyopencl-2024.2.data/data/src/mempool.hpp +444 -0
  99. pyopencl-2024.2.data/data/src/pyopencl_ext.h +77 -0
  100. pyopencl-2024.2.data/data/src/tools.hpp +90 -0
  101. pyopencl-2024.2.data/data/src/wrap_cl.cpp +61 -0
  102. pyopencl-2024.2.data/data/src/wrap_cl.hpp +5853 -0
  103. pyopencl-2024.2.data/data/src/wrap_cl_part_1.cpp +369 -0
  104. pyopencl-2024.2.data/data/src/wrap_cl_part_2.cpp +702 -0
  105. pyopencl-2024.2.data/data/src/wrap_constants.cpp +1274 -0
  106. pyopencl-2024.2.data/data/src/wrap_helpers.hpp +213 -0
  107. pyopencl-2024.2.data/data/src/wrap_mempool.cpp +731 -0
  108. pyopencl-2024.2.data/data/test/add-vectors-32.spv +0 -0
  109. pyopencl-2024.2.data/data/test/add-vectors-64.spv +0 -0
  110. pyopencl-2024.2.data/data/test/empty-header.h +1 -0
  111. pyopencl-2024.2.data/data/test/test_algorithm.py +1180 -0
  112. pyopencl-2024.2.data/data/test/test_array.py +2392 -0
  113. pyopencl-2024.2.data/data/test/test_arrays_in_structs.py +100 -0
  114. pyopencl-2024.2.data/data/test/test_clmath.py +529 -0
  115. pyopencl-2024.2.data/data/test/test_clrandom.py +75 -0
  116. pyopencl-2024.2.data/data/test/test_enqueue_copy.py +271 -0
  117. pyopencl-2024.2.data/data/test/test_wrapper.py +1554 -0
  118. pyopencl-2024.2.dist-info/LICENSE +282 -0
  119. pyopencl-2024.2.dist-info/METADATA +105 -0
  120. pyopencl-2024.2.dist-info/RECORD +122 -0
  121. pyopencl-2024.2.dist-info/WHEEL +5 -0
  122. pyopencl-2024.2.dist-info/top_level.txt +1 -0
pyopencl/array.py ADDED
@@ -0,0 +1,3427 @@
+ """CL device arrays."""
+
+ # NOTE: for elwise_kernel_runner which adds keyword arguments
+ # pylint:disable=unexpected-keyword-arg
+
+ __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
+
+ __license__ = """
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
+ """
+
+ import builtins
+ from dataclasses import dataclass
+ from functools import reduce
+ from numbers import Number
+ from typing import Any, Dict, Hashable, List, Optional, Tuple, Union
+ from warnings import warn
+
+ import numpy as np
+
+ import pyopencl as cl
+ import pyopencl.elementwise as elementwise
+ from pyopencl import cltypes
+ from pyopencl.characterize import has_double_support
+ from pyopencl.compyte.array import (
+     ArrayFlags as _ArrayFlags, as_strided as _as_strided,
+     c_contiguous_strides as _c_contiguous_strides, equal_strides as _equal_strides,
+     f_contiguous_strides as _f_contiguous_strides)
+
+
+ SCALAR_CLASSES = (Number, np.bool_, bool)
+
+ if cl.get_cl_header_version() >= (2, 0):
+     _SVMPointer_or_nothing = cl.SVMPointer
+ else:
+     _SVMPointer_or_nothing = ()
+
+
+ _NUMPY_PRE_2 = np.__version__.startswith("1.")
+
+
+ # {{{ _get_common_dtype
+
+ _COMMON_DTYPE_CACHE: Dict[Tuple[Hashable, ...], np.dtype] = {}
+
+
+ class DoubleDowncastWarning(UserWarning):
+     pass
+
+
+ _DOUBLE_DOWNCAST_WARNING = (
+         "The operation you requested would result in a double-precision "
+         "quantity according to numpy semantics. Since your device does not "
+         "support double precision, a single-precision quantity is being returned.")
+
+
+ def _get_common_dtype(obj1, obj2, queue):
+     if queue is None:
+         raise ValueError("PyOpenCL array has no queue; call .with_queue() to "
+                 "add one in order to be able to perform operations")
+
+     allow_double = has_double_support(queue.device)
+     cache_key = None
+     o1_is_array = isinstance(obj1, Array)
+     o2_is_array = isinstance(obj2, Array)
+
+     if o1_is_array and o2_is_array:
+         o1_dtype = obj1.dtype
+         o2_dtype = obj2.dtype
+         cache_key = (obj1.dtype, obj2.dtype, allow_double)
+     else:
+         o1_dtype = getattr(obj1, "dtype", type(obj1))
+         o2_dtype = getattr(obj2, "dtype", type(obj2))
+
+         o1_is_integral = np.issubdtype(o1_dtype, np.integer)
+         o2_is_integral = np.issubdtype(o2_dtype, np.integer)
+
+         o1_key = obj1 if o1_is_integral and not o1_is_array else o1_dtype
+         o2_key = obj2 if o2_is_integral and not o2_is_array else o2_dtype
+
+         cache_key = (o1_key, o2_key, o1_is_array, o2_is_array, allow_double)
+
+     try:
+         return _COMMON_DTYPE_CACHE[cache_key]
+     except KeyError:
+         pass
+
+     # Numpy's behavior around integers is a bit bizarre, and definitely value-
+     # and not just type-sensitive when it comes to scalars. We'll just do our
+     # best to emulate it.
+     #
+     # Some samples that are true as of numpy 1.23.1.
+     #
+     # >>> a = np.zeros(1, dtype=np.int16)
+     # >>> (a + 123123123312).dtype
+     # dtype('int64')
+     # >>> (a + 12312).dtype
+     # dtype('int16')
+     # >>> (a + 12312444).dtype
+     # dtype('int32')
+     # >>> (a + np.int32(12312444)).dtype
+     # dtype('int32')
+     # >>> (a + np.int32(1234)).dtype
+     # dtype('int16')
+     #
+     # Note that np.find_common_type, while appealing, won't be able to tell
+     # the full story.
+
+     if (_NUMPY_PRE_2
+             and not (o1_is_array and o2_is_array)
+             and o1_is_integral and o2_is_integral):
+         if o1_is_array:
+             obj1 = np.zeros(1, dtype=o1_dtype)
+         if o2_is_array:
+             obj2 = np.zeros(1, dtype=o2_dtype)
+
+         result = (obj1 + obj2).dtype
+     else:
+         array_types = []
+         scalars = []
+
+         if o1_is_array:
+             array_types.append(o1_dtype)
+         else:
+             scalars.append(obj1)
+         if o2_is_array:
+             array_types.append(o2_dtype)
+         else:
+             scalars.append(obj2)
+
+         result = np.result_type(*array_types, *scalars)
+
+     if not allow_double:
+         if result == np.float64:
+             result = np.dtype(np.float32)
+             warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
+         elif result == np.complex128:
+             result = np.dtype(np.complex64)
+             warn(_DOUBLE_DOWNCAST_WARNING, DoubleDowncastWarning, stacklevel=3)
+
+     if cache_key is not None:
+         _COMMON_DTYPE_CACHE[cache_key] = result
+
+     return result
+
+ # }}}
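
The cache-key logic above has to track integer scalars by value, not just by type, because numpy 1.x promotes based on the scalar's magnitude. A minimal host-side check of the samples quoted in the comment (numpy 1.x semantics; numpy 2 follows NEP 50 instead):

    import numpy as np

    a = np.zeros(1, dtype=np.int16)
    print((a + 12312).dtype)         # int16: the scalar fits into int16
    print((a + 12312444).dtype)      # int32: the value forces a wider type
    print((a + 123123123312).dtype)  # int64: wider still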
+
+
+ # {{{ _get_truedivide_dtype
+
+ def _get_truedivide_dtype(obj1, obj2, queue):
+     # the dtype of the division result obj1 / obj2
+
+     allow_double = has_double_support(queue.device)
+
+     x1 = obj1 if np.isscalar(obj1) else np.ones(1, obj1.dtype)
+     x2 = obj2 if np.isscalar(obj2) else np.ones(1, obj2.dtype)
+
+     result = (x1/x2).dtype
+
+     if not allow_double:
+         if result == np.float64:
+             result = np.dtype(np.float32)
+         elif result == np.complex128:
+             result = np.dtype(np.complex64)
+
+     return result
+
+ # }}}
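
A host-side sketch of what `_get_truedivide_dtype` computes: true division of two integer operands already yields a float on the host, and that dtype is reused for the device result (downcast to float32/complex64 when the device lacks double support):

    import numpy as np

    x1 = np.ones(1, np.int32)
    x2 = np.ones(1, np.int32)
    print((x1 / x2).dtype)  # float64 on the host; float32 if allow_double is False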
+
+
+ # {{{ _get_broadcasted_binary_op_result
+
+ def _get_broadcasted_binary_op_result(obj1, obj2, cq,
+         dtype_getter=_get_common_dtype):
+
+     if obj1.shape == obj2.shape:
+         return obj1._new_like_me(dtype_getter(obj1, obj2, cq), cq)
+     elif obj1.shape == ():
+         return obj2._new_like_me(dtype_getter(obj1, obj2, cq), cq)
+     elif obj2.shape == ():
+         return obj1._new_like_me(dtype_getter(obj1, obj2, cq), cq)
+     else:
+         raise NotImplementedError("Broadcasting binary operator with shapes:"
+                 f" {obj1.shape}, {obj2.shape}.")
+
+ # }}}
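
Only scalar-versus-array broadcasting is supported by this helper; any other shape mismatch raises `NotImplementedError`. A usage sketch, assuming an OpenCL platform is available at runtime:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.ones((4, 4), np.float32))
    s = cl_array.to_device(queue, np.array(2.0, np.float32))  # shape-() device scalar

    print((a + s).shape)  # (4, 4): the shape-() operand broadcasts
    # Adding a (4,)-shaped array to `a` would raise NotImplementedError instead.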
+
+
+ # {{{ VecLookupWarner
+
+ class VecLookupWarner:
+     def __getattr__(self, name):
+         warn("pyopencl.array.vec is deprecated. "
+              "Please use pyopencl.cltypes for OpenCL vector and scalar types",
+              DeprecationWarning, stacklevel=2)
+
+         if name == "types":
+             name = "vec_types"
+         elif name == "type_to_scalar_and_count":
+             name = "vec_type_to_scalar_and_count"
+
+         return getattr(cltypes, name)
+
+
+ vec = VecLookupWarner()
+
+ # }}}
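
The shim forwards attribute lookups to `pyopencl.cltypes` after emitting a deprecation warning; a quick demonstration of that behavior:

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        t = vec.float4  # deprecated spelling

    assert t is cltypes.float4
    assert issubclass(caught[0].category, DeprecationWarning)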
+
+
+ # {{{ helper functionality
+
+ def _splay(device, n, kernel_specific_max_wg_size=None):
+     max_work_items = builtins.min(128, device.max_work_group_size)
+
+     if kernel_specific_max_wg_size is not None:
+         max_work_items = builtins.min(max_work_items, kernel_specific_max_wg_size)
+
+     min_work_items = builtins.min(32, max_work_items)
+     max_groups = device.max_compute_units * 4 * 8
+     # 4 to overfill the device
+     # 8 is an Nvidia constant--that's how many
+     # groups fit onto one compute device
+
+     if n < min_work_items:
+         group_count = 1
+         work_items_per_group = min_work_items
+     elif n < (max_groups * min_work_items):
+         group_count = (n + min_work_items - 1) // min_work_items
+         work_items_per_group = min_work_items
+     elif n < (max_groups * max_work_items):
+         group_count = max_groups
+         grp = (n + min_work_items - 1) // min_work_items
+         work_items_per_group = (
+                 (grp + max_groups - 1) // max_groups) * min_work_items
+     else:
+         group_count = max_groups
+         work_items_per_group = max_work_items
+
+     # print("n:%d gc:%d wipg:%d" % (n, group_count, work_items_per_group))
+     return (group_count*work_items_per_group,), (work_items_per_group,)
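
A worked example of the sizing branches above, for a hypothetical device with `max_work_group_size=256` and `max_compute_units=8` (so `max_work_items=128`, `min_work_items=32`, `max_groups=256`):

    n = 10_000                            # falls into the third branch
    grp = (n + 32 - 1) // 32              # 313 groups of 32 would be needed
    wipg = ((grp + 256 - 1) // 256) * 32  # rounded up to 64 work items per group
    print((256 * wipg,), (wipg,))         # ((16384,), (64,)): global and local sizes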
+
+
+ # deliberately undocumented for now
+ ARRAY_KERNEL_EXEC_HOOK = None
+
+
+ def elwise_kernel_runner(kernel_getter):
+     """Take a kernel getter of the same signature as the kernel
+     and return a function that invokes that kernel.
+
+     Assumes that the zeroth entry in *args* is an :class:`Array`.
+     """
+     from functools import wraps
+
+     @wraps(kernel_getter)
+     def kernel_runner(out, *args, **kwargs):
+         assert isinstance(out, Array)
+
+         wait_for = kwargs.pop("wait_for", None)
+         queue = kwargs.pop("queue", None)
+         if queue is None:
+             queue = out.queue
+
+         assert queue is not None
+
+         knl = kernel_getter(out, *args, **kwargs)
+         work_group_info = knl.get_work_group_info(
+                 cl.kernel_work_group_info.WORK_GROUP_SIZE,
+                 queue.device)
+         gs, ls = out._get_sizes(queue, work_group_info)
+
+         args = (out,) + args + (out.size,)
+         if ARRAY_KERNEL_EXEC_HOOK is not None:
+             return ARRAY_KERNEL_EXEC_HOOK(  # pylint: disable=not-callable
+                     knl, queue, gs, ls, *args, wait_for=wait_for)
+         else:
+             return knl(queue, gs, ls, *args, wait_for=wait_for)
+
+     return kernel_runner
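
A sketch of how this decorator is used throughout the `Array` class below: the decorated function only builds (and, via `pyopencl.elementwise`, caches) the kernel, while the returned runner appends `(out, ..., out.size)` to the argument list and enqueues the kernel on `out`'s queue. The `_example_fill` name here is hypothetical; `Array._fill` below is the real analogue:

    @elwise_kernel_runner
    def _example_fill(result, scalar):
        # invoked as _example_fill(some_array, value, queue=..., wait_for=...)
        return elementwise.get_fill_kernel(result.context, result.dtype)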
+
+
+ class DefaultAllocator(cl.tools.DeferredAllocator):
+     def __init__(self, *args, **kwargs):
+         warn("pyopencl.array.DefaultAllocator is deprecated. "
+              "It will continue to exist throughout the 2013.x "
+              "versions of PyOpenCL.",
+              DeprecationWarning, stacklevel=2)
+         cl.tools.DeferredAllocator.__init__(self, *args, **kwargs)
+
+ # }}}
+
+
+ # {{{ array class
+
+ class InconsistentOpenCLQueueWarning(UserWarning):
+     pass
+
+
+ class ArrayHasOffsetError(ValueError):
+     """
+     .. versionadded:: 2013.1
+     """
+
+     def __init__(self, val="The operation you are attempting does not yet "
+             "support arrays that start at an offset from the beginning "
+             "of their buffer."):
+         ValueError.__init__(self, val)
+
+
+ class _copy_queue:  # noqa: N801
+     pass
+
+
+ _ARRAY_GET_SIZES_CACHE: Dict[Tuple[int, int, int], Tuple[int, int]] = {}
+ _BOOL_DTYPE = np.dtype(np.int8)
+ _NOT_PRESENT = object()
+
+
+ class Array:
+     """A :class:`numpy.ndarray` work-alike that stores its data and performs
+     its computations on the compute device. :attr:`shape` and :attr:`dtype` work
+     exactly as in :mod:`numpy`. Arithmetic methods in :class:`Array` support the
+     broadcasting of scalars (e.g. ``array + 5``).
+
+     *cq* must be a :class:`~pyopencl.CommandQueue` or a :class:`~pyopencl.Context`.
+
+     If it is a queue, *cq* specifies the queue in which the array carries out
+     its computations by default. If a default queue (and thereby overloaded
+     operators and many other niceties) are not desired, pass a
+     :class:`~pyopencl.Context`.
+
+     *allocator* may be *None* or a callable that, upon being called with an
+     argument of the number of bytes to be allocated, returns a
+     :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool`
+     instance is one useful example of an object to pass here.)
+
+     .. versionchanged:: 2011.1
+
+         Renamed *context* to *cqa*, made it general-purpose.
+
+         All arguments beyond *order* should be considered keyword-only.
+
+     .. versionchanged:: 2015.2
+
+         Renamed *context* to *cq*, disallowed passing allocators through it.
+
+     .. attribute :: data
+
+         The :class:`pyopencl.MemoryObject` instance created for the memory that
+         backs this :class:`Array`.
+
+         .. versionchanged:: 2013.1
+
+             If a non-zero :attr:`offset` has been specified for this array,
+             this will fail with :exc:`ArrayHasOffsetError`.
+
+     .. attribute :: base_data
+
+         The :class:`pyopencl.MemoryObject` instance created for the memory that
+         backs this :class:`Array`. Unlike :attr:`data`, the base address of
+         *base_data* is allowed to be different from the beginning of the array.
+         The actual beginning is the base address of *base_data* plus
+         :attr:`offset` bytes.
+
+         Unlike :attr:`data`, retrieving :attr:`base_data` always succeeds.
+
+         .. versionadded:: 2013.1
+
+     .. attribute :: offset
+
+         See :attr:`base_data`.
+
+         .. versionadded:: 2013.1
+
+     .. attribute :: shape
+
+         A tuple of lengths of each dimension in the array.
+
+     .. attribute :: ndim
+
+         The number of dimensions in :attr:`shape`.
+
+     .. attribute :: dtype
+
+         The :class:`numpy.dtype` of the items in the GPU array.
+
+     .. attribute :: size
+
+         The number of meaningful entries in the array. Can also be computed by
+         multiplying up the numbers in :attr:`shape`.
+
+     .. attribute :: nbytes
+
+         The size of the entire array in bytes. Computed as :attr:`size` times
+         ``dtype.itemsize``.
+
+     .. attribute :: strides
+
+         A tuple of bytes to step in each dimension when traversing an array.
+
+     .. attribute :: flags
+
+         An object with attributes ``c_contiguous``, ``f_contiguous`` and
+         ``forc``, which may be used to query contiguity properties in analogy to
+         :attr:`numpy.ndarray.flags`.
+
+     .. rubric:: Methods
+
+     .. automethod :: with_queue
+
+     .. automethod :: __len__
+     .. automethod :: reshape
+     .. automethod :: ravel
+     .. automethod :: view
+     .. automethod :: squeeze
+     .. automethod :: transpose
+     .. attribute :: T
+     .. automethod :: set
+     .. automethod :: get
+     .. automethod :: get_async
+     .. automethod :: copy
+
+     .. automethod :: __str__
+     .. automethod :: __repr__
+
+     .. automethod :: mul_add
+     .. automethod :: __add__
+     .. automethod :: __sub__
+     .. automethod :: __iadd__
+     .. automethod :: __isub__
+     .. automethod :: __pos__
+     .. automethod :: __neg__
+     .. automethod :: __mul__
+     .. automethod :: __div__
+     .. automethod :: __rdiv__
+     .. automethod :: __pow__
+
+     .. automethod :: __and__
+     .. automethod :: __xor__
+     .. automethod :: __or__
+     .. automethod :: __iand__
+     .. automethod :: __ixor__
+     .. automethod :: __ior__
+
+     .. automethod :: __abs__
+     .. automethod :: __invert__
+
+     .. UNDOC reverse()
+
+     .. automethod :: fill
+
+     .. automethod :: astype
+
+     .. autoattribute :: real
+     .. autoattribute :: imag
+     .. automethod :: conj
+     .. automethod :: conjugate
+
+     .. automethod :: __getitem__
+     .. automethod :: __setitem__
+
+     .. automethod :: setitem
+
+     .. automethod :: map_to_host
+
+     .. rubric:: Comparisons, conditionals, any, all
+
+     .. versionadded:: 2013.2
+
+     Boolean arrays are stored as :class:`numpy.int8` because ``bool``
+     has an unspecified size in the OpenCL spec.
+
+     .. automethod :: __bool__
+
+         Only works for device scalars. (i.e. "arrays" with ``shape == ()``)
+
+     .. automethod :: any
+     .. automethod :: all
+
+     .. automethod :: __eq__
+     .. automethod :: __ne__
+     .. automethod :: __lt__
+     .. automethod :: __le__
+     .. automethod :: __gt__
+     .. automethod :: __ge__
+
+     .. rubric:: Event management
+
+     If an array is used from within an out-of-order queue, it needs to take
+     care of its own operation ordering. The facilities in this section make
+     this possible.
+
+     .. versionadded:: 2014.1.1
+
+     .. attribute:: events
+
+         A list of :class:`pyopencl.Event` instances that the current content of
+         this array depends on. User code may read, but should never modify this
+         list directly. To update this list, instead use the following methods.
+
+     .. automethod:: add_event
+     .. automethod:: finish
+     """
+
+     __array_priority__ = 100
+
+     def __init__(
+             self,
+             cq: Optional[Union[cl.Context, cl.CommandQueue]],
+             shape: Union[Tuple[int, ...], int],
+             dtype: Any,
+             order: str = "C",
+             allocator: Optional[cl.tools.AllocatorBase] = None,
+             data: Any = None,
+             offset: int = 0,
+             strides: Optional[Tuple[int, ...]] = None,
+             events: Optional[List[cl.Event]] = None,
+
+             # NOTE: following args are used for the fast constructor
+             _flags: Any = None,
+             _fast: bool = False,
+             _size: Optional[int] = None,
+             _context: Optional[cl.Context] = None,
+             _queue: Optional[cl.CommandQueue] = None) -> None:
+         if _fast:
+             # Assumptions, should be disabled if not testing
+             if 0:
+                 assert cq is None
+                 assert isinstance(_context, cl.Context)
+                 assert _queue is None or isinstance(_queue, cl.CommandQueue)
+                 assert isinstance(shape, tuple)
+                 assert isinstance(strides, tuple)
+                 assert isinstance(dtype, np.dtype)
+                 assert _size is not None
+
+             size = _size
+             context = _context
+             queue = _queue
+             alloc_nbytes = dtype.itemsize * size
+
+         else:
+             # {{{ backward compatibility
+
+             if cq is None:
+                 context = _context
+                 queue = _queue
+
+             elif isinstance(cq, cl.CommandQueue):
+                 queue = cq
+                 context = queue.context
+
+             elif isinstance(cq, cl.Context):
+                 context = cq
+                 queue = None
+
+             else:
+                 raise TypeError(
+                     f"cq may be a queue or a context, not '{type(cq).__name__}'")
+
+             if allocator is not None:
+                 # "is" would be wrong because two Python objects are allowed
+                 # to hold handles to the same context.
+
+                 # FIXME It would be nice to check this. But it would require
+                 # changing the allocator interface. Trust the user for now.
+
+                 # assert allocator.context == context
+                 pass
+
+             # Queue-less arrays do have a purpose in life.
+             # They don't do very much, but at least they don't run kernels
+             # in random queues.
+             #
+             # See also :meth:`with_queue`.
+
+             del cq
+
+             # }}}
+
+             # invariant here: allocator, queue set
+
+             # {{{ determine shape, size, and strides
+
+             dtype = np.dtype(dtype)
+
+             try:
+                 shape = tuple(shape)  # type: ignore[arg-type]
+             except TypeError:
+                 if not isinstance(shape, (int, np.integer)):
+                     raise TypeError(
+                         "shape must either be iterable or castable to an integer: "
+                         f"got a '{type(shape).__name__}'")
+
+                 shape = (shape,)
+
+             shape_array = np.array(shape)
+
+             # Previously, the size was computed as
+             # "size = 1; size *= dim for dim in shape"
+             # However this can fail when using certain data types,
+             # eg numpy.uint64(1) * 2 returns 2.0 !
+             if np.any(shape_array < 0):
+                 raise ValueError(f"negative dimensions are not allowed: {shape}")
+             if np.any([np.array([s]).dtype.kind not in ["u", "i"] for s in shape]):
+                 raise ValueError(
+                     f"invalid shape {shape}; dimensions must be integers")
+             size = np.prod(shape_array, dtype=np.uint64).item()
+
+             if strides is None:
+                 if order in "cC":
+                     # inlined from compyte.array.c_contiguous_strides
+                     if shape:
+                         strides_tmp = [dtype.itemsize]
+                         for s in shape[:0:-1]:
+                             # NOTE: https://github.com/inducer/compyte/pull/36
+                             strides_tmp.append(strides_tmp[-1]*builtins.max(1, s))
+                         strides = tuple(strides_tmp[::-1])
+                     else:
+                         strides = ()
+                 elif order in "fF":
+                     strides = _f_contiguous_strides(dtype.itemsize, shape)
+                 else:
+                     raise ValueError(f"invalid order: {order}")
+
+             else:
+                 # FIXME: We should possibly perform some plausibility
+                 # checking on 'strides' here.
+
+                 strides = tuple(strides)
+
+             # }}}
+
+             assert dtype != object, \
+                     "object arrays on the compute device are not allowed"
+             assert isinstance(shape, tuple)
+             assert isinstance(strides, tuple)
+
+             alloc_nbytes = dtype.itemsize * size
+
+             if alloc_nbytes < 0:
+                 raise ValueError("cannot allocate CL buffer with negative size")
+
+         self.queue = queue
+         self.shape = shape
+         self.dtype = dtype
+         self.strides = strides
+         self.events = [] if events is None else events
+         self.nbytes = alloc_nbytes
+         self.size = size
+         self.allocator = allocator
+
+         if data is None:
+             if alloc_nbytes == 0:
+                 self.base_data = None
+
+             else:
+                 if self.allocator is None:
+                     if context is None and queue is not None:
+                         context = queue.context
+
+                     self.base_data = cl.Buffer(
+                             context, cl.mem_flags.READ_WRITE, alloc_nbytes)
+                 else:
+                     self.base_data = self.allocator(alloc_nbytes)
+         else:
+             self.base_data = data
+
+         self.offset = offset
+         self.context = context
+         self._flags = _flags
+
+         if __debug__:
+             if queue is not None and isinstance(
+                     self.base_data, _SVMPointer_or_nothing):
+                 mem_queue = getattr(self.base_data, "_queue", _NOT_PRESENT)
+                 if mem_queue is not _NOT_PRESENT and mem_queue != queue:
+                     warn("Array has different queue from backing SVM memory. "
+                          "This may lead to the array getting deallocated sooner "
+                          "than expected, potentially leading to crashes.",
+                          InconsistentOpenCLQueueWarning, stacklevel=2)
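
A usage sketch of the two construction modes described in the class docstring, assuming an OpenCL platform is available at runtime:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.Array(queue, (8,), np.float32)  # bound to a default queue
    b = cl_array.Array(ctx, (8,), np.float32)    # queue-less: no operators yet
    b = b.with_queue(queue)                      # now usable like `a`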
+
+     @property
+     def ndim(self):
+         return len(self.shape)
+
+     @property
+     def data(self):
+         if self.offset:
+             raise ArrayHasOffsetError()
+         else:
+             return self.base_data
+
+     @property
+     def flags(self):
+         f = self._flags
+         if f is None:
+             self._flags = f = _ArrayFlags(self)
+         return f
+
+     def _new_with_changes(self, data, offset, shape=None, dtype=None,
+             strides=None, queue=_copy_queue, allocator=None):
+         """
+         :arg data: *None* means allocate a new array.
+         """
+         fast = True
+         size = self.size
+         if shape is None:
+             shape = self.shape
+         else:
+             fast = False
+             size = None
+
+         if dtype is None:
+             dtype = self.dtype
+         if strides is None:
+             strides = self.strides
+         if queue is _copy_queue:
+             queue = self.queue
+         if allocator is None:
+             allocator = self.allocator
+
+         # If we're allocating new data, then there's not likely to be
+         # a data dependency. Otherwise, the two arrays should probably
+         # share the same events list.
+
+         if data is None:
+             events = None
+         else:
+             events = self.events
+
+         return self.__class__(None, shape, dtype, allocator=allocator,
+                 strides=strides, data=data, offset=offset,
+                 events=events,
+                 _fast=fast, _context=self.context, _queue=queue, _size=size)
+
+     def with_queue(self, queue):
+         """Return a copy of *self* with the default queue set to *queue*.
+
+         *None* is allowed as a value for *queue*.
+
+         .. versionadded:: 2013.1
+         """
+
+         if queue is not None:
+             assert queue.context == self.context
+
+         return self._new_with_changes(self.base_data, self.offset,
+                 queue=queue)
+
+     def _get_sizes(self, queue, kernel_specific_max_wg_size=None):
+         if not self.flags.forc:
+             raise NotImplementedError("cannot operate on non-contiguous array")
+         cache_key = (queue.device.int_ptr, self.size, kernel_specific_max_wg_size)
+         try:
+             return _ARRAY_GET_SIZES_CACHE[cache_key]
+         except KeyError:
+             sizes = _splay(queue.device, self.size,
+                     kernel_specific_max_wg_size=kernel_specific_max_wg_size)
+             _ARRAY_GET_SIZES_CACHE[cache_key] = sizes
+             return sizes
+
+     def set(self, ary, queue=None, async_=None, **kwargs):
+         """Transfer the contents of the :class:`numpy.ndarray` object *ary*
+         onto the device.
+
+         *ary* must have the same dtype and size (not necessarily shape) as
+         *self*.
+
+         *async_* is a Boolean indicating whether the function is allowed
+         to return before the transfer completes. To avoid synchronization
+         bugs, this defaults to *False*.
+
+         .. versionchanged:: 2017.2.1
+
+             Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
+             we will continue to accept *async* as a parameter, however this
+             should be considered deprecated. *async_* is the new, official
+             spelling.
+         """
+
+         # {{{ handle 'async' deprecation
+
+         async_arg = kwargs.pop("async", None)
+         if async_arg is not None:
+             if async_ is not None:
+                 raise TypeError("may not specify both 'async' and 'async_'")
+             async_ = async_arg
+
+         if async_ is None:
+             async_ = False
+
+         if kwargs:
+             raise TypeError("extra keyword arguments specified: %s"
+                     % ", ".join(kwargs))
+
+         # }}}
+
+         assert ary.size == self.size
+         assert ary.dtype == self.dtype
+
+         if not ary.flags.forc:
+             raise RuntimeError("cannot set from non-contiguous array")
+
+         if not _equal_strides(ary.strides, self.strides, self.shape):
+             warn("Setting array from one with different "
+                     "strides/storage order. This will cease to work "
+                     "in 2013.x.",
+                     stacklevel=2)
+
+         if self.size:
+             event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
+                     dst_offset=self.offset,
+                     is_blocking=not async_)
+
+             self.add_event(event1)
+
+     def _get(self, queue=None, ary=None, async_=None, **kwargs):
+         # {{{ handle 'async' deprecation
+
+         async_arg = kwargs.pop("async", None)
+         if async_arg is not None:
+             if async_ is not None:
+                 raise TypeError("may not specify both 'async' and 'async_'")
+             async_ = async_arg
+
+         if async_ is None:
+             async_ = False
+
+         if kwargs:
+             raise TypeError("extra keyword arguments specified: %s"
+                     % ", ".join(kwargs))
+
+         # }}}
+
+         if ary is None:
+             ary = np.empty(self.shape, self.dtype)
+
+             if self.strides != ary.strides:
+                 ary = _as_strided(ary, strides=self.strides)
+         else:
+             if ary.size != self.size:
+                 raise TypeError("'ary' has non-matching size")
+             if ary.dtype != self.dtype:
+                 raise TypeError("'ary' has non-matching type")
+
+             if self.shape != ary.shape:
+                 warn("get() between arrays of different shape is deprecated "
+                         "and will be removed in PyOpenCL 2017.x",
+                         DeprecationWarning, stacklevel=2)
+
+         assert self.flags.forc, "Array in get() must be contiguous"
+
+         queue = queue or self.queue
+         if queue is None:
+             raise ValueError("Cannot copy array to host. "
+                     "Array has no queue. Use "
+                     "'new_array = array.with_queue(queue)' "
+                     "to associate one.")
+
+         if self.size:
+             event1 = cl.enqueue_copy(queue, ary, self.base_data,
+                     src_offset=self.offset,
+                     wait_for=self.events, is_blocking=not async_)
+
+             self.add_event(event1)
+         else:
+             event1 = None
+
+         return ary, event1
+
+     def get(self, queue=None, ary=None, async_=None, **kwargs):
+         """Transfer the contents of *self* into *ary* or a newly allocated
+         :class:`numpy.ndarray`. If *ary* is given, it must have the same
+         shape and dtype.
+
+         .. versionchanged:: 2019.1.2
+
+             Calling with ``async_=True`` was deprecated and replaced by
+             :meth:`get_async`.
+             The event returned by :meth:`pyopencl.enqueue_copy` is now stored into
+             :attr:`events` to ensure data is not modified before the copy is
+             complete.
+
+         .. versionchanged:: 2015.2
+
+             *ary* with different shape was deprecated.
+
+         .. versionchanged:: 2017.2.1
+
+             Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
+             we will continue to accept *async* as a parameter, however this
+             should be considered deprecated. *async_* is the new, official
+             spelling.
+         """
+
+         if async_:
+             warn("calling pyopencl.Array.get with 'async_=True' is deprecated. "
+                     "Please use pyopencl.Array.get_async for asynchronous "
+                     "device-to-host transfers",
+                     DeprecationWarning, stacklevel=2)
+
+         ary, event1 = self._get(queue=queue, ary=ary, async_=async_, **kwargs)
+
+         return ary
+
+     def get_async(self, queue=None, ary=None, **kwargs):
+         """
+         Asynchronous version of :meth:`get` which returns a tuple ``(ary, event)``
+         containing the host array ``ary``
+         and the :class:`pyopencl.NannyEvent` ``event`` returned by
+         :meth:`pyopencl.enqueue_copy`.
+
+         .. versionadded:: 2019.1.2
+         """
+
+         return self._get(queue=queue, ary=ary, async_=True, **kwargs)
941
+ def copy(self, queue=_copy_queue):
942
+ """
943
+ :arg queue: The :class:`~pyopencl.CommandQueue` for the returned array.
944
+
945
+ .. versionchanged:: 2017.1.2
946
+
947
+ Updates the queue of the returned array.
948
+
949
+ .. versionadded:: 2013.1
950
+ """
951
+
952
+ if queue is _copy_queue:
953
+ queue = self.queue
954
+
955
+ result = self._new_like_me(queue=queue)
956
+
957
+ # result.queue won't be the same as queue if queue is None.
958
+ # We force them to be the same here.
959
+ if result.queue is not queue:
960
+ result = result.with_queue(queue)
961
+
962
+ if not self.flags.forc:
963
+ raise RuntimeError("cannot copy non-contiguous array")
964
+
965
+ if self.nbytes:
966
+ event1 = cl.enqueue_copy(queue or self.queue,
967
+ result.base_data, self.base_data,
968
+ src_offset=self.offset, byte_count=self.nbytes,
969
+ wait_for=self.events)
970
+ result.add_event(event1)
971
+
972
+ return result
973
+
974
+ def __str__(self):
975
+ if self.queue is None:
976
+ return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
977
+ "without queue, call with_queue()>")
978
+
979
+ return str(self.get())
980
+
981
+ def __repr__(self):
982
+ if self.queue is None:
983
+ return (f"<cl.{type(self).__name__} {self.shape} of {self.dtype} "
984
+ f"at {id(self):x} without queue, call with_queue()>")
985
+
986
+ result = repr(self.get())
987
+ if result[:5] == "array":
988
+ result = f"cl.{type(self).__name__}" + result[5:]
989
+ else:
990
+ warn(
991
+ f"{type(result).__name__}.__repr__ was expected to return a "
992
+ f"string starting with 'array', got '{result[:10]!r}'",
993
+ stacklevel=2)
994
+
995
+ return result
996
+
997
+ def safely_stringify_for_pudb(self):
998
+ return f"cl.{type(self).__name__} {self.dtype} {self.shape}"
999
+
1000
+ def __hash__(self):
1001
+ raise TypeError("pyopencl arrays are not hashable.")
1002
+
1003
+ # {{{ kernel invocation wrappers
1004
+
1005
+ @staticmethod
1006
+ @elwise_kernel_runner
1007
+ def _axpbyz(out, afac, a, bfac, b, queue=None):
1008
+ """Compute ``out = selffac * self + otherfac*other``,
1009
+ where *other* is an array."""
1010
+ a_shape = a.shape
1011
+ b_shape = b.shape
1012
+ out_shape = out.shape
1013
+ assert (a_shape == b_shape == out_shape
1014
+ or (a_shape == () and b_shape == out_shape)
1015
+ or (b_shape == () and a_shape == out_shape))
1016
+ return elementwise.get_axpbyz_kernel(
1017
+ out.context, a.dtype, b.dtype, out.dtype,
1018
+ x_is_scalar=(a_shape == ()),
1019
+ y_is_scalar=(b_shape == ()))
1020
+
1021
+ @staticmethod
1022
+ @elwise_kernel_runner
1023
+ def _axpbz(out, a, x, b, queue=None):
1024
+ """Compute ``z = a * x + b``, where *b* is a scalar."""
1025
+ a = np.array(a)
1026
+ b = np.array(b)
1027
+ assert out.shape == x.shape
1028
+ return elementwise.get_axpbz_kernel(out.context,
1029
+ a.dtype, x.dtype, b.dtype, out.dtype)
1030
+
1031
+ @staticmethod
1032
+ @elwise_kernel_runner
1033
+ def _elwise_multiply(out, a, b, queue=None):
1034
+ a_shape = a.shape
1035
+ b_shape = b.shape
1036
+ out_shape = out.shape
1037
+ assert (a_shape == b_shape == out_shape
1038
+ or (a_shape == () and b_shape == out_shape)
1039
+ or (b_shape == () and a_shape == out_shape))
1040
+ return elementwise.get_multiply_kernel(
1041
+ a.context, a.dtype, b.dtype, out.dtype,
1042
+ x_is_scalar=(a_shape == ()),
1043
+ y_is_scalar=(b_shape == ())
1044
+ )
1045
+
1046
+ @staticmethod
1047
+ @elwise_kernel_runner
1048
+ def _rdiv_scalar(out, ary, other, queue=None):
1049
+ other = np.array(other)
1050
+ assert out.shape == ary.shape
1051
+ return elementwise.get_rdivide_elwise_kernel(
1052
+ out.context, ary.dtype, other.dtype, out.dtype)
1053
+
1054
+ @staticmethod
1055
+ @elwise_kernel_runner
1056
+ def _div(out, self, other, queue=None):
1057
+ """Divides an array by another array."""
1058
+ assert (self.shape == other.shape == out.shape
1059
+ or (self.shape == () and other.shape == out.shape)
1060
+ or (other.shape == () and self.shape == out.shape))
1061
+
1062
+ return elementwise.get_divide_kernel(self.context,
1063
+ self.dtype, other.dtype, out.dtype,
1064
+ x_is_scalar=(self.shape == ()),
1065
+ y_is_scalar=(other.shape == ()))
1066
+
1067
+ @staticmethod
1068
+ @elwise_kernel_runner
1069
+ def _fill(result, scalar):
1070
+ return elementwise.get_fill_kernel(result.context, result.dtype)
1071
+
1072
+ @staticmethod
1073
+ @elwise_kernel_runner
1074
+ def _abs(result, arg):
1075
+ if arg.dtype.kind == "c":
1076
+ from pyopencl.elementwise import complex_dtype_to_name
1077
+ fname = "%s_abs" % complex_dtype_to_name(arg.dtype)
1078
+ elif arg.dtype.kind == "f":
1079
+ fname = "fabs"
1080
+ elif arg.dtype.kind in ["u", "i"]:
1081
+ fname = "abs"
1082
+ else:
1083
+ raise TypeError("unsupported dtype in _abs()")
1084
+
1085
+ return elementwise.get_unary_func_kernel(
1086
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1087
+
1088
+ @staticmethod
1089
+ @elwise_kernel_runner
1090
+ def _real(result, arg):
1091
+ from pyopencl.elementwise import complex_dtype_to_name
1092
+ fname = "%s_real" % complex_dtype_to_name(arg.dtype)
1093
+ return elementwise.get_unary_func_kernel(
1094
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1095
+
1096
+ @staticmethod
1097
+ @elwise_kernel_runner
1098
+ def _imag(result, arg):
1099
+ from pyopencl.elementwise import complex_dtype_to_name
1100
+ fname = "%s_imag" % complex_dtype_to_name(arg.dtype)
1101
+ return elementwise.get_unary_func_kernel(
1102
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1103
+
1104
+ @staticmethod
1105
+ @elwise_kernel_runner
1106
+ def _conj(result, arg):
1107
+ from pyopencl.elementwise import complex_dtype_to_name
1108
+ fname = "%s_conj" % complex_dtype_to_name(arg.dtype)
1109
+ return elementwise.get_unary_func_kernel(
1110
+ arg.context, fname, arg.dtype, out_dtype=result.dtype)
1111
+
1112
+ @staticmethod
1113
+ @elwise_kernel_runner
1114
+ def _pow_scalar(result, ary, exponent):
1115
+ exponent = np.array(exponent)
1116
+ return elementwise.get_pow_kernel(result.context,
1117
+ ary.dtype, exponent.dtype, result.dtype,
1118
+ is_base_array=True, is_exp_array=False)
1119
+
1120
+ @staticmethod
1121
+ @elwise_kernel_runner
1122
+ def _rpow_scalar(result, base, exponent):
1123
+ base = np.array(base)
1124
+ return elementwise.get_pow_kernel(result.context,
1125
+ base.dtype, exponent.dtype, result.dtype,
1126
+ is_base_array=False, is_exp_array=True)
1127
+
1128
+ @staticmethod
1129
+ @elwise_kernel_runner
1130
+ def _pow_array(result, base, exponent):
1131
+ return elementwise.get_pow_kernel(
1132
+ result.context, base.dtype, exponent.dtype, result.dtype,
1133
+ is_base_array=True, is_exp_array=True)
1134
+
1135
+ @staticmethod
1136
+ @elwise_kernel_runner
1137
+ def _reverse(result, ary):
1138
+ return elementwise.get_reverse_kernel(result.context, ary.dtype)
1139
+
1140
+ @staticmethod
1141
+ @elwise_kernel_runner
1142
+ def _copy(dest, src):
1143
+ return elementwise.get_copy_kernel(
1144
+ dest.context, dest.dtype, src.dtype)
1145
+
1146
+ def _new_like_me(self, dtype=None, queue=None):
1147
+ if dtype is None:
1148
+ dtype = self.dtype
1149
+ strides = self.strides
1150
+ flags = self.flags
1151
+ fast = True
1152
+ else:
1153
+ strides = None
1154
+ flags = None
1155
+ if dtype == self.dtype:
1156
+ strides = self.strides
1157
+ flags = self.flags
1158
+ fast = True
1159
+ else:
1160
+ fast = False
1161
+
1162
+ queue = queue or self.queue
1163
+ return self.__class__(None, self.shape, dtype,
1164
+ allocator=self.allocator, strides=strides, _flags=flags,
1165
+ _fast=fast,
1166
+ _size=self.size, _queue=queue, _context=self.context)
1167
+
1168
+ @staticmethod
1169
+ @elwise_kernel_runner
1170
+ def _scalar_binop(out, a, b, queue=None, op=None):
1171
+ return elementwise.get_array_scalar_binop_kernel(
1172
+ out.context, op, out.dtype, a.dtype,
1173
+ np.array(b).dtype)
1174
+
1175
+ @staticmethod
1176
+ @elwise_kernel_runner
1177
+ def _array_binop(out, a, b, queue=None, op=None):
1178
+ a_shape = a.shape
1179
+ b_shape = b.shape
1180
+ out_shape = out.shape
1181
+ assert (a_shape == b_shape == out_shape
1182
+ or (a_shape == () and b_shape == out_shape)
1183
+ or (b_shape == () and a_shape == out_shape))
1184
+ return elementwise.get_array_binop_kernel(
1185
+ out.context, op, out.dtype, a.dtype, b.dtype,
1186
+ a_is_scalar=(a_shape == ()),
1187
+ b_is_scalar=(b_shape == ()))
1188
+
1189
+ @staticmethod
1190
+ @elwise_kernel_runner
1191
+ def _unop(out, a, queue=None, op=None):
1192
+ if out.shape != a.shape:
1193
+ raise ValueError("shapes of arguments do not match")
1194
+ return elementwise.get_unop_kernel(
1195
+ out.context, op, a.dtype, out.dtype)
1196
+
1197
+ # }}}
1198
+
1199
+ # {{{ operators
1200
+
1201
+ def mul_add(self, selffac, other, otherfac, queue=None):
1202
+ """Return ``selffac * self + otherfac * other``.
1203
+ """
1204
+ queue = queue or self.queue
1205
+
1206
+ if isinstance(other, Array):
1207
+ result = _get_broadcasted_binary_op_result(self, other, queue)
1208
+ result.add_event(
1209
+ self._axpbyz(
1210
+ result, selffac, self, otherfac, other,
1211
+ queue=queue))
1212
+ return result
1213
+ elif np.isscalar(other):
1214
+ common_dtype = _get_common_dtype(self, other, queue)
1215
+ result = self._new_like_me(common_dtype, queue=queue)
1216
+ result.add_event(
1217
+ self._axpbz(result, selffac,
1218
+ self, common_dtype.type(otherfac * other),
1219
+ queue=queue))
1220
+ return result
1221
+ else:
1222
+ raise NotImplementedError
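
`mul_add` fuses the scaled sum into a single kernel launch, unlike `2*x + 3*y`, which enqueues one kernel per operation. A sketch, assuming an OpenCL platform is available at runtime:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    x = cl_array.to_device(queue, np.arange(8, dtype=np.float32))
    y = cl_array.to_device(queue, np.ones(8, dtype=np.float32))

    z = x.mul_add(2.0, y, 3.0)  # 2*x + 3*y in one kernel launch
    assert np.allclose(z.get(), 2 * np.arange(8) + 3)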
1223
+
1224
+ def __add__(self, other):
1225
+ """Add an array with an array or an array with a scalar."""
1226
+
1227
+ if isinstance(other, Array):
1228
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1229
+ result.add_event(
1230
+ self._axpbyz(result,
1231
+ self.dtype.type(1), self,
1232
+ other.dtype.type(1), other))
1233
+
1234
+ return result
1235
+ elif np.isscalar(other):
1236
+ if other == 0:
1237
+ return self.copy()
1238
+ else:
1239
+ common_dtype = _get_common_dtype(self, other, self.queue)
1240
+ result = self._new_like_me(common_dtype)
1241
+ result.add_event(
1242
+ self._axpbz(result, self.dtype.type(1),
1243
+ self, common_dtype.type(other)))
1244
+ return result
1245
+ else:
1246
+ return NotImplemented
1247
+
1248
+ __radd__ = __add__
1249
+
1250
+ def __sub__(self, other):
1251
+ """Subtract an array from an array or a scalar from an array."""
1252
+
1253
+ if isinstance(other, Array):
1254
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1255
+ result.add_event(
1256
+ self._axpbyz(result,
1257
+ self.dtype.type(1), self,
1258
+ result.dtype.type(-1), other))
1259
+
1260
+ return result
1261
+ elif np.isscalar(other):
1262
+ if other == 0:
1263
+ return self.copy()
1264
+ else:
1265
+ result = self._new_like_me(
1266
+ _get_common_dtype(self, other, self.queue))
1267
+ result.add_event(
1268
+ self._axpbz(result, self.dtype.type(1), self, -other))
1269
+ return result
1270
+ else:
1271
+ return NotImplemented
1272
+
1273
+ def __rsub__(self, other):
1274
+ """Subtracts an array by a scalar or an array::
1275
+
1276
+ x = n - self
1277
+ """
1278
+ if np.isscalar(other):
1279
+ common_dtype = _get_common_dtype(self, other, self.queue)
1280
+ result = self._new_like_me(common_dtype)
1281
+ result.add_event(
1282
+ self._axpbz(result, result.dtype.type(-1), self,
1283
+ common_dtype.type(other)))
1284
+
1285
+ return result
1286
+ else:
1287
+ return NotImplemented
1288
+
1289
+ def __iadd__(self, other):
1290
+ if isinstance(other, Array):
1291
+ if other.shape != self.shape and other.shape != ():
1292
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1293
+ f" {self.shape}, {other.shape}.")
1294
+ self.add_event(
1295
+ self._axpbyz(self,
1296
+ self.dtype.type(1), self,
1297
+ other.dtype.type(1), other))
1298
+
1299
+ return self
1300
+ elif np.isscalar(other):
1301
+ self.add_event(
1302
+ self._axpbz(self, self.dtype.type(1), self, other))
1303
+ return self
1304
+ else:
1305
+ return NotImplemented
1306
+
1307
+ def __isub__(self, other):
1308
+ if isinstance(other, Array):
1309
+ if other.shape != self.shape and other.shape != ():
1310
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1311
+ f" {self.shape}, {other.shape}.")
1312
+ self.add_event(
1313
+ self._axpbyz(self, self.dtype.type(1), self,
1314
+ other.dtype.type(-1), other))
1315
+ return self
1316
+ elif np.isscalar(other):
1317
+ self._axpbz(self, self.dtype.type(1), self, -other)
1318
+ return self
1319
+ else:
1320
+ return NotImplemented
1321
+
1322
+ def __pos__(self):
1323
+ return self
1324
+
1325
+ def __neg__(self):
1326
+ result = self._new_like_me()
1327
+ result.add_event(self._axpbz(result, -1, self, 0))
1328
+ return result
1329
+
1330
+ def __mul__(self, other):
1331
+ if isinstance(other, Array):
1332
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1333
+ result.add_event(
1334
+ self._elwise_multiply(result, self, other))
1335
+ return result
1336
+ elif np.isscalar(other):
1337
+ common_dtype = _get_common_dtype(self, other, self.queue)
1338
+ result = self._new_like_me(common_dtype)
1339
+ result.add_event(
1340
+ self._axpbz(result,
1341
+ common_dtype.type(other), self, self.dtype.type(0)))
1342
+ return result
1343
+ else:
1344
+ return NotImplemented
1345
+
1346
+ def __rmul__(self, other):
1347
+ if np.isscalar(other):
1348
+ common_dtype = _get_common_dtype(self, other, self.queue)
1349
+ result = self._new_like_me(common_dtype)
1350
+ result.add_event(
1351
+ self._axpbz(result,
1352
+ common_dtype.type(other), self, self.dtype.type(0)))
1353
+ return result
1354
+ else:
1355
+ return NotImplemented
1356
+
1357
+ def __imul__(self, other):
1358
+ if isinstance(other, Array):
1359
+ if other.shape != self.shape and other.shape != ():
1360
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1361
+ f" {self.shape}, {other.shape}.")
1362
+ self.add_event(
1363
+ self._elwise_multiply(self, self, other))
1364
+ return self
1365
+ elif np.isscalar(other):
1366
+ self.add_event(
1367
+ self._axpbz(self, other, self, self.dtype.type(0)))
1368
+ return self
1369
+ else:
1370
+ return NotImplemented
1371
+
1372
+ def __div__(self, other):
1373
+ """Divides an array by an array or a scalar, i.e. ``self / other``.
1374
+ """
1375
+ if isinstance(other, Array):
1376
+ result = _get_broadcasted_binary_op_result(
1377
+ self, other, self.queue,
1378
+ dtype_getter=_get_truedivide_dtype)
1379
+ result.add_event(self._div(result, self, other))
1380
+
1381
+ return result
1382
+ elif np.isscalar(other):
1383
+ if other == 1:
1384
+ return self.copy()
1385
+ else:
1386
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1387
+ result = self._new_like_me(common_dtype)
1388
+ result.add_event(
1389
+ self._axpbz(result,
1390
+ np.true_divide(common_dtype.type(1), other),
1391
+ self, self.dtype.type(0)))
1392
+ return result
1393
+ else:
1394
+ return NotImplemented
1395
+
1396
+ __truediv__ = __div__
1397
+
1398
+ def __rdiv__(self, other):
1399
+ """Divides an array by a scalar or an array, i.e. ``other / self``.
1400
+ """
1401
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1402
+
1403
+ if isinstance(other, Array):
1404
+ result = self._new_like_me(common_dtype)
1405
+ result.add_event(other._div(result, self))
1406
+ return result
1407
+ elif np.isscalar(other):
1408
+ result = self._new_like_me(common_dtype)
1409
+ result.add_event(
1410
+ self._rdiv_scalar(result, self, common_dtype.type(other)))
1411
+ return result
1412
+ else:
1413
+ return NotImplemented
1414
+
1415
+ __rtruediv__ = __rdiv__
1416
+
1417
+ def __itruediv__(self, other):
1418
+ # raise an error if the result cannot be cast to self
1419
+ common_dtype = _get_truedivide_dtype(self, other, self.queue)
1420
+ if not np.can_cast(common_dtype, self.dtype.type, "same_kind"):
1421
+ raise TypeError(
1422
+ "Cannot cast {!r} to {!r}".format(self.dtype, common_dtype))
1423
+
1424
+ if isinstance(other, Array):
1425
+ if other.shape != self.shape and other.shape != ():
1426
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1427
+ f" {self.shape}, {other.shape}.")
1428
+ self.add_event(
1429
+ self._div(self, self, other))
1430
+ return self
1431
+ elif np.isscalar(other):
1432
+ if other == 1:
1433
+ return self
1434
+ else:
1435
+ self.add_event(
1436
+ self._axpbz(self, common_dtype.type(np.true_divide(1, other)),
1437
+ self, self.dtype.type(0)))
1438
+ return self
1439
+ else:
1440
+ return NotImplemented
1441
+
1442
+ def __and__(self, other):
1443
+ common_dtype = _get_common_dtype(self, other, self.queue)
1444
+
1445
+ if not np.issubdtype(common_dtype, np.integer):
1446
+ raise TypeError(f"Integral types only: {common_dtype}")
1447
+
1448
+ if isinstance(other, Array):
1449
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1450
+ result.add_event(self._array_binop(result, self, other, op="&"))
1451
+ return result
1452
+ elif np.isscalar(other):
1453
+ result = self._new_like_me(common_dtype)
1454
+ result.add_event(
1455
+ self._scalar_binop(result, self, other, op="&"))
1456
+ return result
1457
+ else:
1458
+ return NotImplemented
1459
+
1460
+ __rand__ = __and__ # commutes
1461
+
1462
+ def __or__(self, other):
1463
+ common_dtype = _get_common_dtype(self, other, self.queue)
1464
+
1465
+ if not np.issubdtype(common_dtype, np.integer):
1466
+ raise TypeError("Integral types only")
1467
+
1468
+ if isinstance(other, Array):
1469
+ result = _get_broadcasted_binary_op_result(self, other,
1470
+ self.queue)
1471
+ result.add_event(self._array_binop(result, self, other, op="|"))
1472
+ return result
1473
+ elif np.isscalar(other):
1474
+ result = self._new_like_me(common_dtype)
1475
+ result.add_event(
1476
+ self._scalar_binop(result, self, other, op="|"))
1477
+ return result
1478
+ else:
1479
+ return NotImplemented
1480
+
1481
+ __ror__ = __or__ # commutes
1482
+
1483
+ def __xor__(self, other):
1484
+ common_dtype = _get_common_dtype(self, other, self.queue)
1485
+
1486
+ if not np.issubdtype(common_dtype, np.integer):
1487
+ raise TypeError(f"Integral types only: {common_dtype}")
1488
+
1489
+ if isinstance(other, Array):
1490
+ result = _get_broadcasted_binary_op_result(self, other, self.queue)
1491
+ result.add_event(self._array_binop(result, self, other, op="^"))
1492
+ return result
1493
+ elif np.isscalar(other):
1494
+ result = self._new_like_me(common_dtype)
1495
+ result.add_event(
1496
+ self._scalar_binop(result, self, other, op="^"))
1497
+ return result
1498
+ else:
1499
+ return NotImplemented
1500
+
1501
+ __rxor__ = __xor__ # commutes
1502
+
1503
+ def __iand__(self, other):
1504
+ common_dtype = _get_common_dtype(self, other, self.queue)
1505
+
1506
+ if not np.issubdtype(common_dtype, np.integer):
1507
+ raise TypeError(f"Integral types only: {common_dtype}")
1508
+
1509
+ if isinstance(other, Array):
1510
+ if other.shape != self.shape and other.shape != ():
1511
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1512
+ f" {self.shape}, {other.shape}.")
1513
+ self.add_event(self._array_binop(self, self, other, op="&"))
1514
+ return self
1515
+ elif np.isscalar(other):
1516
+ self.add_event(
1517
+ self._scalar_binop(self, self, other, op="&"))
1518
+ return self
1519
+ else:
1520
+ return NotImplemented
1521
+
1522
+ def __ior__(self, other):
1523
+ common_dtype = _get_common_dtype(self, other, self.queue)
1524
+
1525
+ if not np.issubdtype(common_dtype, np.integer):
1526
+ raise TypeError(f"Integral types only: {common_dtype}")
1527
+
1528
+ if isinstance(other, Array):
1529
+ if other.shape != self.shape and other.shape != ():
1530
+ raise NotImplementedError("Broadcasting binary op with shapes:"
1531
+ f" {self.shape}, {other.shape}.")
1532
+ self.add_event(self._array_binop(self, self, other, op="|"))
1533
+ return self
1534
+ elif np.isscalar(other):
1535
+ self.add_event(
1536
+ self._scalar_binop(self, self, other, op="|"))
1537
+ return self
1538
+ else:
1539
+ return NotImplemented
1540
+
1541
+    def __ixor__(self, other):
+        common_dtype = _get_common_dtype(self, other, self.queue)
+
+        if not np.issubdtype(common_dtype, np.integer):
+            raise TypeError(f"Integral types only: {common_dtype}")
+
+        if isinstance(other, Array):
+            if other.shape != self.shape and other.shape != ():
+                raise NotImplementedError("Broadcasting binary op with shapes:"
+                                          f" {self.shape}, {other.shape}.")
+            self.add_event(self._array_binop(self, self, other, op="^"))
+            return self
+        elif np.isscalar(other):
+            self.add_event(
+                    self._scalar_binop(self, self, other, op="^"))
+            return self
+        else:
+            return NotImplemented
+
+    def _zero_fill(self, queue=None, wait_for=None):
+        queue = queue or self.queue
+
+        if not self.size:
+            return
+
+        cl_version_gtr_1_2 = (
+            queue._get_cl_version() >= (1, 2)
+            and cl.get_cl_header_version() >= (1, 2)
+        )
+        on_nvidia = queue.device.vendor.startswith("NVIDIA")
+
+        # circumvent bug with large buffers on NVIDIA
+        # https://github.com/inducer/pyopencl/issues/395
+        if cl_version_gtr_1_2 and not (on_nvidia and self.nbytes >= 2**31):
+            self.add_event(
+                    cl.enqueue_fill(queue, self.base_data, np.int8(0),
+                        self.nbytes, offset=self.offset, wait_for=wait_for))
+        else:
+            zero = np.zeros((), self.dtype)
+            self.fill(zero, queue=queue)
+
+    def fill(self, value, queue=None, wait_for=None):
+        """Fill the array with the scalar *value*.
+
+        :returns: *self*.
+        """
+
+        self.add_event(
+                self._fill(self, value, queue=queue, wait_for=wait_for))
+
+        return self
+
+    def __len__(self):
+        """Returns the size of the leading dimension of *self*."""
+        if len(self.shape):
+            return self.shape[0]
+        else:
+            raise TypeError("len() of unsized object")
+
+    def __abs__(self):
+        """Return an ``Array`` of the absolute values of the elements
+        of *self*.
+        """
+
+        result = self._new_like_me(self.dtype.type(0).real.dtype)
+        result.add_event(self._abs(result, self))
+        return result
+
+    def __pow__(self, other):
+        """Exponentiation by a scalar or elementwise by another
+        :class:`Array`.
+        """
+
+        if isinstance(other, Array):
+            assert self.shape == other.shape
+
+            result = self._new_like_me(
+                    _get_common_dtype(self, other, self.queue))
+            result.add_event(
+                    self._pow_array(result, self, other))
+            return result
+        elif np.isscalar(other):
+            result = self._new_like_me(
+                    _get_common_dtype(self, other, self.queue))
+            result.add_event(self._pow_scalar(result, self, other))
+            return result
+        else:
+            return NotImplemented
+
+    def __rpow__(self, other):
+        if np.isscalar(other):
+            common_dtype = _get_common_dtype(self, other, self.queue)
+            result = self._new_like_me(common_dtype)
+            result.add_event(
+                    self._rpow_scalar(result, common_dtype.type(other), self))
+            return result
+        else:
+            return NotImplemented
+
+    def __invert__(self):
+        if not np.issubdtype(self.dtype, np.integer):
+            raise TypeError(f"Integral types only: {self.dtype}")
+
+        result = self._new_like_me()
+        result.add_event(self._unop(result, self, op="~"))
+
+        return result
+
+    # }}}
+
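The bitwise operators above dispatch to elementwise device kernels and reject non-integral dtypes. The following is a minimal usage sketch, not part of the package source; it assumes a working OpenCL platform picked via `cl.create_some_context()`:

    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cl_array

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = cl_array.to_device(queue, np.arange(8, dtype=np.int32))
    b = cl_array.to_device(queue, np.full(8, 0b1010, dtype=np.int32))

    masked = (a & b).get()   # elementwise AND, computed on the device
    flipped = (~a).get()     # __invert__, integral dtypes only
    a |= b                   # __ior__ mutates `a` in place
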
+    def reverse(self, queue=None):
+        """Return this array in reversed order. The array is treated
+        as one-dimensional.
+        """
+
+        result = self._new_like_me()
+        result.add_event(self._reverse(result, self))
+        return result
+
+    def astype(self, dtype, queue=None):
+        """Return a copy of *self*, cast to *dtype*."""
+        if dtype == self.dtype:
+            return self.copy()
+
+        result = self._new_like_me(dtype=dtype)
+        result.add_event(self._copy(result, self, queue=queue))
+        return result
+
+    # {{{ rich comparisons, any, all
+
+    def __bool__(self):
+        if self.shape == ():
+            return bool(self.get())
+        else:
+            raise ValueError("The truth value of an array with "
+                    "more than one element is ambiguous. Use a.any() or a.all()")
+
+    def any(self, queue=None, wait_for=None):
+        from pyopencl.reduction import get_any_kernel
+        krnl = get_any_kernel(self.context, self.dtype)
+        if wait_for is None:
+            wait_for = []
+        result, event1 = krnl(self, queue=queue,
+                wait_for=wait_for + self.events, return_event=True)
+        result.add_event(event1)
+        return result
+
+    def all(self, queue=None, wait_for=None):
+        from pyopencl.reduction import get_all_kernel
+        krnl = get_all_kernel(self.context, self.dtype)
+        if wait_for is None:
+            wait_for = []
+        result, event1 = krnl(self, queue=queue,
+                wait_for=wait_for + self.events, return_event=True)
+        result.add_event(event1)
+        return result
+
+    @staticmethod
+    @elwise_kernel_runner
+    def _scalar_comparison(out, a, b, queue=None, op=None):
+        return elementwise.get_array_scalar_comparison_kernel(
+                out.context, op, a.dtype)
+
+    @staticmethod
+    @elwise_kernel_runner
+    def _array_comparison(out, a, b, queue=None, op=None):
+        if a.shape != b.shape:
+            raise ValueError("shapes of comparison arguments do not match")
+        return elementwise.get_array_comparison_kernel(
+                out.context, op, a.dtype, b.dtype)
+
+    def __eq__(self, other):
+        if isinstance(other, Array):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._array_comparison(result, self, other, op="=="))
+            return result
+        elif np.isscalar(other):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._scalar_comparison(result, self, other, op="=="))
+            return result
+        else:
+            return NotImplemented
+
+    def __ne__(self, other):
+        if isinstance(other, Array):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._array_comparison(result, self, other, op="!="))
+            return result
+        elif np.isscalar(other):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._scalar_comparison(result, self, other, op="!="))
+            return result
+        else:
+            return NotImplemented
+
+    def __le__(self, other):
+        if isinstance(other, Array):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._array_comparison(result, self, other, op="<="))
+            return result
+        elif np.isscalar(other):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._scalar_comparison(result, self, other, op="<="))
+            return result
+        else:
+            return NotImplemented
+
+    def __ge__(self, other):
+        if isinstance(other, Array):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._array_comparison(result, self, other, op=">="))
+            return result
+        elif np.isscalar(other):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._scalar_comparison(result, self, other, op=">="))
+            return result
+        else:
+            return NotImplemented
+
+    def __lt__(self, other):
+        if isinstance(other, Array):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._array_comparison(result, self, other, op="<"))
+            return result
+        elif np.isscalar(other):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._scalar_comparison(result, self, other, op="<"))
+            return result
+        else:
+            return NotImplemented
+
+    def __gt__(self, other):
+        if isinstance(other, Array):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._array_comparison(result, self, other, op=">"))
+            return result
+        elif np.isscalar(other):
+            result = self._new_like_me(_BOOL_DTYPE)
+            result.add_event(
+                    self._scalar_comparison(result, self, other, op=">"))
+            return result
+        else:
+            return NotImplemented
+
+    # }}}
+
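The rich comparisons return device arrays of the boolean dtype rather than host booleans, which is why `__bool__` refuses non-scalar shapes; `any`/`all` reduce those masks. Illustrative sketch, not package source; `queue` and `a` as in the first sketch above:

    mask = (a > 3)                # int8 0/1 array on the device
    print(mask.any().get())       # does any element exceed 3?
    print((a == a).all().get())   # elementwise ==, then reduction
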
+    # {{{ complex-valued business
+
+    @property
+    def real(self):
+        """
+        .. versionadded:: 2012.1
+        """
+        if self.dtype.kind == "c":
+            result = self._new_like_me(self.dtype.type(0).real.dtype)
+            result.add_event(
+                    self._real(result, self))
+            return result
+        else:
+            return self
+
+    @property
+    def imag(self):
+        """
+        .. versionadded:: 2012.1
+        """
+        if self.dtype.kind == "c":
+            result = self._new_like_me(self.dtype.type(0).real.dtype)
+            result.add_event(
+                    self._imag(result, self))
+            return result
+        else:
+            return zeros_like(self)
+
+    def conj(self):
+        """
+        .. versionadded:: 2012.1
+        """
+        if self.dtype.kind == "c":
+            result = self._new_like_me()
+            result.add_event(self._conj(result, self))
+            return result
+        else:
+            return self
+
+    conjugate = conj
+
+    # }}}
+
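For complex dtypes, `real`, `imag` and `conj` each launch a small elementwise kernel; for real dtypes they cost nothing (`imag` returns `zeros_like`). Illustrative sketch, not package source; assumes the setup from the first sketch and a device with complex support:

    z = cl_array.to_device(
            queue, np.array([1+2j, 3-4j], dtype=np.complex64))
    print(z.real.get())     # [1. 3.]
    print(z.imag.get())     # [ 2. -4.]
    print(z.conj().get())   # [1.-2.j 3.+4.j]
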
+    # {{{ event management
+
+    def add_event(self, evt):
+        """Add *evt* to :attr:`events`. If :attr:`events` is too long, this method
+        may implicitly wait for a subset of :attr:`events` and clear them from the
+        list.
+        """
+        n_wait = 4
+
+        self.events.append(evt)
+
+        if len(self.events) > 3*n_wait:
+            wait_events = self.events[:n_wait]
+            cl.wait_for_events(wait_events)
+            del self.events[:n_wait]
+
+    def finish(self):
+        """Wait for the entire contents of :attr:`events`, then clear it."""
+
+        if self.events:
+            cl.wait_for_events(self.events)
+            del self.events[:]
+
+    # }}}
+
+    # {{{ views
+
+    def reshape(self, *shape, **kwargs):
+        """Returns an array containing the same data with a new shape."""
+
+        order = kwargs.pop("order", "C")
+        if kwargs:
+            raise TypeError("unexpected keyword arguments: %s"
+                    % list(kwargs.keys()))
+
+        if order not in "CF":
+            raise ValueError("order must be either 'C' or 'F'")
+
+        # TODO: add more error-checking, perhaps
+
+        # FIXME: The following is overly conservative. As long as we don't change
+        # our memory footprint, we're good.
+
+        # if not self.flags.forc:
+        #     raise RuntimeError("only contiguous arrays may "
+        #             "be used as arguments to this operation")
+
+        if isinstance(shape[0], tuple) or isinstance(shape[0], list):
+            shape = tuple(shape[0])
+
+        if -1 in shape:
+            shape = list(shape)
+            idx = shape.index(-1)
+            size = -reduce(lambda x, y: x * y, shape, 1)
+            if size == 0:
+                shape[idx] = 0
+            else:
+                shape[idx] = self.size // size
+            if builtins.any(s < 0 for s in shape):
+                raise ValueError("can only specify one unknown dimension")
+            shape = tuple(shape)
+
+        if shape == self.shape:
+            return self._new_with_changes(
+                    data=self.base_data, offset=self.offset, shape=shape,
+                    strides=self.strides)
+
+        import operator
+        size = reduce(operator.mul, shape, 1)
+        if size != self.size:
+            raise ValueError("total size of new array must be unchanged")
+
+        if self.size == 0:
+            return self._new_with_changes(
+                    data=None, offset=0, shape=shape,
+                    strides=(
+                        _f_contiguous_strides(self.dtype.itemsize, shape)
+                        if order == "F" else
+                        _c_contiguous_strides(self.dtype.itemsize, shape)
+                        ))
+
+        # {{{ determine reshaped strides
+
+        # copied and translated from
+        # https://github.com/numpy/numpy/blob/4083883228d61a3b571dec640185b5a5d983bf59/numpy/core/src/multiarray/shape.c  # noqa: E501
+
+        newdims = shape
+        newnd = len(newdims)
+
+        # Remove axes with dimension 1 from the old array. They have no effect
+        # but would need special cases since their strides do not matter.
+
+        olddims = []
+        oldstrides = []
+        for oi in range(len(self.shape)):
+            s = self.shape[oi]
+            if s != 1:
+                olddims.append(s)
+                oldstrides.append(self.strides[oi])
+
+        oldnd = len(olddims)
+
+        newstrides = [-1]*len(newdims)
+
+        # oi to oj and ni to nj give the axis ranges currently worked with
+        oi = 0
+        oj = 1
+        ni = 0
+        nj = 1
+        while ni < newnd and oi < oldnd:
+            np = newdims[ni]
+            op = olddims[oi]
+
+            while np != op:
+                if np < op:
+                    # Misses trailing 1s, these are handled later
+                    np *= newdims[nj]
+                    nj += 1
+                else:
+                    op *= olddims[oj]
+                    oj += 1
+
+            # Check whether the original axes can be combined
+            for ok in range(oi, oj-1):
+                if order == "F":
+                    if oldstrides[ok+1] != olddims[ok]*oldstrides[ok]:
+                        raise ValueError("cannot reshape without copy")
+                else:
+                    # C order
+                    if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]):
+                        raise ValueError("cannot reshape without copy")
+
+            # Calculate new strides for all axes currently worked with
+            if order == "F":
+                newstrides[ni] = oldstrides[oi]
+                for nk in range(ni+1, nj):
+                    newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1]
+            else:
+                # C order
+                newstrides[nj - 1] = oldstrides[oj - 1]
+                for nk in range(nj-1, ni, -1):
+                    newstrides[nk - 1] = newstrides[nk]*newdims[nk]
+
+            ni = nj
+            nj += 1
+
+            oi = oj
+            oj += 1
+
+        # Set strides corresponding to trailing 1s of the new shape.
+        if ni >= 1:
+            last_stride = newstrides[ni - 1]
+        else:
+            last_stride = self.dtype.itemsize
+
+        if order == "F":
+            last_stride *= newdims[ni - 1]
+
+        for nk in range(ni, len(shape)):
+            newstrides[nk] = last_stride
+
+        # }}}
+
+        return self._new_with_changes(
+                data=self.base_data, offset=self.offset, shape=shape,
+                strides=tuple(newstrides))
+
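Because `reshape` only recomputes strides, a compatible request yields a view of the same buffer and never copies; incompatible layouts raise "cannot reshape without copy". Illustrative sketch, not package source; setup as in the first sketch:

    m = cl_array.arange(queue, 12, dtype=np.float32)
    grid = m.reshape(3, 4)    # same buffer, new shape and strides
    col = grid[:, 1]          # strided view, still no copy
    flat = grid.ravel()       # back to 1D
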
+    def ravel(self, order="C"):
+        """Returns a flattened array containing the same data."""
+        return self.reshape(self.size, order=order)
+
+    def view(self, dtype=None):
+        """Returns a view of the array with the same data. If *dtype* differs
+        from the current dtype, the actual bytes of memory will be
+        reinterpreted.
+        """
+
+        if dtype is None:
+            dtype = self.dtype
+
+        old_itemsize = self.dtype.itemsize
+        itemsize = np.dtype(dtype).itemsize
+
+        from pytools import argmin2
+        min_stride_axis = argmin2(
+                (axis, abs(stride))
+                for axis, stride in enumerate(self.strides))
+
+        if self.shape[min_stride_axis] * old_itemsize % itemsize != 0:
+            raise ValueError("new type not compatible with array")
+
+        new_shape = (
+                self.shape[:min_stride_axis]
+                + (self.shape[min_stride_axis] * old_itemsize // itemsize,)
+                + self.shape[min_stride_axis+1:])
+        new_strides = (
+                self.strides[:min_stride_axis]
+                + (self.strides[min_stride_axis] * itemsize // old_itemsize,)
+                + self.strides[min_stride_axis+1:])
+
+        return self._new_with_changes(
+                self.base_data, self.offset,
+                shape=new_shape, dtype=dtype,
+                strides=new_strides)
+
+    def squeeze(self):
+        """Returns a view of the array with dimensions of
+        length 1 removed.
+
+        .. versionadded:: 2015.2
+        """
+        new_shape = tuple([dim for dim in self.shape if dim > 1])
+        new_strides = tuple([self.strides[i]
+            for i, dim in enumerate(self.shape) if dim > 1])
+
+        return self._new_with_changes(
+                self.base_data, self.offset,
+                shape=new_shape, strides=new_strides)
+
+    def transpose(self, axes=None):
+        """Permute the dimensions of an array.
+
+        :arg axes: list of ints, optional.
+            By default, reverse the dimensions, otherwise permute the axes
+            according to the values given.
+
+        :returns: :class:`Array` A view of the array with its axes permuted.
+
+        .. versionadded:: 2015.2
+        """
+
+        if axes is None:
+            axes = range(self.ndim-1, -1, -1)
+
+        if len(axes) != len(self.shape):
+            raise ValueError("axes don't match array")
+
+        new_shape = [self.shape[axes[i]] for i in range(len(axes))]
+        new_strides = [self.strides[axes[i]] for i in range(len(axes))]
+
+        return self._new_with_changes(
+                self.base_data, self.offset,
+                shape=tuple(new_shape),
+                strides=tuple(new_strides))
+
+    @property
+    def T(self):  # noqa: N802
+        """
+        .. versionadded:: 2015.2
+        """
+        return self.transpose()
+
+    # }}}
+
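`view`, `squeeze` and `transpose` are likewise pure stride manipulations on the existing buffer. Illustrative sketch, not package source; setup as in the first sketch:

    grid = cl_array.arange(queue, 12, dtype=np.float32).reshape(3, 4)
    reinterp = grid.view(np.int32)    # reinterpret the same 48 bytes
    tall = grid.T                     # axes reversed by swapping strides
    thin = tall[:, :1].squeeze()      # drop the length-1 axis -> shape (4,)
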
+    def map_to_host(self, queue=None, flags=None, is_blocking=True, wait_for=None):
+        """If *is_blocking*, return a :class:`numpy.ndarray` corresponding to the
+        same memory as *self*.
+
+        If *is_blocking* is not true, return a tuple ``(ary, evt)``, where
+        *ary* is the above-mentioned array.
+
+        The host array is obtained using :func:`pyopencl.enqueue_map_buffer`.
+        See there for further details.
+
+        :arg flags: A combination of :class:`pyopencl.map_flags`.
+            Defaults to read-write.
+
+        .. versionadded:: 2013.2
+        """
+
+        if flags is None:
+            flags = cl.map_flags.READ | cl.map_flags.WRITE
+        if wait_for is None:
+            wait_for = []
+
+        ary, evt = cl.enqueue_map_buffer(
+                queue or self.queue, self.base_data, flags, self.offset,
+                self.shape, self.dtype, strides=self.strides,
+                wait_for=wait_for + self.events, is_blocking=is_blocking)
+
+        if is_blocking:
+            return ary
+        else:
+            return ary, evt
+
+    # {{{ getitem/setitem
+
+    def __getitem__(self, index):
+        """
+        .. versionadded:: 2013.1
+        """
+
+        if isinstance(index, Array):
+            if index.dtype.kind not in ("i", "u"):
+                raise TypeError(
+                        "fancy indexing is only allowed with integers")
+            if len(index.shape) != 1:
+                raise NotImplementedError(
+                        "multidimensional fancy indexing is not supported")
+            if len(self.shape) != 1:
+                raise NotImplementedError(
+                        "fancy indexing into a multi-d array is not supported")
+
+            return take(self, index)
+
+        if not isinstance(index, tuple):
+            index = (index,)
+
+        new_shape = []
+        new_offset = self.offset
+        new_strides = []
+
+        seen_ellipsis = False
+
+        index_axis = 0
+        array_axis = 0
+        while index_axis < len(index):
+            index_entry = index[index_axis]
+
+            if array_axis > len(self.shape):
+                raise IndexError("too many axes in index")
+
+            if isinstance(index_entry, slice):
+                start, stop, idx_stride = index_entry.indices(
+                        self.shape[array_axis])
+
+                array_stride = self.strides[array_axis]
+
+                new_shape.append((abs(stop-start)-1)//abs(idx_stride)+1)
+                new_strides.append(idx_stride*array_stride)
+                new_offset += array_stride*start
+
+                index_axis += 1
+                array_axis += 1
+
+            elif isinstance(index_entry, (int, np.integer)):
+                array_shape = self.shape[array_axis]
+                if index_entry < 0:
+                    index_entry += array_shape
+
+                if not (0 <= index_entry < array_shape):
+                    raise IndexError(
+                            "subindex in axis %d out of range" % index_axis)
+
+                new_offset += self.strides[array_axis]*index_entry
+
+                index_axis += 1
+                array_axis += 1
+
+            elif index_entry is Ellipsis:
+                index_axis += 1
+
+                remaining_index_count = len(index) - index_axis
+                new_array_axis = len(self.shape) - remaining_index_count
+                if new_array_axis < array_axis:
+                    raise IndexError("invalid use of ellipsis in index")
+                while array_axis < new_array_axis:
+                    new_shape.append(self.shape[array_axis])
+                    new_strides.append(self.strides[array_axis])
+                    array_axis += 1
+
+                if seen_ellipsis:
+                    raise IndexError(
+                            "more than one ellipsis not allowed in index")
+                seen_ellipsis = True
+
+            elif index_entry is np.newaxis:
+                new_shape.append(1)
+                new_strides.append(0)
+                index_axis += 1
+
+            else:
+                raise IndexError("invalid subindex in axis %d" % index_axis)
+
+        while array_axis < len(self.shape):
+            new_shape.append(self.shape[array_axis])
+            new_strides.append(self.strides[array_axis])
+
+            array_axis += 1
+
+        return self._new_with_changes(
+                self.base_data, offset=new_offset,
+                shape=tuple(new_shape),
+                strides=tuple(new_strides))
+
+    def setitem(self, subscript, value, queue=None, wait_for=None):
+        """Like :meth:`__setitem__`, but with the ability to specify
+        a *queue* and *wait_for*.
+
+        .. versionadded:: 2013.1
+
+        .. versionchanged:: 2013.2
+
+            Added *wait_for*.
+        """
+
+        queue = queue or self.queue or value.queue
+        if wait_for is None:
+            wait_for = []
+        wait_for = wait_for + self.events
+
+        if isinstance(subscript, Array):
+            if subscript.dtype.kind not in ("i", "u"):
+                raise TypeError(
+                        "fancy indexing is only allowed with integers")
+            if len(subscript.shape) != 1:
+                raise NotImplementedError(
+                        "multidimensional fancy indexing is not supported")
+            if len(self.shape) != 1:
+                raise NotImplementedError(
+                        "fancy indexing into a multi-d array is not supported")
+
+            multi_put([value], subscript, out=[self], queue=queue,
+                    wait_for=wait_for)
+            return
+
+        subarray = self[subscript]
+
+        if not subarray.size:
+            # This prevents errors about mismatched strides that neither we
+            # nor numpy worry about in the empty case.
+            return
+
+        if isinstance(value, np.ndarray):
+            if subarray.shape == value.shape and subarray.strides == value.strides:
+                self.add_event(
+                        cl.enqueue_copy(queue, subarray.base_data,
+                            value, dst_offset=subarray.offset, wait_for=wait_for))
+                return
+            else:
+                value = to_device(queue, value, self.allocator)
+
+        if isinstance(value, Array):
+            if len(subarray.shape) != len(value.shape):
+                raise NotImplementedError("broadcasting is not "
+                        "supported in __setitem__")
+            if subarray.shape != value.shape:
+                raise ValueError("cannot assign between arrays of "
+                        "differing shapes")
+            if subarray.strides != value.strides:
+                raise NotImplementedError("cannot assign between arrays of "
+                        "differing strides")
+
+            self.add_event(
+                    self._copy(subarray, value, queue=queue, wait_for=wait_for))
+
+        else:
+            # Let's assume it's a scalar
+            subarray.fill(value, queue=queue, wait_for=wait_for)
+
+    def __setitem__(self, subscript, value):
+        """Set the slice of *self* identified by *subscript* to *value*.
+
+        *value* is allowed to be:
+
+        * An :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
+          but with potentially different :attr:`dtype`.
+        * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
+          :attr:`strides`, but with potentially different :attr:`dtype`.
+        * A scalar.
+
+        Non-scalar broadcasting is not currently supported.
+
+        .. versionadded:: 2013.1
+        """
+        self.setitem(subscript, value)
+
+    # }}}
+
+    # }}}
+
+
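Basic slicing returns views, and assignment accepts scalars, shape- and stride-matched host arrays, or other device arrays. Illustrative sketch, not package source; setup as in the first sketch:

    v = cl_array.zeros(queue, (8,), np.float32)
    v[2:5] = 7.0                                    # scalar fill of a subarray
    v[:3] = np.array([1, 2, 3], dtype=np.float32)   # host-to-device copy
    print(v.get())
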
+# {{{ creation helpers
+
+def as_strided(ary, shape=None, strides=None):
+    """Make an :class:`Array` from the given array with the given
+    shape and strides.
+    """
+
+    # undocumented for the moment
+
+    if shape is None:
+        shape = ary.shape
+    if strides is None:
+        strides = ary.strides
+
+    return Array(ary.queue, shape, ary.dtype, allocator=ary.allocator,
+            data=ary.data, strides=strides)
+
+
+class _same_as_transfer:  # noqa: N801
+    pass
+
+
+def to_device(queue, ary, allocator=None, async_=None,
+        array_queue=_same_as_transfer, **kwargs):
+    """Return an :class:`Array` that is an exact copy of the
+    :class:`numpy.ndarray` instance *ary*.
+
+    :arg array_queue: The :class:`~pyopencl.CommandQueue` which will
+        be stored in the resulting array. Useful
+        to make sure there is no implicit queue associated
+        with the array by passing *None*.
+
+    See :class:`Array` for the meaning of *allocator*.
+
+    .. versionchanged:: 2015.2
+        *array_queue* argument was added.
+
+    .. versionchanged:: 2017.2.1
+
+        Python 3.7 makes ``async`` a reserved keyword. On older Pythons,
+        we will continue to accept *async* as a parameter, however this
+        should be considered deprecated. *async_* is the new, official
+        spelling.
+    """
+
+    # {{{ handle 'async' deprecation
+
+    async_arg = kwargs.pop("async", None)
+    if async_arg is not None:
+        if async_ is not None:
+            raise TypeError("may not specify both 'async' and 'async_'")
+        async_ = async_arg
+
+    if async_ is None:
+        async_ = False
+
+    if kwargs:
+        raise TypeError("extra keyword arguments specified: %s"
+                % ", ".join(kwargs))
+
+    # }}}
+
+    if ary.dtype == object:
+        raise RuntimeError("to_device does not work on object arrays.")
+
+    if array_queue is _same_as_transfer:
+        first_arg = queue
+    else:
+        first_arg = queue.context
+
+    result = Array(first_arg, ary.shape, ary.dtype,
+            allocator=allocator, strides=ary.strides)
+    result.set(ary, async_=async_, queue=queue)
+    return result
+
+
+empty = Array
+
+
+def zeros(queue, shape, dtype, order="C", allocator=None):
+    """Same as :func:`empty`, but the :class:`Array` is zero-initialized before
+    being returned.
+
+    .. versionchanged:: 2011.1
+        *context* argument was deprecated.
+    """
+
+    result = Array(None, shape, dtype,
+            order=order, allocator=allocator,
+            _context=queue.context, _queue=queue)
+    result._zero_fill()
+    return result
+
+
+def empty_like(ary, queue=_copy_queue, allocator=None):
+    """Make a new, uninitialized :class:`Array` having the same properties
+    as *ary*.
+    """
+
+    return ary._new_with_changes(data=None, offset=0, queue=queue,
+            allocator=allocator)
+
+
+def zeros_like(ary):
+    """Make a new, zero-initialized :class:`Array` having the same properties
+    as *ary*.
+    """
+
+    result = ary._new_like_me()
+    result._zero_fill()
+    return result
+
+
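The creation helpers mirror their numpy namesakes but allocate device memory. Illustrative sketch, not package source; setup as in the first sketch:

    host = np.linspace(0, 1, 16, dtype=np.float32)
    dev = cl_array.to_device(queue, host)           # host -> device copy
    z = cl_array.zeros(queue, (4, 4), np.float32)   # zero-filled on the device
    scratch = cl_array.empty_like(dev)              # uninitialized, same shape/dtype
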
+@dataclass
+class _ArangeInfo:
+    start: Optional[int] = None
+    stop: Optional[int] = None
+    step: Optional[int] = None
+    dtype: Optional["np.dtype"] = None
+    allocator: Optional[Any] = None
+
+
+@elwise_kernel_runner
+def _arange_knl(result, start, step):
+    return elementwise.get_arange_kernel(
+            result.context, result.dtype)
+
+
+def arange(queue, *args, **kwargs):
+    """arange(queue, [start, ] stop [, step], **kwargs)
+    Create an :class:`Array` filled with numbers spaced *step* apart,
+    starting from *start* and ending at *stop*. If not given, *start*
+    defaults to 0, *step* defaults to 1.
+
+    For floating point arguments, the length of the result is
+    ``ceil((stop - start)/step)``. This rule may result in the last
+    element of the result being greater than *stop*.
+
+    *dtype* is a required keyword argument.
+
+    .. versionchanged:: 2011.1
+        *context* argument was deprecated.
+
+    .. versionchanged:: 2011.2
+        *allocator* keyword argument was added.
+    """
+
+    # {{{ argument processing
+
+    # Yuck. Thanks, numpy developers. ;)
+
+    explicit_dtype = False
+    inf = _ArangeInfo()
+
+    if isinstance(args[-1], np.dtype):
+        inf.dtype = args[-1]
+        args = args[:-1]
+        explicit_dtype = True
+
+    argc = len(args)
+    if argc == 0:
+        raise ValueError("stop argument required")
+    elif argc == 1:
+        inf.stop = args[0]
+    elif argc == 2:
+        inf.start = args[0]
+        inf.stop = args[1]
+    elif argc == 3:
+        inf.start = args[0]
+        inf.stop = args[1]
+        inf.step = args[2]
+    else:
+        raise ValueError("too many arguments")
+
+    admissible_names = ["start", "stop", "step", "dtype", "allocator"]
+    for k, v in kwargs.items():
+        if k in admissible_names:
+            if getattr(inf, k) is None:
+                setattr(inf, k, v)
+                if k == "dtype":
+                    explicit_dtype = True
+            else:
+                raise ValueError(f"may not specify '{k}' by position and keyword")
+        else:
+            raise ValueError(f"unexpected keyword argument '{k}'")
+
+    if inf.start is None:
+        inf.start = 0
+    if inf.step is None:
+        inf.step = 1
+    if inf.dtype is None:
+        inf.dtype = np.array([inf.start, inf.stop, inf.step]).dtype
+
+    # }}}
+
+    # {{{ actual functionality
+
+    dtype = np.dtype(inf.dtype)
+    start = dtype.type(inf.start)
+    step = dtype.type(inf.step)
+    stop = dtype.type(inf.stop)
+
+    if not explicit_dtype:
+        raise TypeError("arange requires a dtype argument")
+
+    from math import ceil
+    size = int(ceil((stop-start)/step))
+
+    result = Array(queue, (size,), dtype, allocator=inf.allocator)
+    result.add_event(_arange_knl(result, start, step, queue=queue))
+
+    # }}}
+
+    return result
+
+# }}}
+
+
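`arange` follows the numpy calling convention but requires an explicit dtype. Illustrative sketch, not package source; setup as in the first sketch:

    idx = cl_array.arange(queue, 0, 10, 2, dtype=np.int32)
    print(idx.get())    # [0 2 4 6 8]
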
+# {{{ take/put/concatenate/diff/(h?stack)
+
+@elwise_kernel_runner
+def _take(result, ary, indices):
+    return elementwise.get_take_kernel(
+            result.context, result.dtype, indices.dtype)
+
+
+def take(a, indices, out=None, queue=None, wait_for=None):
+    """Return the :class:`Array` ``[a[indices[0]], ..., a[indices[n]]]``.
+    For the moment, *a* must be a type that can be bound to a texture.
+    """
+
+    queue = queue or a.queue
+    if out is None:
+        out = type(a)(queue, indices.shape, a.dtype, allocator=a.allocator)
+
+    assert len(indices.shape) == 1
+    out.add_event(
+            _take(out, a, indices, queue=queue, wait_for=wait_for))
+    return out
+
+
+def multi_take(arrays, indices, out=None, queue=None):
+    if not len(arrays):
+        return []
+
+    assert len(indices.shape) == 1
+
+    from pytools import single_valued
+    a_dtype = single_valued(a.dtype for a in arrays)
+    a_allocator = arrays[0].allocator
+    context = indices.context
+    queue = queue or indices.queue
+
+    vec_count = len(arrays)
+
+    if out is None:
+        out = [
+                type(arrays[i])(
+                    queue, indices.shape, a_dtype,
+                    allocator=a_allocator)
+                for i in range(vec_count)]
+    else:
+        if len(out) != len(arrays):
+            raise ValueError("out and arrays must have the same length")
+
+    chunk_size = builtins.min(vec_count, 10)
+
+    def make_func_for_chunk_size(chunk_size):
+        return elementwise.get_take_kernel(
+                indices.context, a_dtype, indices.dtype,
+                vec_count=chunk_size)
+
+    knl = make_func_for_chunk_size(chunk_size)
+
+    for start_i in range(0, len(arrays), chunk_size):
+        chunk_slice = slice(start_i, start_i+chunk_size)
+
+        if start_i + chunk_size > vec_count:
+            knl = make_func_for_chunk_size(vec_count-start_i)
+
+        gs, ls = indices._get_sizes(queue,
+                knl.get_work_group_info(
+                    cl.kernel_work_group_info.WORK_GROUP_SIZE,
+                    queue.device))
+
+        wait_for_this = (indices.events
+                + builtins.sum((i.events for i in arrays[chunk_slice]), [])
+                + builtins.sum((o.events for o in out[chunk_slice]), []))
+        evt = knl(queue, gs, ls,
+                indices.data,
+                *([o.data for o in out[chunk_slice]]
+                    + [i.data for i in arrays[chunk_slice]]
+                    + [indices.size]), wait_for=wait_for_this)
+        for o in out[chunk_slice]:
+            o.add_event(evt)
+
+    return out
+
+
+def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None,
+        out=None, queue=None, src_offsets=None):
+    if not len(arrays):
+        return []
+
+    from pytools import single_valued
+    a_dtype = single_valued(a.dtype for a in arrays)
+    a_allocator = arrays[0].allocator
+    context = src_indices.context
+    queue = queue or src_indices.queue
+
+    vec_count = len(arrays)
+
+    if out is None:
+        out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
+                for i in range(vec_count)]
+    else:
+        if a_dtype != single_valued(o.dtype for o in out):
+            raise TypeError("arrays and out must have the same dtype")
+        if len(out) != vec_count:
+            raise ValueError("out and arrays must have the same length")
+
+    if src_indices.dtype != dest_indices.dtype:
+        raise TypeError(
+                "src_indices and dest_indices must have the same dtype")
+
+    if len(src_indices.shape) != 1:
+        raise ValueError("src_indices must be 1D")
+
+    if src_indices.shape != dest_indices.shape:
+        raise ValueError(
+                "src_indices and dest_indices must have the same shape")
+
+    if src_offsets is None:
+        src_offsets_list = []
+    else:
+        src_offsets_list = src_offsets
+        if len(src_offsets) != vec_count:
+            raise ValueError(
+                    "src_indices and src_offsets must have the same length")
+
+    max_chunk_size = 10
+
+    chunk_size = builtins.min(vec_count, max_chunk_size)
+
+    def make_func_for_chunk_size(chunk_size):
+        return elementwise.get_take_put_kernel(context,
+                a_dtype, src_indices.dtype,
+                with_offsets=src_offsets is not None,
+                vec_count=chunk_size)
+
+    knl = make_func_for_chunk_size(chunk_size)
+
+    for start_i in range(0, len(arrays), chunk_size):
+        chunk_slice = slice(start_i, start_i+chunk_size)
+
+        if start_i + chunk_size > vec_count:
+            knl = make_func_for_chunk_size(vec_count-start_i)
+
+        gs, ls = src_indices._get_sizes(queue,
+                knl.get_work_group_info(
+                    cl.kernel_work_group_info.WORK_GROUP_SIZE,
+                    queue.device))
+
+        wait_for_this = (dest_indices.events + src_indices.events
+                + builtins.sum((i.events for i in arrays[chunk_slice]), [])
+                + builtins.sum((o.events for o in out[chunk_slice]), []))
+        evt = knl(queue, gs, ls,
+                *(list(out[chunk_slice])
+                    + [dest_indices, src_indices]
+                    + list(arrays[chunk_slice])
+                    + src_offsets_list[chunk_slice]
+                    + [src_indices.size]), wait_for=wait_for_this)
+        for o in out[chunk_slice]:
+            o.add_event(evt)
+
+    return out
+
+
+def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None,
+        wait_for=None):
+    if not len(arrays):
+        return []
+
+    from pytools import single_valued
+    a_dtype = single_valued(a.dtype for a in arrays)
+    a_allocator = arrays[0].allocator
+    context = dest_indices.context
+    queue = queue or dest_indices.queue
+    if wait_for is None:
+        wait_for = []
+    wait_for = wait_for + dest_indices.events
+
+    vec_count = len(arrays)
+
+    if out is None:
+        out = [type(arrays[i])(queue, dest_shape, a_dtype, allocator=a_allocator)
+                for i in range(vec_count)]
+    else:
+        if a_dtype != single_valued(o.dtype for o in out):
+            raise TypeError("arrays and out must have the same dtype")
+        if len(out) != vec_count:
+            raise ValueError("out and arrays must have the same length")
+
+    if len(dest_indices.shape) != 1:
+        raise ValueError("dest_indices must be 1D")
+
+    chunk_size = builtins.min(vec_count, 10)
+
+    # array of bools to specify whether the array of same index in this chunk
+    # will be filled with a single value.
+    use_fill = np.ndarray((chunk_size,), dtype=np.uint8)
+    array_lengths = np.ndarray((chunk_size,), dtype=np.int64)
+
+    def make_func_for_chunk_size(chunk_size):
+        knl = elementwise.get_put_kernel(
+                context, a_dtype, dest_indices.dtype,
+                vec_count=chunk_size)
+        return knl
+
+    knl = make_func_for_chunk_size(chunk_size)
+
+    for start_i in range(0, len(arrays), chunk_size):
+        chunk_slice = slice(start_i, start_i+chunk_size)
+        for fill_idx, ary in enumerate(arrays[chunk_slice]):
+            # If there is only one value in the values array for this src array
+            # in the chunk then fill every index in `dest_idx` array with it.
+            use_fill[fill_idx] = 1 if ary.size == 1 else 0
+            array_lengths[fill_idx] = len(ary)
+        # Copy the populated `use_fill` array to a buffer on the device.
+        use_fill_cla = to_device(queue, use_fill)
+        array_lengths_cla = to_device(queue, array_lengths)
+
+        if start_i + chunk_size > vec_count:
+            knl = make_func_for_chunk_size(vec_count-start_i)
+
+        gs, ls = dest_indices._get_sizes(queue,
+                knl.get_work_group_info(
+                    cl.kernel_work_group_info.WORK_GROUP_SIZE,
+                    queue.device))
+
+        wait_for_this = (wait_for
+                + builtins.sum([i.events for i in arrays[chunk_slice]], [])
+                + builtins.sum([o.events for o in out[chunk_slice]], []))
+        evt = knl(queue, gs, ls,
+                *(
+                    list(out[chunk_slice])
+                    + [dest_indices]
+                    + list(arrays[chunk_slice])
+                    + [use_fill_cla, array_lengths_cla, dest_indices.size]),
+                wait_for=wait_for_this)
+
+        for o in out[chunk_slice]:
+            o.add_event(evt)
+
+    return out
+
+
+def concatenate(arrays, axis=0, queue=None, allocator=None):
+    """
+    .. versionadded:: 2013.1
+
+    .. note::
+
+        The returned array is of the same type as the first array in the list.
+    """
+    if not arrays:
+        raise ValueError("need at least one array to concatenate")
+
+    # {{{ find properties of result array
+
+    shape = None
+
+    for i_ary, ary in enumerate(arrays):
+        queue = queue or ary.queue
+        allocator = allocator or ary.allocator
+
+        if shape is None:
+            # first array
+            shape = list(ary.shape)
+        else:
+            if len(ary.shape) != len(shape):
+                raise ValueError(
+                        f"{i_ary}-th array has different number of axes: "
+                        f"expected {len(shape)}, got {len(ary.shape)}")
+
+            ary_shape_list = list(ary.shape)
+            if (ary_shape_list[:axis] != shape[:axis]
+                    or ary_shape_list[axis+1:] != shape[axis+1:]):
+                raise ValueError(
+                        f"{i_ary}-th array has a shape not matching "
+                        "the other arrays outside the concatenation axis")
+
+            # pylint: disable=unsupported-assignment-operation
+            shape[axis] += ary.shape[axis]
+
+    # }}}
+
+    shape = tuple(shape)
+    dtype = np.result_type(*[ary.dtype for ary in arrays])
+
+    if __debug__:
+        if builtins.any(type(ary) != type(arrays[0])  # noqa: E721
+                for ary in arrays[1:]):
+            warn("Elements of 'arrays' not of the same type, returning "
+                 "an instance of the type of arrays[0]",
+                 stacklevel=2)
+
+    result = arrays[0].__class__(queue, shape, dtype, allocator=allocator)
+
+    full_slice = (slice(None),) * len(shape)
+
+    base_idx = 0
+    for ary in arrays:
+        my_len = ary.shape[axis]
+        result.setitem(
+                full_slice[:axis]
+                + (slice(base_idx, base_idx+my_len),)
+                + full_slice[axis+1:],
+                ary)
+
+        base_idx += my_len
+
+    return result
+
+
+@elwise_kernel_runner
+def _diff(result, array):
+    return elementwise.get_diff_kernel(array.context, array.dtype)
+
+
+def diff(array, queue=None, allocator=None):
+    """
+    .. versionadded:: 2013.2
+    """
+
+    if len(array.shape) != 1:
+        raise ValueError("multi-D arrays are not supported")
+
+    n, = array.shape
+
+    queue = queue or array.queue
+    allocator = allocator or array.allocator
+
+    result = array.__class__(queue, (n-1,), array.dtype, allocator=allocator)
+    event1 = _diff(result, array, queue=queue)
+    result.add_event(event1)
+    return result
+
+
+def hstack(arrays, queue=None):
+    if len(arrays) == 0:
+        raise ValueError("need at least one array to hstack")
+
+    if queue is None:
+        for ary in arrays:
+            if ary.queue is not None:
+                queue = ary.queue
+                break
+
+    from pytools import all_equal, single_valued
+    if not all_equal(len(ary.shape) for ary in arrays):
+        raise ValueError("arguments must all have the same number of axes")
+
+    lead_shape = single_valued(ary.shape[:-1] for ary in arrays)
+
+    w = builtins.sum([ary.shape[-1] for ary in arrays])
+
+    if __debug__:
+        if builtins.any(type(ary) != type(arrays[0])  # noqa: E721
+                for ary in arrays[1:]):
+            warn("Elements of 'arrays' not of the same type, returning "
+                 "an instance of the type of arrays[0]",
+                 stacklevel=2)
+
+    result = arrays[0].__class__(queue, lead_shape+(w,), arrays[0].dtype,
+            allocator=arrays[0].allocator)
+    index = 0
+    for ary in arrays:
+        result[..., index:index+ary.shape[-1]] = ary
+        index += ary.shape[-1]
+
+    return result
+
+
+def stack(arrays, axis=0, queue=None):
+    """
+    Join a sequence of arrays along a new axis.
+
+    :arg arrays: A sequence of :class:`Array`.
+    :arg axis: Index of the dimension of the new axis in the result array.
+        Can be -1, for the new axis to be last dimension.
+
+    :returns: :class:`Array`
+    """
+    if not arrays:
+        raise ValueError("need at least one array to stack")
+
+    input_shape = arrays[0].shape
+    input_ndim = arrays[0].ndim
+    axis = input_ndim if axis == -1 else axis
+
+    if queue is None:
+        for ary in arrays:
+            if ary.queue is not None:
+                queue = ary.queue
+                break
+
+    if not builtins.all(ary.shape == input_shape for ary in arrays[1:]):
+        raise ValueError("arrays must have the same shape")
+
+    if not (0 <= axis <= input_ndim):
+        raise ValueError("invalid axis")
+
+    if (axis == 0 and not builtins.all(
+            ary.flags.c_contiguous for ary in arrays)):
+        # pyopencl.Array.__setitem__ does not support non-contiguous assignments
+        raise NotImplementedError
+
+    if (axis == input_ndim and not builtins.all(
+            ary.flags.f_contiguous for ary in arrays)):
+        # pyopencl.Array.__setitem__ does not support non-contiguous assignments
+        raise NotImplementedError
+
+    result_shape = input_shape[:axis] + (len(arrays),) + input_shape[axis:]
+
+    if __debug__:
+        if builtins.any(type(ary) != type(arrays[0])  # noqa: E721
+                for ary in arrays[1:]):
+            warn("Elements of 'arrays' not of the same type, returning "
+                 "an instance of the type of arrays[0]",
+                 stacklevel=2)
+
+    result = arrays[0].__class__(queue, result_shape,
+                                 np.result_type(*(ary.dtype
+                                                  for ary in arrays)),
+                                 # TODO: reconsider once arrays support
+                                 # non-contiguous assignments
+                                 order="C" if axis == 0 else "F",
+                                 allocator=arrays[0].allocator)
+    for i, ary in enumerate(arrays):
+        idx = (slice(None),)*axis + (i,) + (slice(None),)*(input_ndim-axis)
+        result[idx] = ary
+
+    return result
+
+# }}}
+
+
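`concatenate` joins along an existing axis, while `stack` introduces a new one. Illustrative sketch, not package source; setup as in the first sketch:

    x = cl_array.to_device(queue, np.ones(3, dtype=np.float32))
    y = cl_array.to_device(queue, np.zeros(3, dtype=np.float32))
    xy = cl_array.concatenate((x, y))       # shape (6,)
    pair = cl_array.stack((x, y), axis=0)   # shape (2, 3)
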
+# {{{ shape manipulation
+
+def transpose(a, axes=None):
+    """Permute the dimensions of an array.
+
+    :arg a: :class:`Array`
+    :arg axes: list of ints, optional.
+        By default, reverse the dimensions, otherwise permute the axes
+        according to the values given.
+
+    :returns: :class:`Array` A view of the array with its axes permuted.
+    """
+    return a.transpose(axes)
+
+
+def reshape(a, shape):
+    """Gives a new shape to an array without changing its data.
+
+    .. versionadded:: 2015.2
+    """
+
+    return a.reshape(shape)
+
+# }}}
+
+
+# {{{ conditionals
+
+@elwise_kernel_runner
+def _if_positive(result, criterion, then_, else_):
+    return elementwise.get_if_positive_kernel(
+            result.context, criterion.dtype, then_.dtype,
+            is_then_array=isinstance(then_, Array),
+            is_else_array=isinstance(else_, Array),
+            is_then_scalar=then_.shape == (),
+            is_else_scalar=else_.shape == (),
+            )
+
+
+def if_positive(criterion, then_, else_, out=None, queue=None):
+    """Return an array like *then_*, which, for the element at index *i*,
+    contains *then_[i]* if *criterion[i]>0*, else *else_[i]*.
+    """
+
+    is_then_scalar = isinstance(then_, SCALAR_CLASSES)
+    is_else_scalar = isinstance(else_, SCALAR_CLASSES)
+    if isinstance(criterion, SCALAR_CLASSES) and is_then_scalar and is_else_scalar:
+        result = np.where(criterion, then_, else_)
+
+        if out is not None:
+            out[...] = result
+            return out
+
+        return result
+
+    if is_then_scalar:
+        then_ = np.array(then_)
+
+    if is_else_scalar:
+        else_ = np.array(else_)
+
+    if then_.dtype != else_.dtype:
+        raise ValueError(
+                f"dtypes do not match: then_ is '{then_.dtype}' and "
+                f"else_ is '{else_.dtype}'")
+
+    if then_.shape == () and else_.shape == ():
+        pass
+    elif then_.shape != () and else_.shape != ():
+        if not (criterion.shape == then_.shape == else_.shape):
+            raise ValueError(
+                    f"shapes do not match: 'criterion' has shape {criterion.shape}"
+                    f", 'then_' has shape {then_.shape} and 'else_' has shape "
+                    f"{else_.shape}")
+    elif then_.shape == ():
+        if criterion.shape != else_.shape:
+            raise ValueError(
+                    f"shapes do not match: 'criterion' has shape {criterion.shape}"
+                    f" and 'else_' has shape {else_.shape}")
+    elif else_.shape == ():
+        if criterion.shape != then_.shape:
+            raise ValueError(
+                    f"shapes do not match: 'criterion' has shape {criterion.shape}"
+                    f" and 'then_' has shape {then_.shape}")
+    else:
+        raise AssertionError()
+
+    if out is None:
+        if then_.shape != ():
+            out = empty_like(
+                    then_, criterion.queue, allocator=criterion.allocator)
+        else:
+            # Use same strides as criterion
+            cr_byte_strides = np.array(criterion.strides, dtype=np.int64)
+            cr_item_strides = cr_byte_strides // criterion.dtype.itemsize
+            out_strides = tuple(cr_item_strides*then_.dtype.itemsize)
+
+            out = type(criterion)(
+                    criterion.queue, criterion.shape, then_.dtype,
+                    allocator=criterion.allocator,
+                    strides=out_strides)
+
+    event1 = _if_positive(out, criterion, then_, else_, queue=queue)
+    out.add_event(event1)
+
+    return out
+
+# }}}
+
+
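`if_positive` is the device-side analogue of :func:`numpy.where` with a strict positivity test on *criterion*. Illustrative sketch, not package source; setup as in the first sketch:

    crit = cl_array.to_device(queue, np.array([1, -1, 2, 0], dtype=np.int32))
    picked = cl_array.if_positive(crit, crit, -crit)   # elementwise select
    print(picked.get())    # [1 1 2 0]
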
+# {{{ minimum/maximum
+
+@elwise_kernel_runner
+def _minimum_maximum_backend(out, a, b, minmax):
+    from pyopencl.elementwise import get_minmaximum_kernel
+    return get_minmaximum_kernel(out.context, minmax,
+            out.dtype,
+            a.dtype if isinstance(a, Array) else np.dtype(type(a)),
+            b.dtype if isinstance(b, Array) else np.dtype(type(b)),
+            elementwise.get_argument_kind(a),
+            elementwise.get_argument_kind(b))
+
+
+def maximum(a, b, out=None, queue=None):
+    """Return the elementwise maximum of *a* and *b*."""
+
+    a_is_scalar = np.isscalar(a)
+    b_is_scalar = np.isscalar(b)
+    if a_is_scalar and b_is_scalar:
+        result = np.maximum(a, b)
+        if out is not None:
+            out[...] = result
+            return out
+
+        return result
+
+    queue = queue or a.queue or b.queue
+
+    if out is None:
+        out_dtype = _get_common_dtype(a, b, queue)
+        if not a_is_scalar:
+            out = a._new_like_me(out_dtype, queue)
+        elif not b_is_scalar:
+            out = b._new_like_me(out_dtype, queue)
+
+    out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="max"))
+
+    return out
+
+
+def minimum(a, b, out=None, queue=None):
+    """Return the elementwise minimum of *a* and *b*."""
+    a_is_scalar = np.isscalar(a)
+    b_is_scalar = np.isscalar(b)
+    if a_is_scalar and b_is_scalar:
+        result = np.minimum(a, b)
+        if out is not None:
+            out[...] = result
+            return out
+
+        return result
+
+    queue = queue or a.queue or b.queue
+
+    if out is None:
+        out_dtype = _get_common_dtype(a, b, queue)
+        if not a_is_scalar:
+            out = a._new_like_me(out_dtype, queue)
+        elif not b_is_scalar:
+            out = b._new_like_me(out_dtype, queue)
+
+    out.add_event(_minimum_maximum_backend(out, a, b, queue=queue, minmax="min"))
+
+    return out
+
+# }}}
+
+
+# {{{ logical ops
+
+def _logical_op(x1, x2, out, operator, queue=None):
+    # NOTE: Copied from pycuda.gpuarray
+    assert operator in ["&&", "||"]
+
+    if np.isscalar(x1) and np.isscalar(x2):
+        if out is None:
+            out = empty(queue, shape=(), dtype=np.int8)
+
+        if operator == "&&":
+            out[:] = np.logical_and(x1, x2)
+        else:
+            out[:] = np.logical_or(x1, x2)
+    elif np.isscalar(x1) or np.isscalar(x2):
+        scalar_arg, = [x for x in (x1, x2) if np.isscalar(x)]
+        ary_arg, = [x for x in (x1, x2) if not np.isscalar(x)]
+        queue = queue or ary_arg.queue
+        allocator = ary_arg.allocator
+
+        if not isinstance(ary_arg, Array):
+            raise ValueError("logical_and can take either scalar or Array"
+                    " as inputs")
+
+        out = out or ary_arg._new_like_me(dtype=np.int8)
+
+        assert out.shape == ary_arg.shape and out.dtype == np.int8
+
+        knl = elementwise.get_array_scalar_binop_kernel(
+                queue.context,
+                operator,
+                out.dtype,
+                ary_arg.dtype,
+                np.dtype(type(scalar_arg))
+                )
+        elwise_kernel_runner(lambda *args, **kwargs: knl)(out, ary_arg, scalar_arg)
+    else:
+        if not (isinstance(x1, Array) and isinstance(x2, Array)):
+            raise ValueError("logical_or/logical_and can take either scalar"
+                    " or Arrays as inputs")
+        if x1.shape != x2.shape:
+            raise NotImplementedError("Broadcasting not supported")
+
+        queue = queue or x1.queue or x2.queue
+        allocator = x1.allocator or x2.allocator
+
+        if out is None:
+            out = empty(queue, allocator=allocator,
+                    shape=x1.shape, dtype=np.int8)
+
+        assert out.shape == x1.shape and out.dtype == np.int8
+
+        knl = elementwise.get_array_binop_kernel(
+                queue.context,
+                operator,
+                out.dtype,
+                x1.dtype, x2.dtype)
+        elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x1, x2)
+
+    return out
+
+
+def logical_and(x1, x2, /, out=None, queue=None):
+    """
+    Returns the element-wise logical AND of *x1* and *x2*.
+    """
+    return _logical_op(x1, x2, out, "&&", queue=queue)
+
+
+def logical_or(x1, x2, /, out=None, queue=None):
+    """
+    Returns the element-wise logical OR of *x1* and *x2*.
+    """
+    return _logical_op(x1, x2, out, "||", queue=queue)
+
+
+def logical_not(x, /, out=None, queue=None):
+    """
+    Returns the element-wise logical NOT of *x*.
+    """
+    if np.isscalar(x):
+        out = out or empty(queue, shape=(), dtype=np.int8)
+        out[:] = np.logical_not(x)
+    else:
+        queue = queue or x.queue
+        out = out or empty(queue, shape=x.shape, dtype=np.int8,
+                allocator=x.allocator)
+        knl = elementwise.get_logical_not_kernel(queue.context,
+                x.dtype)
+        elwise_kernel_runner(lambda *args, **kwargs: knl)(out, x)
+
+    return out
+
+# }}}
+
+
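Like the comparisons, the logical functions produce int8 truth arrays. Illustrative sketch, not package source; setup as in the first sketch:

    p = cl_array.to_device(queue, np.array([0, 1, 2], dtype=np.int32))
    q = cl_array.to_device(queue, np.array([1, 0, 2], dtype=np.int32))
    both = cl_array.logical_and(p, q)     # [0 0 1], dtype int8
    negated = cl_array.logical_not(p)     # [1 0 0]
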
+# {{{ reductions
+
+def sum(a, dtype=None, queue=None, slice=None, initial=np._NoValue):
+    """
+    .. versionadded:: 2011.1
+    """
+    if initial is not np._NoValue and not isinstance(initial, SCALAR_CLASSES):
+        raise ValueError("'initial' is not a scalar")
+
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+
+    from pyopencl.reduction import get_sum_kernel
+    krnl = get_sum_kernel(a.context, dtype, a.dtype)
+    result, event1 = krnl(a, queue=queue, slice=slice, wait_for=a.events,
+            return_event=True)
+    result.add_event(event1)
+
+    # NOTE: neutral element in `get_sum_kernel` is 0 by default
+    if initial is not np._NoValue:
+        result += a.dtype.type(initial)
+
+    return result
+
+
+def any(a, queue=None, wait_for=None):
+    if len(a) == 0:
+        return _BOOL_DTYPE.type(False)
+
+    return a.any(queue=queue, wait_for=wait_for)
+
+
+def all(a, queue=None, wait_for=None):
+    if len(a) == 0:
+        return _BOOL_DTYPE.type(True)
+
+    return a.all(queue=queue, wait_for=wait_for)
+
+
+def dot(a, b, dtype=None, queue=None, slice=None):
+    """
+    .. versionadded:: 2011.1
+    """
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+
+    from pyopencl.reduction import get_dot_kernel
+    krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype)
+
+    result, event1 = krnl(a, b, queue=queue, slice=slice,
+            wait_for=a.events + b.events, return_event=True)
+    result.add_event(event1)
+
+    return result
+
+
+def vdot(a, b, dtype=None, queue=None, slice=None):
+    """Like :func:`numpy.vdot`.
+
+    .. versionadded:: 2013.1
+    """
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+
+    from pyopencl.reduction import get_dot_kernel
+    krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype,
+            conjugate_first=True)
+
+    result, event1 = krnl(a, b, queue=queue, slice=slice,
+            wait_for=a.events + b.events, return_event=True)
+    result.add_event(event1)
+
+    return result
+
+
+def subset_dot(subset, a, b, dtype=None, queue=None, slice=None):
+    """
+    .. versionadded:: 2011.1
+    """
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+
+    from pyopencl.reduction import get_subset_dot_kernel
+    krnl = get_subset_dot_kernel(
+            a.context, dtype, subset.dtype, a.dtype, b.dtype)
+
+    result, event1 = krnl(subset, a, b, queue=queue, slice=slice,
+            wait_for=subset.events + a.events + b.events, return_event=True)
+    result.add_event(event1)
+
+    return result
+
+
+def _make_minmax_kernel(what):
+    def f(a, queue=None, initial=np._NoValue):
+        if isinstance(a, SCALAR_CLASSES):
+            return np.array(a).dtype.type(a)
+
+        if len(a) == 0:
+            if initial is np._NoValue:
+                raise ValueError(
+                        f"zero-size array to reduction '{what}' "
+                        "which has no identity")
+            else:
+                return initial
+
+        if initial is not np._NoValue and not isinstance(initial, SCALAR_CLASSES):
+            raise ValueError("'initial' is not a scalar")
+
+        from pyopencl.reduction import get_minmax_kernel
+        krnl = get_minmax_kernel(a.context, what, a.dtype)
+        result, event1 = krnl(a, queue=queue, wait_for=a.events,
+                return_event=True)
+        result.add_event(event1)
+
+        if initial is not np._NoValue:
+            initial = a.dtype.type(initial)
+            if what == "min":
+                result = minimum(result, initial, queue=queue)
+            elif what == "max":
+                result = maximum(result, initial, queue=queue)
+            else:
+                raise ValueError(f"unknown minmax reduction type: '{what}'")
+
+        return result
+
+    return f
+
+
+min = _make_minmax_kernel("min")
+min.__name__ = "min"
+min.__doc__ = """
+    .. versionadded:: 2011.1
+    """
+
+max = _make_minmax_kernel("max")
+max.__name__ = "max"
+max.__doc__ = """
+    .. versionadded:: 2011.1
+    """
+
+
+def _make_subset_minmax_kernel(what):
+    def f(subset, a, queue=None, slice=None):
+        from pyopencl.reduction import get_subset_minmax_kernel
+        krnl = get_subset_minmax_kernel(a.context, what, a.dtype, subset.dtype)
+        result, event1 = krnl(subset, a, queue=queue, slice=slice,
+                wait_for=a.events + subset.events, return_event=True)
+        result.add_event(event1)
+        return result
+    return f
+
+
+subset_min = _make_subset_minmax_kernel("min")
+subset_min.__doc__ = """.. versionadded:: 2011.1"""
+subset_max = _make_subset_minmax_kernel("max")
+subset_max.__doc__ = """.. versionadded:: 2011.1"""
+
+# }}}
+
+
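Each reduction returns a zero-dimensional device array; call `.get()` to pull the scalar to the host. Illustrative sketch, not package source; setup as in the first sketch:

    w = cl_array.arange(queue, 5, dtype=np.float32)   # [0 1 2 3 4]
    print(cl_array.sum(w).get())      # 10.0
    print(cl_array.max(w).get())      # 4.0
    print(cl_array.dot(w, w).get())   # 30.0
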
+# {{{ scans
+
+def cumsum(a, output_dtype=None, queue=None,
+        wait_for=None, return_event=False):
+    # undocumented for now
+
+    """
+    .. versionadded:: 2013.1
+    """
+
+    if output_dtype is None:
+        output_dtype = a.dtype
+    else:
+        output_dtype = np.dtype(output_dtype)
+
+    if wait_for is None:
+        wait_for = []
+
+    result = a._new_like_me(output_dtype)
+
+    from pyopencl.scan import get_cumsum_kernel
+    krnl = get_cumsum_kernel(a.context, a.dtype, output_dtype)
+    evt = krnl(a, result, queue=queue, wait_for=wait_for + a.events)
+    result.add_event(evt)
+
+    if return_event:
+        return evt, result
+    else:
+        return result
+
+# }}}
+
+# vim: foldmethod=marker