pyopencl 2024.2__cp312-cp312-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (122) hide show
  1. pyopencl/__init__.py +2393 -0
  2. pyopencl/_cl.cpython-312-darwin.so +0 -0
  3. pyopencl/_cluda.py +54 -0
  4. pyopencl/_mymako.py +14 -0
  5. pyopencl/algorithm.py +1444 -0
  6. pyopencl/array.py +3427 -0
  7. pyopencl/bitonic_sort.py +238 -0
  8. pyopencl/bitonic_sort_templates.py +594 -0
  9. pyopencl/cache.py +534 -0
  10. pyopencl/capture_call.py +176 -0
  11. pyopencl/characterize/__init__.py +433 -0
  12. pyopencl/characterize/performance.py +237 -0
  13. pyopencl/cl/pyopencl-airy.cl +324 -0
  14. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  15. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  16. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  17. pyopencl/cl/pyopencl-complex.h +303 -0
  18. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  19. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  20. pyopencl/cl/pyopencl-random123/array.h +325 -0
  21. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  22. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  23. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  24. pyopencl/clmath.py +280 -0
  25. pyopencl/clrandom.py +408 -0
  26. pyopencl/cltypes.py +137 -0
  27. pyopencl/compyte/__init__.py +0 -0
  28. pyopencl/compyte/array.py +214 -0
  29. pyopencl/compyte/dtypes.py +290 -0
  30. pyopencl/compyte/ndarray/__init__.py +0 -0
  31. pyopencl/compyte/ndarray/gen_elemwise.py +1907 -0
  32. pyopencl/compyte/ndarray/gen_reduction.py +1511 -0
  33. pyopencl/compyte/ndarray/setup_opencl.py +101 -0
  34. pyopencl/compyte/ndarray/test_gpu_elemwise.py +411 -0
  35. pyopencl/compyte/ndarray/test_gpu_ndarray.py +487 -0
  36. pyopencl/elementwise.py +1164 -0
  37. pyopencl/invoker.py +418 -0
  38. pyopencl/ipython_ext.py +68 -0
  39. pyopencl/reduction.py +780 -0
  40. pyopencl/scan.py +1898 -0
  41. pyopencl/tools.py +1513 -0
  42. pyopencl/version.py +3 -0
  43. pyopencl-2024.2.data/data/CITATION.cff +74 -0
  44. pyopencl-2024.2.data/data/LICENSE +282 -0
  45. pyopencl-2024.2.data/data/Makefile.in +21 -0
  46. pyopencl-2024.2.data/data/README.rst +70 -0
  47. pyopencl-2024.2.data/data/README_SETUP.txt +34 -0
  48. pyopencl-2024.2.data/data/aksetup_helper.py +1013 -0
  49. pyopencl-2024.2.data/data/configure.py +6 -0
  50. pyopencl-2024.2.data/data/contrib/cldis.py +91 -0
  51. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/README +29 -0
  52. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/translate.py +1441 -0
  53. pyopencl-2024.2.data/data/contrib/pyopencl.vim +84 -0
  54. pyopencl-2024.2.data/data/doc/Makefile +23 -0
  55. pyopencl-2024.2.data/data/doc/algorithm.rst +214 -0
  56. pyopencl-2024.2.data/data/doc/array.rst +305 -0
  57. pyopencl-2024.2.data/data/doc/conf.py +26 -0
  58. pyopencl-2024.2.data/data/doc/howto.rst +105 -0
  59. pyopencl-2024.2.data/data/doc/index.rst +137 -0
  60. pyopencl-2024.2.data/data/doc/make_constants.py +561 -0
  61. pyopencl-2024.2.data/data/doc/misc.rst +885 -0
  62. pyopencl-2024.2.data/data/doc/runtime.rst +51 -0
  63. pyopencl-2024.2.data/data/doc/runtime_const.rst +30 -0
  64. pyopencl-2024.2.data/data/doc/runtime_gl.rst +78 -0
  65. pyopencl-2024.2.data/data/doc/runtime_memory.rst +527 -0
  66. pyopencl-2024.2.data/data/doc/runtime_platform.rst +184 -0
  67. pyopencl-2024.2.data/data/doc/runtime_program.rst +364 -0
  68. pyopencl-2024.2.data/data/doc/runtime_queue.rst +182 -0
  69. pyopencl-2024.2.data/data/doc/subst.rst +36 -0
  70. pyopencl-2024.2.data/data/doc/tools.rst +4 -0
  71. pyopencl-2024.2.data/data/doc/types.rst +42 -0
  72. pyopencl-2024.2.data/data/examples/black-hole-accretion.py +2227 -0
  73. pyopencl-2024.2.data/data/examples/demo-struct-reduce.py +75 -0
  74. pyopencl-2024.2.data/data/examples/demo.py +39 -0
  75. pyopencl-2024.2.data/data/examples/demo_array.py +32 -0
  76. pyopencl-2024.2.data/data/examples/demo_array_svm.py +37 -0
  77. pyopencl-2024.2.data/data/examples/demo_elementwise.py +34 -0
  78. pyopencl-2024.2.data/data/examples/demo_elementwise_complex.py +53 -0
  79. pyopencl-2024.2.data/data/examples/demo_mandelbrot.py +183 -0
  80. pyopencl-2024.2.data/data/examples/demo_meta_codepy.py +56 -0
  81. pyopencl-2024.2.data/data/examples/demo_meta_template.py +55 -0
  82. pyopencl-2024.2.data/data/examples/dump-performance.py +38 -0
  83. pyopencl-2024.2.data/data/examples/dump-properties.py +86 -0
  84. pyopencl-2024.2.data/data/examples/gl_interop_demo.py +84 -0
  85. pyopencl-2024.2.data/data/examples/gl_particle_animation.py +218 -0
  86. pyopencl-2024.2.data/data/examples/ipython-demo.ipynb +203 -0
  87. pyopencl-2024.2.data/data/examples/median-filter.py +99 -0
  88. pyopencl-2024.2.data/data/examples/n-body.py +1070 -0
  89. pyopencl-2024.2.data/data/examples/narray.py +37 -0
  90. pyopencl-2024.2.data/data/examples/noisyImage.jpg +0 -0
  91. pyopencl-2024.2.data/data/examples/pi-monte-carlo.py +1166 -0
  92. pyopencl-2024.2.data/data/examples/svm.py +82 -0
  93. pyopencl-2024.2.data/data/examples/transpose.py +229 -0
  94. pyopencl-2024.2.data/data/pytest.ini +3 -0
  95. pyopencl-2024.2.data/data/src/bitlog.cpp +51 -0
  96. pyopencl-2024.2.data/data/src/bitlog.hpp +83 -0
  97. pyopencl-2024.2.data/data/src/clinfo_ext.h +134 -0
  98. pyopencl-2024.2.data/data/src/mempool.hpp +444 -0
  99. pyopencl-2024.2.data/data/src/pyopencl_ext.h +77 -0
  100. pyopencl-2024.2.data/data/src/tools.hpp +90 -0
  101. pyopencl-2024.2.data/data/src/wrap_cl.cpp +61 -0
  102. pyopencl-2024.2.data/data/src/wrap_cl.hpp +5853 -0
  103. pyopencl-2024.2.data/data/src/wrap_cl_part_1.cpp +369 -0
  104. pyopencl-2024.2.data/data/src/wrap_cl_part_2.cpp +702 -0
  105. pyopencl-2024.2.data/data/src/wrap_constants.cpp +1274 -0
  106. pyopencl-2024.2.data/data/src/wrap_helpers.hpp +213 -0
  107. pyopencl-2024.2.data/data/src/wrap_mempool.cpp +731 -0
  108. pyopencl-2024.2.data/data/test/add-vectors-32.spv +0 -0
  109. pyopencl-2024.2.data/data/test/add-vectors-64.spv +0 -0
  110. pyopencl-2024.2.data/data/test/empty-header.h +1 -0
  111. pyopencl-2024.2.data/data/test/test_algorithm.py +1180 -0
  112. pyopencl-2024.2.data/data/test/test_array.py +2392 -0
  113. pyopencl-2024.2.data/data/test/test_arrays_in_structs.py +100 -0
  114. pyopencl-2024.2.data/data/test/test_clmath.py +529 -0
  115. pyopencl-2024.2.data/data/test/test_clrandom.py +75 -0
  116. pyopencl-2024.2.data/data/test/test_enqueue_copy.py +271 -0
  117. pyopencl-2024.2.data/data/test/test_wrapper.py +1554 -0
  118. pyopencl-2024.2.dist-info/LICENSE +282 -0
  119. pyopencl-2024.2.dist-info/METADATA +105 -0
  120. pyopencl-2024.2.dist-info/RECORD +122 -0
  121. pyopencl-2024.2.dist-info/WHEEL +5 -0
  122. pyopencl-2024.2.dist-info/top_level.txt +1 -0
pyopencl/__init__.py ADDED
@@ -0,0 +1,2393 @@
1
+ __copyright__ = "Copyright (C) 2009-15 Andreas Kloeckner"
2
+
3
+ __license__ = """
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in
12
+ all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ THE SOFTWARE.
21
+ """
22
+
23
+ import logging
24
+ from sys import intern
25
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
26
+ from warnings import warn
27
+
28
+ # must import, otherwise dtype registry will not be fully populated
29
+ import pyopencl.cltypes # noqa: F401
30
+ from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa: F401
31
+
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # This helps ocl-icd find the shipped OpenCL ICDs, cf.
36
+ # https://github.com/isuruf/ocl-icd/commit/3862386b51930f95d9ad1089f7157a98165d5a6b
37
+ # via
38
+ # https://github.com/inducer/pyopencl/blob/0b3d0ef92497e6838eea300b974f385f94cb5100/scripts/build-wheels.sh#L43-L44
39
+ import os
40
+
41
+
42
+ os.environ["PYOPENCL_HOME"] = os.path.dirname(os.path.abspath(__file__))
43
+
44
+ try:
45
+ import pyopencl._cl as _cl
46
+ except ImportError:
47
+ from os.path import dirname, join, realpath
48
+ if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)):
49
+ warn(
50
+ "It looks like you are importing PyOpenCL from "
51
+ "its source directory. This likely won't work.",
52
+ stacklevel=2)
53
+ raise
54
+
55
+ import numpy as np # noqa: I003
56
+
57
+ import sys
58
+
59
+ _PYPY = "__pypy__" in sys.builtin_module_names
60
+
61
+ from pyopencl._cl import ( # noqa: F401
62
+ get_cl_header_version,
63
+ program_kind,
64
+ status_code,
65
+ platform_info,
66
+ device_type,
67
+ device_info,
68
+ device_topology_type_amd,
69
+ device_fp_config,
70
+ device_mem_cache_type,
71
+ device_local_mem_type,
72
+ device_exec_capabilities,
73
+ device_svm_capabilities,
74
+
75
+ command_queue_properties,
76
+ context_info,
77
+ gl_context_info,
78
+ context_properties,
79
+ command_queue_info,
80
+ queue_properties,
81
+
82
+ mem_flags,
83
+ svm_mem_flags,
84
+
85
+ channel_order,
86
+ channel_type,
87
+ mem_object_type,
88
+ mem_info,
89
+ image_info,
90
+ pipe_info,
91
+ pipe_properties,
92
+ addressing_mode,
93
+ filter_mode,
94
+ sampler_info,
95
+ sampler_properties,
96
+ map_flags,
97
+ program_info,
98
+ program_build_info,
99
+ program_binary_type,
100
+
101
+ kernel_info,
102
+ kernel_arg_info,
103
+ kernel_arg_address_qualifier,
104
+ kernel_arg_access_qualifier,
105
+ kernel_arg_type_qualifier,
106
+ kernel_work_group_info,
107
+ kernel_sub_group_info,
108
+
109
+ event_info,
110
+ command_type,
111
+ command_execution_status,
112
+ profiling_info,
113
+ mem_migration_flags,
114
+ device_partition_property,
115
+ device_affinity_domain,
116
+ device_atomic_capabilities,
117
+ device_device_enqueue_capabilities,
118
+
119
+ version_bits,
120
+ khronos_vendor_id,
121
+
122
+ Error, MemoryError, LogicError, RuntimeError,
123
+
124
+ Platform,
125
+ get_platforms,
126
+
127
+ Device,
128
+ Context,
129
+ CommandQueue,
130
+ LocalMemory,
131
+ MemoryObjectHolder,
132
+ MemoryObject,
133
+ MemoryMap,
134
+ Buffer,
135
+
136
+ _Program,
137
+ Kernel,
138
+
139
+ Event,
140
+ wait_for_events,
141
+ NannyEvent,
142
+
143
+ enqueue_nd_range_kernel,
144
+
145
+ _enqueue_marker,
146
+
147
+ _enqueue_read_buffer,
148
+ _enqueue_write_buffer,
149
+ _enqueue_copy_buffer,
150
+ _enqueue_read_buffer_rect,
151
+ _enqueue_write_buffer_rect,
152
+ _enqueue_copy_buffer_rect,
153
+
154
+ _enqueue_read_image,
155
+ _enqueue_copy_image,
156
+ _enqueue_write_image,
157
+ _enqueue_copy_image_to_buffer,
158
+ _enqueue_copy_buffer_to_image,
159
+
160
+ have_gl,
161
+
162
+ ImageFormat,
163
+ get_supported_image_formats,
164
+
165
+ Image,
166
+ Sampler,
167
+
168
+ # This class is available unconditionally, even though CL only
169
+ # has it on CL2.0 and newer.
170
+ Pipe,
171
+ )
172
+
173
+
174
+ try:
175
+ from pyopencl._cl import DeviceTopologyAmd # noqa: F401
176
+ from pyopencl._cl import enqueue_copy_buffer_p2p_amd # noqa: F401
177
+ except ImportError:
178
+ pass
179
+
180
+ if not _PYPY:
181
+ # FIXME: Add back to default set when pypy support catches up
182
+ from pyopencl._cl import enqueue_map_buffer # noqa: F401
183
+ from pyopencl._cl import enqueue_map_image # noqa: F401
184
+
185
+ if get_cl_header_version() >= (1, 1):
186
+ from pyopencl._cl import UserEvent # noqa: F401
187
+ if get_cl_header_version() >= (1, 2):
188
+ from pyopencl._cl import ImageDescriptor # noqa: F401
189
+ from pyopencl._cl import ( # noqa: F401
190
+ _enqueue_barrier_with_wait_list, _enqueue_fill_buffer,
191
+ _enqueue_marker_with_wait_list, enqueue_fill_image,
192
+ enqueue_migrate_mem_objects, unload_platform_compiler)
193
+
194
+ if get_cl_header_version() >= (2, 0):
195
+ from pyopencl._cl import SVM, SVMAllocation, SVMPointer # noqa: F401
196
+
197
+ if _cl.have_gl():
198
+ from pyopencl._cl import ( # noqa: F401
199
+ GLBuffer, GLRenderBuffer, GLTexture, gl_object_type, gl_texture_info)
200
+
201
+ try:
202
+ from pyopencl._cl import get_apple_cgl_share_group # noqa: F401
203
+ except ImportError:
204
+ pass
205
+
206
+ try:
207
+ from pyopencl._cl import enqueue_acquire_gl_objects # noqa: F401
208
+ from pyopencl._cl import enqueue_release_gl_objects # noqa: F401
209
+ except ImportError:
210
+ pass
211
+
212
+ import inspect as _inspect
213
+
214
+
215
# All classes exported by the C++ wrapper module whose names start with a
# lowercase letter, except for a few names that are explicitly excluded.
CONSTANT_CLASSES = tuple(
    candidate
    for attr_name, candidate in (
        (attr_name, getattr(_cl, attr_name)) for attr_name in dir(_cl))
    if _inspect.isclass(candidate)
    and attr_name[0].islower()
    and attr_name not in ["zip", "map", "range"])
219
+
220
+ BITFIELD_CONSTANT_CLASSES = (
221
+ _cl.device_type,
222
+ _cl.device_fp_config,
223
+ _cl.device_exec_capabilities,
224
+ _cl.command_queue_properties,
225
+ _cl.mem_flags,
226
+ _cl.map_flags,
227
+ _cl.kernel_arg_type_qualifier,
228
+ _cl.device_affinity_domain,
229
+ _cl.mem_migration_flags,
230
+ _cl.device_svm_capabilities,
231
+ _cl.queue_properties,
232
+ _cl.svm_mem_flags,
233
+ _cl.device_atomic_capabilities,
234
+ _cl.device_device_enqueue_capabilities,
235
+ _cl.version_bits,
236
+ )
237
+
238
+
239
+ # {{{ diagnostics
240
+
241
class CompilerWarning(UserWarning):
    """Warning category used to report non-empty OpenCL compiler output."""
243
+
244
+
245
class CommandQueueUsedAfterExit(UserWarning):
    """Warning category for use of a command queue after it was exited.

    NOTE(review): the code issuing this warning is not part of this section
    of the file; the description above is inferred from the class name and
    should be confirmed against the place where the warning is raised.
    """
247
+
248
+
249
def compiler_output(text: str) -> None:
    """Issue a :class:`CompilerWarning` about non-empty compiler output.

    If the environment variable ``PYOPENCL_COMPILER_OUTPUT`` is set to a
    value that :func:`pytools.strtobool` considers true, *text* itself is
    used as the warning message. Otherwise, a short hint explaining how to
    enable the full output is emitted instead.

    :arg text: the compiler output to (potentially) show.
    """
    from pytools import strtobool

    show_full_output = strtobool(
            os.environ.get("PYOPENCL_COMPILER_OUTPUT", "False"))

    if show_full_output:
        message = text
    else:
        message = ("Non-empty compiler output encountered. Set the "
                "environment variable PYOPENCL_COMPILER_OUTPUT=1 "
                "to see more.")

    warn(message, CompilerWarning, stacklevel=3)
257
+
258
+ # }}}
259
+
260
+
261
+ # {{{ find pyopencl shipped source code
262
+
263
def _find_pyopencl_include_path() -> str:
    """Return the path of the ``cl`` directory shipped with PyOpenCL.

    The ``cl`` directory next to this file is tried first. If it does not
    exist, the location reported by ``importlib.resources.files`` (or the
    ``importlib_resources`` backport) for the ``pyopencl`` package is used.

    :returns: the include path; it is wrapped in double quotes if it
        contains a space and does not already start with a double quote.
    :raises OSError: if neither location exists.
    """
    # First choice: the directory sitting right beside this module.
    candidate = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), "cl")

    if not os.path.exists(candidate):
        try:
            # NOTE: only available in Python >=3.9
            from importlib.resources import files
        except ImportError:
            from importlib_resources import files

        candidate = str(files("pyopencl") / "cl")
        if not os.path.exists(candidate):
            raise OSError("Unable to find PyOpenCL include path")

    # Quote the path if it contains a space and is not quoted already.
    # See https://github.com/inducer/pyopencl/issues/250 for discussion.
    needs_quoting = " " in candidate and not candidate.startswith('"')
    return '"' + candidate + '"' if needs_quoting else candidate
285
+
286
+ # }}}
287
+
288
+
289
+ # {{{ build option munging
290
+
291
def _split_options_if_necessary(options):
    """Return *options* as a list of individual build options.

    A :class:`str` is split using shell-like syntax (:func:`shlex.split`);
    any other value is returned unchanged.
    """
    if not isinstance(options, str):
        return options

    from shlex import split as shell_split
    return shell_split(options)
298
+
299
+
300
def _find_include_path(options):
    """Collect the include directories named by a list of build options.

    Both the ``-I`` and ``/I`` spellings are recognized, with the directory
    either attached to the flag (``-I/some/dir``) or given as the following
    option (``-I``, ``/some/dir``). A pair of enclosing double quotes around
    a directory is removed.

    :arg options: a sequence of individual options. Entries may be
        :class:`str` or :class:`bytes`; the latter are decoded as UTF-8,
        mirroring the encoding used by :func:`_options_to_bytestring`.
    :returns: a list of directory names (as :class:`str`), always starting
        with ``"."``.
    """
    def as_text(option):
        # Options may already be encoded, see _options_to_bytestring.
        if isinstance(option, bytes):
            return option.decode("utf-8")
        return option

    def unquote(path):
        if path.startswith('"') and path.endswith('"'):
            return path[1:-1]
        return path

    include_path = ["."]

    option_idx = 0
    while option_idx < len(options):
        option = as_text(options[option_idx]).strip()

        if not option.startswith(("-I", "/I")):
            option_idx += 1
            continue

        if len(option) == 2:
            # Bare flag: the directory is the next option, if there is one.
            if option_idx + 1 < len(options):
                include_path.append(
                        unquote(as_text(options[option_idx + 1])))
            option_idx += 2
        else:
            # Directory attached to the flag, possibly after whitespace.
            include_path.append(unquote(option[2:].lstrip()))
            option_idx += 1

    return include_path
326
+
327
+
328
def _options_to_bytestring(options):
    """Join *options* into one space-separated :class:`bytes` object.

    :class:`str` entries are encoded as UTF-8; all other entries are passed
    to ``bytes.join`` unchanged.
    """
    encoded = [
        opt.encode("utf-8") if isinstance(opt, str) else opt
        for opt in options]

    return b" ".join(encoded)
336
+
337
+
338
+ # }}}
339
+
340
+
341
+ # {{{ Program (wrapper around _Program, adds caching support)
342
+
343
+ from pytools import strtobool
344
+
345
+
346
+ _PYOPENCL_NO_CACHE = strtobool(os.environ.get("PYOPENCL_NO_CACHE", "false"))
347
+
348
+ _DEFAULT_BUILD_OPTIONS: List[str] = []
349
+ _DEFAULT_INCLUDE_OPTIONS: List[str] = ["-I", _find_pyopencl_include_path()]
350
+
351
+ # map of platform.name to build options list
352
+ _PLAT_BUILD_OPTIONS: Dict[str, List[str]] = {
353
+ "Oclgrind": ["-D", "PYOPENCL_USING_OCLGRIND"],
354
+ }
355
+
356
+
357
def enable_debugging(platform_or_context):
    """Turn on debugging for code compiled by PyOpenCL from now on.

    :arg platform_or_context: the platform to enable debugging on. If a
        :class:`Context` is passed, the platform of its first device is used.

    For platforms whose name contains ``"AMD Accelerated"``, the build
    options ``-g -O0`` are added to the per-platform build options and the
    environment variable ``CPU_MAX_COMPUTE_UNITS`` is set to ``"1"``. For any
    other platform, a warning is issued and nothing else happens.
    """
    platform = platform_or_context
    if isinstance(platform_or_context, Context):
        platform = platform_or_context.devices[0].platform

    if "AMD Accelerated" not in platform.name:
        warn(f"Do not know how to enable debugging on '{platform.name}'",
                stacklevel=2)
        return

    debug_flags = ["-g", "-O0"]
    _PLAT_BUILD_OPTIONS.setdefault(platform.name, []).extend(debug_flags)
    os.environ["CPU_MAX_COMPUTE_UNITS"] = "1"
375
+
376
+
377
class Program:
    """Wrapper around :class:`pyopencl._cl._Program` that adds caching support.

    Three constructor forms are accepted:

    * ``Program(prg)``: wrap an existing low-level program object.
    * ``Program(context, source)``: create a program from source. If
      :func:`pyopencl.tools.is_spirv` recognizes *source* as SPIR-V, the
      low-level program is created right away. Otherwise creation is
      deferred so that :meth:`build` may go through the binary cache.
    * ``Program(context, device, binaries)``: create a program from binaries.
    """

    def __init__(self, arg1, arg2=None, arg3=None):
        # Initialize this before anything else so that every construction
        # path (including the early return for SPIR-V below) defines it.
        # Otherwise reading it would fall through to __getattr__, which
        # would attempt to look up a kernel by that name.
        self._build_duration_info = None

        if arg2 is None:
            # 1-argument form: program
            self._prg = arg1
            self._context = self._prg.get_info(program_info.CONTEXT)

        elif arg3 is None:
            # 2-argument form: context, source
            context, source = arg1, arg2

            from pyopencl.tools import is_spirv
            if is_spirv(source):
                # FIXME no caching in SPIR-V case
                self._context = context
                self._prg = _cl._create_program_with_il(context, source)
                return

            self._context = context
            self._source = source
            self._prg = None

        else:
            # 3-argument form: context, device, binaries
            context, device, binaries = arg1, arg2, arg3
            self._context = context
            self._prg = _cl._Program(context, device, binaries)

    def _get_prg(self):
        """Return the low-level program object, creating it from source
        (with a warning) if it does not exist yet.
        """
        if self._prg is not None:
            return self._prg

        # "no program" can only happen in from-source case.
        warn("Pre-build attribute access defeats compiler caching.",
                stacklevel=3)

        self._prg = _cl._Program(self._context, self._source)
        return self._prg

    def get_info(self, arg):
        """Forward to ``get_info`` of the low-level program."""
        return self._get_prg().get_info(arg)

    def get_build_info(self, *args, **kwargs):
        """Forward to ``get_build_info`` of the low-level program."""
        return self._get_prg().get_build_info(*args, **kwargs)

    def all_kernels(self):
        """Return all kernels of the low-level program, after calling
        ``_setup(self)`` on each of them.
        """
        result = self._get_prg().all_kernels()
        for knl in result:
            knl._setup(self)
        return result

    @property
    def int_ptr(self):
        return self._get_prg().int_ptr
    int_ptr.__doc__ = _cl._Program.int_ptr.__doc__

    @staticmethod
    def from_int_ptr(int_ptr_value, retain=True):
        return Program(_cl._Program.from_int_ptr(int_ptr_value, retain))
    from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__

    def __getattr__(self, attr):
        """Look up *attr* as the name of a kernel in this program.

        :raises AttributeError: if creating the kernel (or querying its
            number of arguments) raises :class:`LogicError`.
        """
        try:
            knl = Kernel(self, attr)
            # Nvidia does not raise errors even for invalid names,
            # but this will give an error if the kernel is invalid.
            knl.num_args
            knl._source = getattr(self, "_source", None)

            if self._build_duration_info is not None:
                build_descr, was_cached, duration = self._build_duration_info
                if duration > 0.2:
                    logger.info("build program: kernel '%s' was part of a "
                            "lengthy %s (%.2f s)", attr, build_descr, duration)

                # don't whine about build times more than once.
                self._build_duration_info = None

            return knl
        except LogicError:
            raise AttributeError("'%s' was not found as a program "
                    "info attribute or as a kernel name" % attr)

    # {{{ build

    @classmethod
    def _process_build_options(cls, context, options, _add_include_path=False):
        """Assemble the full set of build options.

        The user-supplied *options* (``None``, a string, a tuple or a list)
        are followed by the default build options, the default include
        options, the options registered for the platform of the first device
        of *context*, and finally the whitespace-separated contents of the
        environment variable ``PYOPENCL_BUILD_OPTIONS``, if non-empty.

        :returns: a tuple ``(options_bytes, include_path)``.
        """
        if options is None:
            options = []
        if isinstance(options, tuple):
            options = list(options)

        options = _split_options_if_necessary(options)

        options = (options
                + _DEFAULT_BUILD_OPTIONS
                + _DEFAULT_INCLUDE_OPTIONS
                + _PLAT_BUILD_OPTIONS.get(
                    context.devices[0].platform.name, []))

        forced_options = os.environ.get("PYOPENCL_BUILD_OPTIONS")
        if forced_options:
            options = options + forced_options.split()

        return (
                _options_to_bytestring(options),
                _find_include_path(options))

    def build(self, options=None, devices=None, cache_dir=None):
        """Build the program and return *self*.

        If a low-level program already exists (or caching was disabled via
        ``PYOPENCL_NO_CACHE``), it is built directly. Otherwise the program
        is obtained through
        :func:`pyopencl.cache.create_built_program_from_source_cached`.
        A description of the build, whether it came from the cache and its
        duration are recorded in ``_build_duration_info``.
        """
        options_bytes, include_path = self._process_build_options(
                self._context, options)

        if cache_dir is None:
            cache_dir = getattr(self._context, "cache_dir", None)

        build_descr = None
        if _PYOPENCL_NO_CACHE and self._prg is None:
            build_descr = "uncached source build (cache disabled by user)"
            self._prg = _cl._Program(self._context, self._source)

        from time import time
        start_time = time()
        was_cached = False

        if self._prg is not None:
            # uncached

            if build_descr is None:
                build_descr = "uncached source build"

            self._build_and_catch_errors(
                    lambda: self._prg.build(options_bytes, devices),
                    options_bytes=options_bytes)

        else:
            # cached

            from pyopencl.cache import create_built_program_from_source_cached
            self._prg, was_cached = self._build_and_catch_errors(
                    lambda: create_built_program_from_source_cached(
                        self._context, self._source, options_bytes, devices,
                        cache_dir=cache_dir, include_path=include_path),
                    options_bytes=options_bytes, source=self._source)

            if was_cached:
                build_descr = "cache retrieval"
            else:
                build_descr = "source build resulting from a binary cache miss"

            del self._context

        end_time = time()

        self._build_duration_info = (build_descr, was_cached, end_time-start_time)

        return self

    def _build_and_catch_errors(self, build_func, options_bytes, source=None):
        """Call *build_func* and return its result.

        A :class:`RuntimeError` raised by *build_func* is replaced by a new
        :class:`RuntimeError` with the same ``code`` and ``routine`` whose
        message additionally mentions the build options and, if *source* is
        given, the name of a temporary ``.cl`` file the source was saved to.
        """
        try:
            return build_func()
        except RuntimeError as e:
            msg = str(e)
            if options_bytes:
                msg = msg + "\n(options: %s)" % options_bytes.decode("utf-8")

            if source is not None:
                from tempfile import NamedTemporaryFile

                # delete=False: the file is kept after being closed so that
                # the user can inspect the offending source.
                with NamedTemporaryFile(
                        mode="wt", delete=False, suffix=".cl") as srcfile:
                    srcfile.write(source)

                msg = msg + "\n(source saved as %s)" % srcfile.name

            code = e.code
            routine = e.routine

            err = RuntimeError(
                    _cl._ErrorRecord(
                        msg=msg,
                        code=code,
                        routine=routine))

            # Python 3.2 outputs the whole list of currently active exceptions
            # This serves to remove one (redundant) level from that nesting.
            raise err

    # }}}

    def compile(self, options=None, devices=None, headers=None):
        """Compile the program and return *self*.

        :arg headers: an optional list of ``(name, program)`` tuples, where
            each *program* is a :class:`Program`.
        """
        if headers is None:
            headers = []

        options_bytes, _ = self._process_build_options(self._context, options)

        self._get_prg().compile(options_bytes, devices,
                [(name, prg._get_prg()) for name, prg in headers])
        return self

    def __eq__(self, other):
        return self._get_prg() == other._get_prg()

    def __ne__(self, other):
        # Must be the negation of __eq__. (This used to return the result
        # of '==', making 'a != b' true exactly when the programs were equal.)
        return not (self._get_prg() == other._get_prg())

    def __hash__(self):
        return hash(self._get_prg())
586
+
587
+
588
def create_program_with_built_in_kernels(context, devices, kernel_names):
    """Return a :class:`Program` made up of built-in kernels.

    :arg kernel_names: either a single string, which is passed on
        unchanged, or an iterable of strings, which are joined with ``":"``.
    """
    if isinstance(kernel_names, str):
        joined_names = kernel_names
    else:
        joined_names = ":".join(kernel_names)

    raw_prg = _Program.create_with_built_in_kernels(
            context, devices, joined_names)
    return Program(raw_prg)
594
+
595
+
596
def link_program(context, programs, options=None, devices=None):
    """Link *programs* and return the result as a new :class:`Program`.

    :arg programs: an iterable of :class:`Program` instances.
    :arg options: ``None``, a string (split using shell-like syntax) or a
        sequence of individual options.
    """
    option_list = [] if options is None else options
    options_bytes = _options_to_bytestring(
            _split_options_if_necessary(option_list))

    raw_inputs = [prg._get_prg() for prg in programs]

    return Program(_Program.link(context, raw_inputs, options_bytes, devices))
604
+
605
+ # }}}
606
+
607
+
608
+ # {{{ monkeypatch C++ wrappers to add functionality
609
+
610
+ def _add_functionality():
611
+ def generic_get_cl_version(self):
612
+ import re
613
+ version_string = self.version
614
+ match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string)
615
+ if match is None:
616
+ raise RuntimeError("%s %s returned non-conformant "
617
+ "platform version string '%s'" %
618
+ (type(self).__name__, self, version_string))
619
+
620
+ return int(match.group(1)), int(match.group(2))
621
+
622
+ # {{{ Platform
623
+
624
+ def platform_repr(self):
625
+ return f"<pyopencl.Platform '{self.name}' at 0x{self.int_ptr:x}>"
626
+
627
+ Platform.__repr__ = platform_repr
628
+ Platform._get_cl_version = generic_get_cl_version
629
+
630
+ # }}}
631
+
632
+ # {{{ Device
633
+
634
+ def device_repr(self):
635
+ return "<pyopencl.Device '{}' on '{}' at 0x{:x}>".format(
636
+ self.name.strip(), self.platform.name.strip(), self.int_ptr)
637
+
638
+ def device_hashable_model_and_version_identifier(self):
639
+ return ("v1", self.vendor, self.vendor_id, self.name, self.version)
640
+
641
+ def device_persistent_unique_id(self):
642
+ warn("Device.persistent_unique_id is deprecated. "
643
+ "Use Device.hashable_model_and_version_identifier instead.",
644
+ DeprecationWarning, stacklevel=2)
645
+ return device_hashable_model_and_version_identifier(self)
646
+
647
+ Device.__repr__ = device_repr
648
+
649
+ # undocumented for now:
650
+ Device._get_cl_version = generic_get_cl_version
651
+ Device.hashable_model_and_version_identifier = property(
652
+ device_hashable_model_and_version_identifier)
653
+ Device.persistent_unique_id = property(device_persistent_unique_id)
654
+
655
+ # }}}
656
+
657
+ # {{{ Context
658
+
659
+ context_old_init = Context.__init__
660
+
661
+ def context_init(self, devices, properties, dev_type, cache_dir=None):
662
+ if cache_dir is not None:
663
+ warn("The 'cache_dir' argument to the Context constructor "
664
+ "is deprecated and no longer has an effect. "
665
+ "It was removed because it only applied to the wrapper "
666
+ "object and not the context itself, leading to inconsistencies.",
667
+ DeprecationWarning, stacklevel=2)
668
+
669
+ context_old_init(self, devices, properties, dev_type)
670
+
671
+ def context_repr(self):
672
+ return "<pyopencl.Context at 0x{:x} on {}>".format(self.int_ptr,
673
+ ", ".join(repr(dev) for dev in self.devices))
674
+
675
+ def context_get_cl_version(self):
676
+ return self.devices[0].platform._get_cl_version()
677
+
678
+ Context.__repr__ = context_repr
679
+ from pytools import memoize_method
680
+ Context._get_cl_version = memoize_method(context_get_cl_version)
681
+
682
+ # }}}
683
+
684
+ # {{{ CommandQueue
685
+
686
+ def command_queue_enter(self):
687
+ return self
688
+
689
+ def command_queue_exit(self, exc_type, exc_val, exc_tb):
690
+ self.finish()
691
+ self._finalize()
692
+
693
+ def command_queue_get_cl_version(self):
694
+ return self.device._get_cl_version()
695
+
696
+ CommandQueue.__enter__ = command_queue_enter
697
+ CommandQueue.__exit__ = command_queue_exit
698
+ CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version)
699
+
700
+ # }}}
701
+
702
+ # {{{ _Program (the internal, non-caching version)
703
+
704
+ def program_get_build_logs(self):
705
+ build_logs = []
706
+ for dev in self.get_info(_cl.program_info.DEVICES):
707
+ try:
708
+ log = self.get_build_info(dev, program_build_info.LOG)
709
+ except Exception:
710
+ log = "<error retrieving log>"
711
+
712
+ build_logs.append((dev, log))
713
+
714
+ return build_logs
715
+
716
+ def program_build(self, options_bytes, devices=None):
717
+ err = None
718
+ try:
719
+ self._build(options=options_bytes, devices=devices)
720
+ except Error as e:
721
+ msg = str(e) + "\n\n" + (75*"="+"\n").join(
722
+ f"Build on {dev}:\n\n{log}"
723
+ for dev, log in self._get_build_logs())
724
+ code = e.code
725
+ routine = e.routine
726
+
727
+ err = _cl.RuntimeError(
728
+ _cl._ErrorRecord(
729
+ msg=msg,
730
+ code=code,
731
+ routine=routine))
732
+
733
+ if err is not None:
734
+ # Python 3.2 outputs the whole list of currently active exceptions
735
+ # This serves to remove one (redundant) level from that nesting.
736
+ raise err
737
+
738
+ message = (75*"="+"\n").join(
739
+ f"Build on {dev} succeeded, but said:\n\n{log}"
740
+ for dev, log in self._get_build_logs()
741
+ if log is not None and log.strip())
742
+
743
+ if message:
744
+ if self.kind() == program_kind.SOURCE:
745
+ build_type = "From-source build"
746
+ elif self.kind() == program_kind.BINARY:
747
+ build_type = "From-binary build"
748
+ elif self.kind() == program_kind.IL:
749
+ build_type = "From-IL build"
750
+ else:
751
+ build_type = "Build"
752
+
753
+ compiler_output("%s succeeded, but resulted in non-empty logs:\n%s"
754
+ % (build_type, message))
755
+
756
+ return self
757
+
758
+ _cl._Program._get_build_logs = program_get_build_logs
759
+ _cl._Program.build = program_build
760
+
761
+ # }}}
762
+
763
+ # {{{ Event
764
+ class ProfilingInfoGetter:
765
+ def __init__(self, event):
766
+ self.event = event
767
+
768
+ def __getattr__(self, name):
769
+ info_cls = _cl.profiling_info
770
+
771
+ try:
772
+ inf_attr = getattr(info_cls, name.upper())
773
+ except AttributeError:
774
+ raise AttributeError("%s has no attribute '%s'"
775
+ % (type(self), name))
776
+ else:
777
+ return self.event.get_profiling_info(inf_attr)
778
+
779
+ _cl.Event.profile = property(ProfilingInfoGetter)
780
+
781
+ # }}}
782
+
783
+ # {{{ Kernel
784
+
785
+ kernel_old_init = Kernel.__init__
786
+ kernel_old_get_info = Kernel.get_info
787
+ kernel_old_get_work_group_info = Kernel.get_work_group_info
788
+
789
+ def kernel_init(self, prg, name):
790
+ if not isinstance(prg, _cl._Program):
791
+ prg = prg._get_prg()
792
+
793
+ kernel_old_init(self, prg, name)
794
+
795
+ self._setup(prg)
796
+
797
+ def kernel__setup(self, prg):
798
+ self._source = getattr(prg, "_source", None)
799
+
800
+ from pyopencl.invoker import generate_enqueue_and_set_args
801
+ self._enqueue, self._set_args = generate_enqueue_and_set_args(
802
+ self.function_name, self.num_args, self.num_args,
803
+ None,
804
+ warn_about_arg_count_bug=None,
805
+ work_around_arg_count_bug=None, devs=self.context.devices)
806
+
807
+ self._wg_info_cache = {}
808
+ return self
809
+
810
def kernel_set_arg_types(self, arg_types):
    """Regenerate this kernel's enqueue/set-args functions for *arg_types*.

    :arg arg_types: an iterable of argument type specifications; it is
        converted to a tuple and handed to
        ``pyopencl.invoker.generate_enqueue_and_set_args``.

    The generated functions are stored in ``self._enqueue`` and
    ``self._set_args``, which is where the ``__call__`` and ``set_args``
    trampolines look them up.
    """
    arg_types = tuple(arg_types)

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    work_around_arg_count_bug = False
    warn_about_arg_count_bug = False

    from pyopencl.characterize import has_struct_arg_count_bug

    count_bug_per_dev = [
            has_struct_arg_count_bug(dev, self.context)
            for dev in self.context.devices]

    from pytools import single_valued
    if any(count_bug_per_dev):
        if all(count_bug_per_dev):
            # Every device is affected: apply the (common) workaround.
            work_around_arg_count_bug = single_valued(count_bug_per_dev)
        else:
            # Only some devices are affected: just warn.
            warn_about_arg_count_bug = True

    # }}}

    from pyopencl.invoker import generate_enqueue_and_set_args
    # Store the setter as '_set_args', not 'set_args': the generated function
    # expects the kernel as an explicit first argument, which the
    # 'set_args' trampoline supplies. An instance attribute named
    # 'set_args' would shadow that trampoline and be called without 'self'.
    self._enqueue, self._set_args = \
            generate_enqueue_and_set_args(
                    self.function_name,
                    len(arg_types), self.num_args,
                    arg_types,
                    warn_about_arg_count_bug=warn_about_arg_count_bug,
                    work_around_arg_count_bug=work_around_arg_count_bug,
                    devs=self.context.devices)
846
+
847
def kernel_get_work_group_info(self, param, device):
    """Return work-group info *param* for *device*, memoizing the answer
    per ``(param, device.int_ptr)`` in ``self._wg_info_cache``.
    """
    key = (param, device.int_ptr)
    cache = self._wg_info_cache

    if key in cache:
        return cache[key]

    # Cache miss: ask the underlying implementation and remember the answer.
    value = kernel_old_get_work_group_info(self, param, device)
    cache[key] = value
    return value
857
+
858
def kernel_set_args(self, *args, **kwargs):
    """Forward to the generated setter stored in ``self._set_args``."""
    # The generated function is stored as a plain attribute (not a bound
    # method), so the kernel has to be passed explicitly as its 'self'.
    generated_setter = self._set_args
    return generated_setter(self, *args, **kwargs)
861
+
862
def kernel_call(self, queue, global_size, local_size, *args, **kwargs):
    """Enqueue this kernel by delegating to ``self._enqueue``."""
    # __call__ can't be overridden directly, so this trampoline looks up the
    # currently installed enqueue function on each call.

    # Note: This is only used for the generic __call__, before
    # kernel_set_scalar_arg_dtypes is called.
    generated_enqueue = self._enqueue
    return generated_enqueue(
            self, queue, global_size, local_size, *args, **kwargs)
869
+
870
def kernel_capture_call(self, output_file, queue, global_size, local_size,
        *args, **kwargs):
    """Hand this kernel invocation to
    ``pyopencl.capture_call.capture_kernel_call`` with *output_file* as the
    target. Nothing is returned.
    """
    from pyopencl import capture_call as _capture

    _capture.capture_kernel_call(
            self, output_file, queue, global_size, local_size,
            *args, **kwargs)
875
+
876
def kernel_get_info(self, param_name):
    """Query kernel info *param_name*; a low-level ``_Program`` result is
    wrapped in a :class:`Program`, anything else is returned unchanged.
    """
    info = kernel_old_get_info(self, param_name)
    return Program(info) if isinstance(info, _Program) else info
883
+
884
+ Kernel.__init__ = kernel_init
885
+ Kernel._setup = kernel__setup
886
+ Kernel.get_work_group_info = kernel_get_work_group_info
887
+
888
+ # FIXME: Possibly deprecate this version
889
+ Kernel.set_scalar_arg_dtypes = kernel_set_arg_types
890
+ Kernel.set_arg_types = kernel_set_arg_types
891
+
892
+ Kernel.set_args = kernel_set_args
893
+ Kernel.__call__ = kernel_call
894
+ Kernel.capture_call = kernel_capture_call
895
+ Kernel.get_info = kernel_get_info
896
+
897
+ # }}}
898
+
899
+ # {{{ ImageFormat
900
+
901
def image_format_repr(self):
    """Return ``ImageFormat(<order>, <type>)`` using the symbolic names of
    the channel order and channel data type (or a hex placeholder when a
    value has no name).
    """
    order_name = channel_order.to_string(
            self.channel_order, "<unknown channel order 0x%x>")
    type_name = channel_type.to_string(
            self.channel_data_type, "<unknown channel data type 0x%x>")
    return "ImageFormat({}, {})".format(order_name, type_name)
907
+
908
def image_format_eq(self, other):
    """Compare two image formats by channel order and channel data type.

    :returns: *True*/*False* for format-like objects, or ``NotImplemented``
        if *other* lacks ``channel_order``/``channel_data_type``, so that
        Python can fall back to its default comparison instead of raising
        :exc:`AttributeError`.
    """
    try:
        other_order = other.channel_order
        other_data_type = other.channel_data_type
    except AttributeError:
        return NotImplemented

    return (self.channel_order == other_order
            and self.channel_data_type == other_data_type)

def image_format_ne(self, other):
    """Inverse of :func:`image_format_eq`.

    ``NotImplemented`` is passed through unchanged; negating it would
    wrongly report foreign objects as equal.
    """
    equal = image_format_eq(self, other)
    if equal is NotImplemented:
        return NotImplemented
    return not equal
914
+
915
def image_format_hash(self):
    """Hash on the object's type plus the same two fields used by equality."""
    identity = (type(self), self.channel_order, self.channel_data_type)
    return hash(identity)
917
+
918
+ ImageFormat.__repr__ = image_format_repr
919
+ ImageFormat.__eq__ = image_format_eq
920
+ ImageFormat.__ne__ = image_format_ne
921
+ ImageFormat.__hash__ = image_format_hash
922
+
923
+ # }}}
924
+
925
+ # {{{ Image
926
+
927
+ image_old_init = Image.__init__
928
+
929
def image_init(self, context, flags, format, shape=None, pitches=None,
        hostbuf=None, is_array=False, buffer=None):
    """Initialize an image, validating arguments and picking the image type.

    *shape* defaults to ``hostbuf.shape`` when only *hostbuf* is given.
    When both the context and the headers report CL 1.2 or newer, an
    ``ImageDescriptor`` is built from *shape*, *pitches*, *is_array* and
    *buffer*; otherwise the legacy constructor is used and *is_array* and
    *buffer* are rejected.

    Raises ``Error`` for a missing *shape* or for *pitches* without
    *hostbuf*, and :exc:`TypeError`/:exc:`ValueError` for unsupported
    argument combinations.
    """

    if shape is None:
        if hostbuf is None:
            raise Error("'shape' must be passed if 'hostbuf' is not given")
        shape = hostbuf.shape

    if hostbuf is not None:
        host_ptr_flags = mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR
        if not (flags & host_ptr_flags):
            warn("'hostbuf' was passed, but no memory flags to make use of it.",
                    stacklevel=2)
    elif pitches is not None:
        raise Error("'pitches' may only be given if 'hostbuf' is given")

    have_cl_1_2 = (context._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))

    if not have_cl_1_2:
        # legacy init for CL 1.1 and older
        if is_array:
            raise TypeError("'is_array=True' is not supported for CL < 1.2")
        if buffer is not None:
            raise TypeError("'buffer' argument is not supported for CL < 1.2")

        image_old_init(self, context, flags, format, shape,
                pitches, hostbuf)
        return

    has_buffer = buffer is not None
    if has_buffer and is_array:
        raise ValueError(
                "'buffer' and 'is_array' are mutually exclusive")

    ndim = len(shape)

    if ndim == 3:
        if has_buffer:
            raise TypeError(
                    "'buffer' argument is not supported for 3D arrays")
        image_type = (mem_object_type.IMAGE2D_ARRAY if is_array
                else mem_object_type.IMAGE3D)

    elif ndim == 2:
        if has_buffer:
            raise TypeError(
                    "'buffer' argument is not supported for 2D arrays")
        image_type = (mem_object_type.IMAGE1D_ARRAY if is_array
                else mem_object_type.IMAGE2D)

    elif ndim == 1:
        if has_buffer:
            image_type = mem_object_type.IMAGE1D_BUFFER
        elif is_array:
            raise TypeError("array of zero-dimensional images not supported")
        else:
            image_type = mem_object_type.IMAGE1D

    else:
        raise ValueError("images cannot have more than three dimensions")

    desc = ImageDescriptor()

    desc.image_type = image_type
    desc.shape = shape  # also sets desc.array_size
    desc.pitches = (0, 0) if pitches is None else pitches
    desc.num_mip_levels = 0  # per CL 1.2 spec
    desc.num_samples = 0  # per CL 1.2 spec
    desc.buffer = buffer

    image_old_init(self, context, flags, format, desc, hostbuf)
1010
+
1011
class _ImageInfoGetter:
    """Deprecated attribute-style accessor for image info.

    Constructing it emits a deprecation warning; reading ``getter.<name>``
    looks up ``<NAME>`` on ``_cl.image_info`` and queries the wrapped
    object's ``get_image_info`` with it.
    """

    def __init__(self, event):
        message = (
                "Image.image.attr is deprecated and will go away in 2021. "
                "Use Image.attr directly, instead.")
        warn(message, stacklevel=2)

        self.event = event

    def __getattr__(self, name):
        try:
            info_id = getattr(_cl.image_info, name.upper())
        except AttributeError:
            raise AttributeError("%s has no attribute '%s'"
                    % (type(self), name))

        return self.event.get_image_info(info_id)
1027
+
1028
def image_shape(self):
    """Return ``(width, height)`` for 2D images and
    ``(width, height, depth)`` for 3D images; any other memory object
    type raises ``LogicError``.
    """
    if self.type == mem_object_type.IMAGE2D:
        dims = (self.width, self.height)
    elif self.type == mem_object_type.IMAGE3D:
        dims = (self.width, self.height, self.depth)
    else:
        raise LogicError("only images have shapes")

    return dims
1035
+
1036
+ Image.__init__ = image_init
1037
+ Image.image = property(_ImageInfoGetter)
1038
+ Image.shape = property(image_shape)
1039
+
1040
+ # }}}
1041
+
1042
+ # {{{ Error
1043
+
1044
def error_str(self):
    """Build the human-readable message for an error.

    If the payload (``self.what``) has no ``routine`` attribute it is simply
    converted with :func:`str`. Otherwise the message is assembled from the
    status code name, the failing routine and the detail text, as far as
    each of them is present.
    """
    payload = self.what

    try:
        payload.routine
    except AttributeError:
        # Plain payload without structured error information.
        return str(payload)

    message = ""
    if payload.code() != status_code.SUCCESS:
        message = status_code.to_string(
                payload.code(), "<unknown error %d>")

    routine_name = payload.routine()
    if routine_name:
        message = f"{routine_name} failed: {message}"

    detail = payload.what()
    if detail:
        separator = " - " if message else ""
        message = message + separator + detail

    return message
1064
+
1065
def error_code(self):
    """Return ``code()`` of the first exception argument."""
    payload = self.args[0]
    return payload.code()
1067
+
1068
def error_routine(self):
    """Return ``routine()`` of the first exception argument."""
    payload = self.args[0]
    return payload.routine()
1070
+
1071
def error_what(self):
    """Return the first exception argument unchanged."""
    first_arg, = self.args[:1] or (self.args[0],)
    return first_arg
1073
+
1074
+ Error.__str__ = error_str
1075
+ Error.code = property(error_code)
1076
+ Error.routine = property(error_routine)
1077
+ Error.what = property(error_what)
1078
+
1079
+ # }}}
1080
+
1081
+ # {{{ MemoryMap
1082
+
1083
def memory_map_enter(self):
    """Context-manager entry: the map object itself is the ``as`` target."""
    entered = self
    return entered
1085
+
1086
def memory_map_exit(self, exc_type, exc_val, exc_tb):
    """Context-manager exit: call ``release()``.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the ``with`` block keeps propagating.
    """
    release = self.release
    release()
1088
+
1089
+ MemoryMap.__doc__ = """
1090
+ This class may also be used as a context manager in a ``with`` statement.
1091
+ The memory corresponding to this object will be unmapped when
1092
+ this object is deleted or :meth:`release` is called.
1093
+
1094
+ .. automethod:: release
1095
+ """
1096
+ MemoryMap.__enter__ = memory_map_enter
1097
+ MemoryMap.__exit__ = memory_map_exit
1098
+
1099
+ # }}}
1100
+
1101
+ # {{{ SVMPointer
1102
+
1103
+ if get_cl_header_version() >= (2, 0):
1104
+ SVMPointer.__doc__ = """A base class for things that can be passed to
1105
+ functions that allow an SVM pointer, e.g. kernel enqueues and memory
1106
+ copies.
1107
+
1108
+ Objects of this type cannot currently be directly created or
1109
+ implemented in Python. To obtain objects implementing this type,
1110
+ consider its subtypes :class:`SVMAllocation` and :class:`SVM`.
1111
+
1112
+
1113
+ .. property:: svm_ptr
1114
+
1115
+ Gives the SVM pointer as an :class:`int`.
1116
+
1117
+ .. property:: size
1118
+
1119
+ An :class:`int` denoting the size in bytes, or *None*, if the size
1120
+ of the SVM pointed to is not known.
1121
+
1122
+ *Most* objects of this type (e.g. instances of
1123
+ :class:`SVMAllocation` and :class:`SVM`) know their size, so that,
1124
+ for example :class:`enqueue_copy` will automatically copy an entire
1125
+ :class:`SVMAllocation` when a size is not explicitly specified.
1126
+
1127
+ .. automethod:: map
1128
+ .. automethod:: map_ro
1129
+ .. automethod:: map_rw
1130
+ .. automethod:: as_buffer
1131
+ .. property:: buf
1132
+
1133
+ An opaque object implementing the :c:func:`Python buffer protocol
1134
+ <PyObject_GetBuffer>`. It exposes the pointed-to memory as
1135
+ a one-dimensional buffer of bytes, with the size matching
1136
+ :attr:`size`.
1137
+
1138
+ No guarantee is provided that two references to this attribute
1139
+ result in the same object.
1140
+ """
1141
+
1142
def svmptr_map(self, queue: CommandQueue, *, flags: int, is_blocking: bool =
        True, wait_for: Optional[Sequence[Event]] = None,
        size: Optional[int] = None) -> "SVMMap":
    """
    :arg is_blocking: If *False*, subsequent code must wait on
        :attr:`SVMMap.event` in the returned object before accessing the
        mapped memory.
    :arg flags: a combination of :class:`pyopencl.map_flags`.
    :arg size: The size of the map in bytes. If not provided, defaults to
        :attr:`size`.
    :returns: an :class:`SVMMap` whose array is ``np.asarray(self.buf)``.

    |std-enqueue-blurb|
    """
    # NOTE: *size* is a byte count (an int), matching map_ro/map_rw; it is
    # only forwarded to the map enqueue, the exposed array comes from self.buf.
    return SVMMap(self,
            np.asarray(self.buf),
            queue,
            _cl._enqueue_svm_map(queue, is_blocking, flags, self, wait_for,
                size=size))
1160
+
1161
def svmptr_map_ro(self, queue: CommandQueue, *, is_blocking: bool = True,
        wait_for: Optional[Sequence[Event]] = None,
        size: Optional[int] = None) -> "SVMMap":
    """Like :meth:`map`, but with *flags* set for a read-only map.
    """
    read_only = map_flags.READ

    return self.map(
            queue,
            flags=read_only,
            is_blocking=is_blocking,
            wait_for=wait_for,
            size=size)
1169
+
1170
def svmptr_map_rw(self, queue: CommandQueue, *, is_blocking: bool = True,
        wait_for: Optional[Sequence[Event]] = None,
        size: Optional[int] = None) -> "SVMMap":
    """Like :meth:`map`, but with *flags* set for a read-write map
    (``map_flags.READ | map_flags.WRITE``).
    """

    return self.map(queue, flags=map_flags.READ | map_flags.WRITE,
            is_blocking=is_blocking, wait_for=wait_for, size=size)
1178
+
1179
def svmptr__enqueue_unmap(self, queue, wait_for=None):
    """Enqueue the unmap of this SVM pointer on *queue* and return the
    result of ``_cl._enqueue_svm_unmap``.
    """
    unmap_event = _cl._enqueue_svm_unmap(queue, self, wait_for)
    return unmap_event
1181
+
1182
def svmptr_as_buffer(self, ctx: Context, *, flags: Optional[int] = None,
        size: Optional[int] = None) -> Buffer:
    """
    :arg ctx: a :class:`Context`
    :arg flags: a combination of :class:`pyopencl.mem_flags`, defaults to
        ``mem_flags.READ_WRITE | mem_flags.USE_HOST_PTR``.
    :arg size: The size of the buffer in bytes. If not provided, defaults to
        :attr:`size`.
    :returns: a :class:`Buffer` corresponding to *self*.

    The memory referred to by this object must not be freed before
    the returned :class:`Buffer` is released.
    """

    if flags is None:
        flags = mem_flags.READ_WRITE | mem_flags.USE_HOST_PTR

    if size is None:
        size = self.size

    # The buffer is created on top of this object's host-visible memory.
    return Buffer(ctx, flags, size=size, hostbuf=self.buf)
1203
+
1204
+ if get_cl_header_version() >= (2, 0):
1205
+ SVMPointer.map = svmptr_map
1206
+ SVMPointer.map_ro = svmptr_map_ro
1207
+ SVMPointer.map_rw = svmptr_map_rw
1208
+ SVMPointer._enqueue_unmap = svmptr__enqueue_unmap
1209
+ SVMPointer.as_buffer = svmptr_as_buffer
1210
+
1211
+ # }}}
1212
+
1213
+ # {{{ SVMAllocation
1214
+
1215
+ if get_cl_header_version() >= (2, 0):
1216
+ SVMAllocation.__doc__ = """
1217
+ Is a :class:`SVMPointer`.
1218
+
1219
+ .. versionadded:: 2016.2
1220
+
1221
+ .. automethod:: __init__
1222
+
1223
+ :arg flags: See :class:`svm_mem_flags`.
1224
+ :arg queue: If not specified, the allocation will be freed
1225
+ eagerly, irrespective of whether pending/enqueued operations
1226
+ are still using this memory.
1227
+
1228
+ If specified, deallocation of the memory will be enqueued
1229
+ with the given queue, and will only be performed
1230
+ after previously-enqueued operations in the queue have
1231
+ completed.
1232
+
1233
+ It is an error to specify an out-of-order queue.
1234
+
1235
+ .. warning::
1236
+
1237
+ Not specifying a queue will typically lead to undesired
1238
+ behavior, including crashes and memory corruption.
1239
+ See the warning in :ref:`svm`.
1240
+
1241
+ .. automethod:: enqueue_release
1242
+
1243
+ Enqueue the release of this allocation into *queue*.
1244
+ If *queue* is not specified, enqueue the deallocation
1245
+ into the queue provided at allocation time or via
1246
+ :class:`bind_to_queue`.
1247
+
1248
+ .. automethod:: bind_to_queue
1249
+
1250
+ Change the queue used for implicit enqueue of deallocation
1251
+ to *queue*. Sufficient synchronization is ensured by
1252
+ enqueuing a marker into the old queue and waiting on this
1253
+ marker in the new queue.
1254
+
1255
+ .. automethod:: unbind_from_queue
1256
+
1257
+ Configure the allocation to no longer implicitly enqueue
1258
+ memory deallocation. If such a queue was previously provided,
1259
+ :meth:`~CommandQueue.finish` is automatically called on it.
1260
+ """
1261
+
1262
+ # }}}
1263
+
1264
+ # {{{ SVM
1265
+
1266
+ if get_cl_header_version() >= (2, 0):
1267
+ SVM.__doc__ = """Tags an object exhibiting the Python buffer interface
1268
+ (such as a :class:`numpy.ndarray`) as referring to shared virtual
1269
+ memory.
1270
+
1271
+ Is a :class:`SVMPointer`, hence objects of this type may be passed
1272
+ to kernel calls and :func:`enqueue_copy`, and all methods declared
1273
+ there are also available there. Note that :meth:`map` differs
1274
+ slightly from :meth:`SVMPointer.map`.
1275
+
1276
+ Depending on the features of the OpenCL implementation, the following
1277
+ types of objects may be passed to/wrapped in this type:
1278
+
1279
+ * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`,
1280
+ if the implementation supports fine-grained shared virtual memory.
1281
+ This memory may directly be passed to a kernel::
1282
+
1283
+ ary = cl.fsvm_empty(ctx, 1000, np.float32)
1284
+ assert isinstance(ary, np.ndarray)
1285
+
1286
+ prg.twice(queue, ary.shape, None, cl.SVM(ary))
1287
+ queue.finish() # synchronize
1288
+ print(ary) # access from host
1289
+
1290
+ Observe how mapping (as needed in coarse-grain SVM) is no longer
1291
+ necessary.
1292
+
1293
+ * any :class:`numpy.ndarray` (or other Python object with a buffer
1294
+ interface) if the implementation supports fine-grained *system*
1295
+ shared virtual memory.
1296
+
1297
+ This is how plain :mod:`numpy` arrays may directly be passed to a
1298
+ kernel::
1299
+
1300
+ ary = np.zeros(1000, np.float32)
1301
+ prg.twice(queue, ary.shape, None, cl.SVM(ary))
1302
+ queue.finish() # synchronize
1303
+ print(ary) # access from host
1304
+
1305
+ * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty`
1306
+ for any implementation of OpenCL 2.0.
1307
+
1308
+ .. note::
1309
+
1310
+ Applications making use of coarse-grain SVM may be better
1311
+ served by opaque-style SVM. See :ref:`opaque-svm`.
1312
+
1313
+ This is how coarse-grain SVM may be used from both host and device::
1314
+
1315
+ svm_ary = cl.SVM(
1316
+ cl.csvm_empty(ctx, 1000, np.float32, alignment=64))
1317
+ assert isinstance(svm_ary.mem, np.ndarray)
1318
+
1319
+ with svm_ary.map_rw(queue) as ary:
1320
+ ary.fill(17) # use from host
1321
+
1322
+ prg.twice(queue, svm_ary.mem.shape, None, svm_ary)
1323
+
1324
+ Coarse-grain shared-memory *must* be mapped into host address space
1325
+ using :meth:`~SVMPointer.map` before being accessed through the
1326
+ :mod:`numpy` interface.
1327
+
1328
+ .. note::
1329
+
1330
+ This object merely serves as a 'tag' that changes the behavior
1331
+ of functions to which it is passed. It has no special management
1332
+ relationship to the memory it tags. For example, it is permissible
1333
+ to grab a :class:`numpy.ndarray` out of :attr:`SVM.mem` of one
1334
+ :class:`SVM` instance and use the array to construct another.
1335
+ Neither of the tags need to be kept alive.
1336
+
1337
+ .. versionadded:: 2016.2
1338
+
1339
+ .. attribute:: mem
1340
+
1341
+ The wrapped object.
1342
+
1343
+ .. automethod:: __init__
1344
+ .. automethod:: map
1345
+ .. automethod:: map_ro
1346
+ .. automethod:: map_rw
1347
+ """
1348
+
1349
+ # }}}
1350
+
1351
+ if get_cl_header_version() >= (2, 0):
1352
+ svm_old_init = SVM.__init__
1353
+
1354
def svm_init(self, mem):
    """Run the original initializer, then keep the wrapped object
    available as ``self.mem``.
    """
    svm_old_init(self, mem)

    # Only reached if the original initializer accepted 'mem'.
    setattr(self, "mem", mem)
1358
+
1359
def svm_map(self, queue, flags, is_blocking=True, wait_for=None):
    """
    :arg is_blocking: If *False*, subsequent code must wait on
        :attr:`SVMMap.event` in the returned object before accessing the
        mapped memory.
    :arg flags: a combination of :class:`pyopencl.map_flags`.
    :returns: an :class:`SVMMap` instance

    This differs from the inherited :class:`SVMPointer.map` in that no size
    can be specified, and that :attr:`mem` is the exact array produced
    when the :class:`SVMMap` is used as a context manager.

    |std-enqueue-blurb|
    """
    wrapped = self.mem
    map_event = _cl._enqueue_svm_map(
            queue, is_blocking, flags, self, wait_for)

    return SVMMap(self, wrapped, queue, map_event)
1378
+
1379
def svm_map_ro(self, queue, is_blocking=True, wait_for=None):
    """Like :meth:`map`, but with *flags* set for a read-only map."""
    read_only = map_flags.READ

    return self.map(
            queue, read_only,
            is_blocking=is_blocking,
            wait_for=wait_for)
1384
+
1385
def svm_map_rw(self, queue, is_blocking=True, wait_for=None):
    """Like :meth:`map`, but with *flags* set for a read-write map
    (``map_flags.READ | map_flags.WRITE``).
    """

    return self.map(queue, map_flags.READ | map_flags.WRITE,
            is_blocking=is_blocking, wait_for=wait_for)
1390
+
1391
def svm__enqueue_unmap(self, queue, wait_for=None):
    """Enqueue the unmap of this SVM object on *queue* and return the
    result of ``_cl._enqueue_svm_unmap``.
    """
    unmap_event = _cl._enqueue_svm_unmap(queue, self, wait_for)
    return unmap_event
1393
+
1394
+ if get_cl_header_version() >= (2, 0):
1395
+ SVM.__init__ = svm_init
1396
+ SVM.map = svm_map
1397
+ SVM.map_ro = svm_map_ro
1398
+ SVM.map_rw = svm_map_rw
1399
+ SVM._enqueue_unmap = svm__enqueue_unmap
1400
+
1401
+ # }}}
1402
+
1403
+ # ORDER DEPENDENCY: Some of the above may override get_info, the effect needs
1404
+ # to be visible through the attributes. So get_info attr creation needs to happen
1405
+ # after the overriding is complete.
1406
+ cls_to_info_cls = {
1407
+ _cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []),
1408
+ _cl.Device: (_cl.Device.get_info, _cl.device_info,
1409
+ ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]),
1410
+ _cl.Context: (_cl.Context.get_info, _cl.context_info, []),
1411
+ _cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info,
1412
+ ["CONTEXT", "DEVICE"]),
1413
+ _cl.Event: (_cl.Event.get_info, _cl.event_info, []),
1414
+ _cl.MemoryObjectHolder:
1415
+ (MemoryObjectHolder.get_info, _cl.mem_info, []),
1416
+ Image: (_cl.Image.get_image_info, _cl.image_info, []),
1417
+ Pipe: (_cl.Pipe.get_pipe_info, _cl.pipe_info, []),
1418
+ Program: (Program.get_info, _cl.program_info, []),
1419
+ Kernel: (Kernel.get_info, _cl.kernel_info, []),
1420
+ _cl.Sampler: (Sampler.get_info, _cl.sampler_info, []),
1421
+ }
1422
+
1423
def to_string(cls, value, default_format=None):
    """Return the symbolic name(s) of *value* among the constants of *cls*.

    For bitfield classes (``cls._is_bitfield`` true) every public integer
    attribute that equals *value* or shares a bit with it is reported, joined
    by ``" | "``. For other classes the first public attribute equal to
    *value* is returned.

    :arg default_format: a ``%``-format string applied to *value* when no
        name is found.
    :raises ValueError: if no name is found and *default_format* is *None*.
    """
    if cls._is_bitfield:
        names = []
        for name in dir(cls):
            # Skip private attributes: '_is_bitfield' itself is a bool,
            # i.e. an int equal to 1, and would otherwise be reported as a
            # flag name for every value with bit 0 set.
            if name.startswith("_"):
                continue
            attr = getattr(cls, name)
            if not isinstance(attr, int):
                continue
            if attr == value or attr & value:
                names.append(name)
        if names:
            return " | ".join(names)
    else:
        for name in dir(cls):
            if (not name.startswith("_")
                    and getattr(cls, name) == value):
                return name

    if default_format is None:
        raise ValueError("a name for value %d was not found in %s"
                % (value, cls.__name__))
    else:
        return default_format % value
1445
+
1446
+ for cls in CONSTANT_CLASSES:
1447
+ cls._is_bitfield = cls in BITFIELD_CONSTANT_CLASSES
1448
+ cls.to_string = classmethod(to_string)
1449
+
1450
+ # {{{ get_info attributes -------------------------------------------------
1451
+
1452
def make_getinfo(info_method, info_name, info_attr):
    """Build a read-only property that evaluates
    ``info_method(self, info_attr)`` on every access.

    *info_name* is accepted for signature symmetry but not used.
    """
    def getter(self):
        return info_method(self, info_attr)

    return property(fget=getter)
1457
+
1458
def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr):
    """Build a read-only property like :func:`make_getinfo`, but remember the
    first result on the instance under the attribute name *cache_attr* and
    return that stored value on later accesses.

    *info_name* is accepted for signature symmetry but not used.
    """
    not_cached = object()

    def cached_getter(self):
        cached = getattr(self, cache_attr, not_cached)
        if cached is not not_cached:
            return cached

        # First access: query once and store the answer on the instance.
        fresh = info_method(self, info_attr)
        setattr(self, cache_attr, fresh)
        return fresh

    return property(cached_getter)
1470
+
1471
+ for cls, (info_method, info_class, cacheable_attrs) \
1472
+ in cls_to_info_cls.items():
1473
+ for info_name, _info_value in info_class.__dict__.items():
1474
+ if info_name == "to_string" or info_name.startswith("_"):
1475
+ continue
1476
+
1477
+ info_lower = info_name.lower()
1478
+ info_constant = getattr(info_class, info_name)
1479
+ if info_name in cacheable_attrs:
1480
+ cache_attr = intern("_info_cache_"+info_lower)
1481
+ setattr(cls, info_lower, make_cacheable_getinfo(
1482
+ info_method, info_lower, cache_attr, info_constant))
1483
+ else:
1484
+ setattr(cls, info_lower, make_getinfo(
1485
+ info_method, info_name, info_constant))
1486
+
1487
+ # }}}
1488
+
1489
+ if _cl.have_gl():
1490
def gl_object_get_gl_object(self):
    """Return the second entry of ``self.get_gl_object_info()``."""
    gl_info = self.get_gl_object_info()
    return gl_info[1]
1492
+
1493
+ GLBuffer.gl_object = property(gl_object_get_gl_object)
1494
+ GLTexture.gl_object = property(gl_object_get_gl_object)
1495
+
1496
+
1497
+ _add_functionality()
1498
+
1499
+ # }}}
1500
+
1501
+
1502
+ # {{{ _OverriddenArrayInterfaceSVMAllocation
1503
+
1504
if get_cl_header_version() >= (2, 0):
    class _OverriddenArrayInterfaceSVMAllocation(SVMAllocation):
        """An :class:`SVMAllocation` that exposes ``__array_interface__``
        built from a caller-supplied interface dictionary.
        """

        def __init__(self, ctx, size, alignment, flags, *, _interface,
                queue=None):
            """
            :arg ctx: a :class:`Context`
            :arg flags: some of :class:`svm_mem_flags`.
            """
            super().__init__(ctx, size, alignment, flags, queue)

            # mem_flags.READ_ONLY applies to kernels, not the host, so the
            # read-only flag in the "data" entry is always False.
            # Note that the passed-in dictionary is updated in place.
            host_read_only = False
            _interface["data"] = (int(self.svm_ptr), host_read_only)

            self.__array_interface__ = _interface
1519
+
1520
+ # }}}
1521
+
1522
+
1523
+ # {{{ create_some_context
1524
+
1525
def choose_devices(interactive: Optional[bool] = None,
                   answers: Optional[List[str]] = None) -> List[Device]:
    """
    Choose :class:`Device` instances 'somehow'.

    :arg interactive: If multiple choices for platform and/or device exist,
        *interactive* is ``True`` (or ``None`` and ``sys.stdin.isatty()``
        returns ``True``), then the user is queried about which device should be
        chosen. Otherwise, a device is chosen in an implementation-defined
        manner.
    :arg answers: A sequence of strings that will be used to answer the
        platform/device selection questions. Any sequence is accepted (it is
        copied, never modified) and entries are converted with :func:`str`.
        If not given, the environment variable ``PYOPENCL_CTX`` (split at
        ``":"``) is used when set.

    :returns: a list of :class:`Device` instances.
    :raises RuntimeError: if an answer matches no platform/device, or if
        not all provided answers were consumed.
    """

    if answers is None:
        if "PYOPENCL_CTX" in os.environ:
            ctx_spec = os.environ["PYOPENCL_CTX"]
            answers = ctx_spec.split(":")

        if "PYOPENCL_TEST" in os.environ:
            from pyopencl.tools import get_test_platforms_and_devices
            # Return the very first test device, if there is one.
            for _plat, devs in get_test_platforms_and_devices():
                for dev in devs:
                    return [dev]

    if answers is not None:
        # Normalize to lists of str: answers are consumed with pop(0) and
        # later joined with ":", which a tuple or non-str entries would break.
        pre_provided_answers = [str(ans) for ans in answers]
        answers = list(pre_provided_answers)
    else:
        pre_provided_answers = None

    user_inputs = []

    if interactive is None:
        interactive = True
        try:
            if not sys.stdin.isatty():
                interactive = False
        except Exception:
            # stdin may be missing or replaced; treat as non-interactive.
            interactive = False

    def cc_print(s):
        if interactive:
            print(s)

    def get_input(prompt):
        # Pre-provided answers take precedence over prompting the user.
        if answers:
            return str(answers.pop(0))
        elif not interactive:
            return ""
        else:
            user_input = input(prompt)
            user_inputs.append(user_input)
            return user_input

    # {{{ pick a platform

    platforms = get_platforms()

    if not platforms:
        raise Error("no platforms found")
    else:
        if not answers:
            cc_print("Choose platform:")
            for i, pf in enumerate(platforms):
                cc_print("[%d] %s" % (i, pf))

        answer = get_input("Choice [0]:")
        if not answer:
            platform = platforms[0]
        else:
            platform = None
            try:
                int_choice = int(answer)
            except ValueError:
                pass
            else:
                if 0 <= int_choice < len(platforms):
                    platform = platforms[int_choice]

            if platform is None:
                # Fall back to a case-insensitive substring match on the
                # platform name; the last matching platform is used.
                answer = answer.lower()
                for pf in platforms:
                    if answer in pf.name.lower():
                        platform = pf
                if platform is None:
                    raise RuntimeError("input did not match any platform")

    # }}}

    # {{{ pick a device

    devices = platform.get_devices()

    def parse_device(choice):
        try:
            int_choice = int(choice)
        except ValueError:
            pass
        else:
            if 0 <= int_choice < len(devices):
                return devices[int_choice]

        # Case-insensitive substring match; the first matching device wins.
        choice = choice.lower()
        for dev in devices:
            if choice in dev.name.lower():
                return dev
        raise RuntimeError("input did not match any device")

    if not devices:
        raise Error("no devices found")
    elif len(devices) == 1:
        pass
    else:
        if not answers:
            cc_print("Choose device(s):")
            for i, dev in enumerate(devices):
                cc_print("[%d] %s" % (i, dev))

        answer = get_input("Choice, comma-separated [0]:")
        if not answer:
            devices = [devices[0]]
        else:
            devices = [parse_device(i) for i in answer.split(",")]

    # }}}

    if user_inputs:
        if pre_provided_answers is not None:
            user_inputs = pre_provided_answers + user_inputs
        cc_print("Set the environment variable PYOPENCL_CTX='%s' to "
                "avoid being asked again." % ":".join(user_inputs))

    if answers:
        raise RuntimeError("not all provided choices were used by "
                "choose_device. (left over: '%s')" % ":".join(answers))

    return devices
1665
+
1666
+
1667
def create_some_context(interactive: Optional[bool] = None,
                        answers: Optional[List[str]] = None) -> Context:
    """
    Create a :class:`Context` 'somehow'.

    :arg interactive: If multiple choices for platform and/or device exist,
        *interactive* is ``True`` (or ``None`` and ``sys.stdin.isatty()``
        returns ``True``), then the user is queried about which device should be
        chosen. Otherwise, a device is chosen in an implementation-defined
        manner.
    :arg answers: A sequence of strings that will be used to answer the
        platform/device selection questions.

    :returns: an instance of :class:`Context`.
    """
    # Device selection is delegated entirely to choose_devices().
    return Context(choose_devices(interactive, answers))
1685
+
1686
+
1687
+ _csc = create_some_context
1688
+
1689
+ # }}}
1690
+
1691
+
1692
+ # {{{ SVMMap
1693
+
1694
class SVMMap:
    """
    Returned by :func:`SVMPointer.map` and :func:`SVM.map`.
    This class may also be used as a context manager in a ``with`` statement.
    :meth:`release` will be called upon exit from the ``with`` region.
    The value returned to the ``as`` part of the context manager is the
    mapped Python object (e.g. a :mod:`numpy` array).

    .. versionadded:: 2016.2

    .. property:: event

        The :class:`Event` returned when mapping the memory.

    .. automethod:: release

    """
    def __init__(self, svm, array, queue, event):
        self.svm = svm
        self.array = array
        self.queue = queue
        self.event = event

    def __del__(self):
        # 'svm' is reset to None by release(); it may also be missing
        # entirely if __init__ never ran to completion.
        if getattr(self, "svm", None) is not None:
            self.release()

    def __enter__(self):
        return self.array

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()

    def release(self, queue=None, wait_for=None):
        """
        :arg queue: a :class:`pyopencl.CommandQueue`. Defaults to the one
            with which the map was created, if not specified.
        :arg wait_for: passed on to the unmap enqueue.
        :returns: a :class:`pyopencl.Event`

        |std-enqueue-blurb|
        """
        if queue is None:
            queue = self.queue

        evt = self.svm._enqueue_unmap(queue, wait_for)
        # Mark as released so that __del__ does not unmap a second time.
        self.svm = None

        return evt
1740
+
1741
+ # }}}
1742
+
1743
+
1744
+ # {{{ enqueue_copy
1745
+
1746
+ _IMAGE_MEM_OBJ_TYPES = [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]
1747
+ if get_cl_header_version() >= (1, 2):
1748
+ _IMAGE_MEM_OBJ_TYPES.append(mem_object_type.IMAGE2D_ARRAY)
1749
+
1750
+
1751
def enqueue_copy(queue, dest, src, **kwargs):
    """Copy from :class:`Image`, :class:`Buffer` or the host to
    :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host
    copies are unsupported.)

    The following keyword arguments are available:

    :arg wait_for: (optional, default empty)
    :arg is_blocking: Wait for completion. Defaults to *True*.
      (Available on any copy involving host memory)
    :return: A :class:`NannyEvent` if the transfer involved a
        host-side buffer, otherwise an :class:`Event`.

    .. note::

        Be aware that the deletion of the :class:`NannyEvent` that is
        returned by the function if the transfer involved a host-side buffer
        will block until the transfer is complete, so be sure to keep a
        reference to this :class:`Event` until the
        transfer has completed.

    .. note::

        Two types of 'buffer' occur in the arguments to this function,
        :class:`Buffer` and 'host-side buffers'. The latter are
        defined by Python and commonly called `buffer objects
        <https://docs.python.org/3/c-api/buffer.html>`__. :mod:`numpy`
        arrays are a very common example.
        Make sure to always be clear on whether a :class:`Buffer` or a
        Python buffer object is needed.

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ host
    .. ------------------------------------------------------------------------

    :arg src_offset: offset in bytes (optional)

        May only be nonzero if applied on the device side.

    :arg dst_offset: offset in bytes (optional)

        May only be nonzero if applied on the device side.

    .. note::

        The size of the transfer is controlled by the size of the
        host-side buffer. If the host-side buffer
        is a :class:`numpy.ndarray`, you can control the transfer size by
        transferring into a smaller 'view' of the target array, like this::

            cl.enqueue_copy(queue, large_dest_numpy_array[:15], src_buffer)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ :class:`Buffer`
    .. ------------------------------------------------------------------------

    :arg byte_count: (optional) If not specified, defaults to the
        size of the source in versions 2012.x and earlier,
        and to the minimum of the size of the source and target
        from 2013.1 on.
    :arg src_offset: (optional)
    :arg dst_offset: (optional)

    .. ------------------------------------------------------------------------
    .. rubric :: Rectangular :class:`Buffer` ↔ host transfers (CL 1.1 and newer)
    .. ------------------------------------------------------------------------

    :arg buffer_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg host_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg buffer_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)
    :arg host_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)

    .. ------------------------------------------------------------------------
    .. rubric :: Rectangular :class:`Buffer` ↔ :class:`Buffer`
        transfers (CL 1.1 and newer)
    .. ------------------------------------------------------------------------

    :arg src_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg dst_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg src_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)
    :arg dst_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Image` ↔ host
    .. ------------------------------------------------------------------------

    :arg origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ :class:`Image`
    .. ------------------------------------------------------------------------

    :arg offset: offset in buffer (mandatory)
    :arg origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Image` ↔ :class:`Image`
    .. ------------------------------------------------------------------------

    :arg src_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg dest_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`SVMPointer`/host ↔ :class:`SVMPointer`/host
    .. ------------------------------------------------------------------------

    :arg byte_count: (optional) If not specified, defaults to the
        size of the source in versions 2012.x and earlier,
        and to the minimum of the size of the source and target
        from 2013.1 on.

    |std-enqueue-blurb|

    .. versionadded:: 2011.1
    """

    if isinstance(dest, MemoryObjectHolder):
        if dest.type == mem_object_type.BUFFER:
            if isinstance(src, MemoryObjectHolder):
                if src.type == mem_object_type.BUFFER:
                    # {{{ buffer -> buffer

                    if "src_origin" in kwargs:
                        # rectangular
                        return _cl._enqueue_copy_buffer_rect(
                                queue, src, dest, **kwargs)
                    else:
                        # linear
                        # Translate the deprecated 'dest_offset' spelling.
                        dest_offset = kwargs.pop("dest_offset", None)
                        if dest_offset is not None:
                            if "dst_offset" in kwargs:
                                raise TypeError("may not specify both 'dst_offset' "
                                        "and 'dest_offset'")

                            warn("The 'dest_offset' argument of enqueue_copy "
                                    "is deprecated. Use 'dst_offset' instead. "
                                    "'dest_offset' will stop working in 2023.x.",
                                    DeprecationWarning, stacklevel=2)

                            kwargs["dst_offset"] = dest_offset

                        return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs)

                    # }}}
                elif src.type in _IMAGE_MEM_OBJ_TYPES:
                    return _cl._enqueue_copy_image_to_buffer(
                            queue, src, dest, **kwargs)
                else:
                    raise ValueError("invalid src mem object type")
            else:
                # {{{ host -> buffer

                if "buffer_origin" in kwargs:
                    return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs)
                else:
                    # Translate the deprecated 'device_offset' spelling; on a
                    # write the device side is the destination.
                    device_offset = kwargs.pop("device_offset", None)
                    if device_offset is not None:
                        if "dst_offset" in kwargs:
                            raise TypeError("may not specify both 'device_offset' "
                                    "and 'dst_offset'")

                        # The message names the keyword that is actually
                        # deprecated ('device_offset', not 'dst_offset').
                        warn("The 'device_offset' argument of enqueue_copy "
                                "is deprecated. Use 'dst_offset' instead. "
                                "'device_offset' will stop working in 2023.x.",
                                DeprecationWarning, stacklevel=2)

                        kwargs["dst_offset"] = device_offset

                    return _cl._enqueue_write_buffer(queue, dest, src, **kwargs)

                # }}}

        elif dest.type in _IMAGE_MEM_OBJ_TYPES:
            # {{{ ... -> image

            if isinstance(src, MemoryObjectHolder):
                if src.type == mem_object_type.BUFFER:
                    return _cl._enqueue_copy_buffer_to_image(
                            queue, src, dest, **kwargs)
                elif src.type in _IMAGE_MEM_OBJ_TYPES:
                    return _cl._enqueue_copy_image(queue, src, dest, **kwargs)
                else:
                    raise ValueError("invalid src mem object type")
            else:
                # assume from-host
                origin = kwargs.pop("origin")
                region = kwargs.pop("region")

                # 'pitches' is split into the separate row/slice pitch
                # keywords forwarded to the low-level call.
                pitches = kwargs.pop("pitches", (0, 0))
                if len(pitches) == 1:
                    kwargs["row_pitch"], = pitches
                else:
                    kwargs["row_pitch"], kwargs["slice_pitch"] = pitches

                return _cl._enqueue_write_image(
                        queue, dest, origin, region, src, **kwargs)

            # }}}
        else:
            raise ValueError("invalid dest mem object type")

    elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVMPointer):
        # {{{ ... -> SVM

        if not isinstance(src, SVMPointer):
            src = SVM(src)

        is_blocking = kwargs.pop("is_blocking", True)

        # These are NOT documented. They only support consistency with the
        # Buffer-based API for the sake of the Array.
        if kwargs.pop("src_offset", 0) != 0:
            raise ValueError("src_offset must be 0")
        if kwargs.pop("dst_offset", 0) != 0:
            raise ValueError("dst_offset must be 0")

        return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs)

        # }}}

    else:
        # assume to-host

        if isinstance(src, MemoryObjectHolder):
            if src.type == mem_object_type.BUFFER:
                if "buffer_origin" in kwargs:
                    return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs)
                else:
                    # Translate the deprecated 'device_offset' spelling; on a
                    # read the device side is the source.
                    device_offset = kwargs.pop("device_offset", None)
                    if device_offset is not None:
                        if "src_offset" in kwargs:
                            raise TypeError("may not specify both 'device_offset' "
                                    "and 'src_offset'")

                        # The message names the keyword that is actually
                        # deprecated ('device_offset', not 'dst_offset').
                        warn("The 'device_offset' argument of enqueue_copy "
                                "is deprecated. Use 'src_offset' instead. "
                                "'device_offset' will stop working in 2023.x.",
                                DeprecationWarning, stacklevel=2)

                        kwargs["src_offset"] = device_offset

                    return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)

            elif src.type in _IMAGE_MEM_OBJ_TYPES:
                origin = kwargs.pop("origin")
                region = kwargs.pop("region")

                pitches = kwargs.pop("pitches", (0, 0))
                if len(pitches) == 1:
                    kwargs["row_pitch"], = pitches
                else:
                    kwargs["row_pitch"], kwargs["slice_pitch"] = pitches

                return _cl._enqueue_read_image(
                        queue, src, origin, region, dest, **kwargs)
            else:
                raise ValueError("invalid src mem object type")
        elif isinstance(src, SVMPointer):
            # {{{ svm -> host

            # dest is not a SVM instance, otherwise we'd be in the branch above

            # This is NOT documented. It only supports consistency with the
            # Buffer-based API for the sake of the Array.
            if kwargs.pop("src_offset", 0) != 0:
                raise ValueError("src_offset must be 0")

            is_blocking = kwargs.pop("is_blocking", True)
            return _cl._enqueue_svm_memcpy(
                    queue, is_blocking, SVM(dest), src, **kwargs)

            # }}}
        else:
            # assume from-host
            raise TypeError("enqueue_copy cannot perform host-to-host transfers")
2050
+
2051
+ # }}}
2052
+
2053
+
2054
+ # {{{ enqueue_fill
2055
+
2056
def enqueue_fill(queue: CommandQueue,
        dest: "Union[MemoryObject, SVMPointer]",
        pattern: Any, size: int, *, offset: int = 0,
        wait_for: Optional[Sequence[Event]] = None) -> Event:
    """Fill *dest* with *pattern*, dispatching on the type of *dest*.

    A :class:`MemoryObjectHolder` is handed to :func:`enqueue_fill_buffer`;
    an :class:`SVMPointer` is handed to :func:`enqueue_svm_memfill`.

    :arg size: forwarded as the size/byte count of the fill.
    :arg offset: forwarded to :func:`enqueue_fill_buffer`. Must be zero
        (falsy) for SVM destinations.
    :raises NotImplementedError: if a nonzero *offset* is given together
        with an SVM destination.
    :raises TypeError: if *dest* is neither kind of destination.

    .. versionadded:: 2022.2
    """
    if isinstance(dest, MemoryObjectHolder):
        return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for)

    if not isinstance(dest, SVMPointer):
        raise TypeError(f"enqueue_fill does not know how to fill '{type(dest)}'")

    if offset:
        raise NotImplementedError(
                "enqueue_fill with SVM does not yet support offsets")

    return enqueue_svm_memfill(queue, dest, pattern, size, wait_for)
2072
+
2073
+ # }}}
2074
+
2075
+
2076
+ # {{{ image creation
2077
+
2078
# Maps numpy scalar dtypes to the (non-normalized) image channel type.
DTYPE_TO_CHANNEL_TYPE = {
    np.dtype(_np_type): _cl_channel_type
    for _np_type, _cl_channel_type in [
        (np.float32, channel_type.FLOAT),
        (np.int16, channel_type.SIGNED_INT16),
        (np.int32, channel_type.SIGNED_INT32),
        (np.int8, channel_type.SIGNED_INT8),
        (np.uint16, channel_type.UNSIGNED_INT16),
        (np.uint32, channel_type.UNSIGNED_INT32),
        (np.uint8, channel_type.UNSIGNED_INT8),
    ]
}
# Half-precision floats are only registered if numpy provides the type.
if hasattr(np, "float16"):
    DTYPE_TO_CHANNEL_TYPE[np.dtype(np.float16)] = channel_type.HALF_FLOAT

# Maps numpy integer dtypes to the normalized image channel type.
DTYPE_TO_CHANNEL_TYPE_NORM = {
    np.dtype(_np_type): _cl_channel_type
    for _np_type, _cl_channel_type in [
        (np.int16, channel_type.SNORM_INT16),
        (np.int8, channel_type.SNORM_INT8),
        (np.uint16, channel_type.UNORM_INT16),
        (np.uint8, channel_type.UNORM_INT8),
    ]
}
2100
+
2101
+
2102
def image_from_array(ctx, ary, num_channels=None, mode="r", norm_int=False):
    """Create an :class:`Image` initialized from the host array *ary*.

    :arg ary: a C-contiguous array.
    :arg num_channels: number of channels per pixel. If *None*, it is
        derived from the dtype via
        ``pyopencl.cltypes.vec_type_to_scalar_and_count``, falling back to
        one channel for dtypes not found there. If greater than one, the
        last axis of *ary* must have that length and is treated as the
        channel axis.
    :arg mode: ``"r"`` for a read-only image, ``"w"`` for a write-only one.
    :arg norm_int: if true, look up the channel type in
        ``DTYPE_TO_CHANNEL_TYPE_NORM`` instead of ``DTYPE_TO_CHANNEL_TYPE``.
    :raises ValueError: if *ary* is not C-contiguous or *mode* is invalid.
    :raises RuntimeError: if the last axis does not match *num_channels*.
    """
    if not ary.flags.c_contiguous:
        raise ValueError("array must be C-contiguous")

    dtype = ary.dtype
    shape = ary.shape
    strides = ary.strides

    if num_channels is None:
        try:
            dtype, num_channels = \
                    pyopencl.cltypes.vec_type_to_scalar_and_count[dtype]
        except KeyError:
            # Not a known vector type: treat it as a scalar type.
            num_channels = 1
    elif num_channels != 1:
        if ary.shape[-1] != num_channels:
            raise RuntimeError("last dimension must be equal to number of channels")

        # The trailing axis holds the channels, not a spatial dimension.
        shape = ary.shape[:-1]
        strides = ary.strides[:-1]

    if mode not in ("r", "w"):
        raise ValueError("invalid value '%s' for 'mode'" % mode)
    mode_flags = mem_flags.READ_ONLY if mode == "r" else mem_flags.WRITE_ONLY

    order_by_channel_count = {
            1: channel_order.R,
            2: channel_order.RG,
            3: channel_order.RGB,
            4: channel_order.RGBA,
            }
    img_format = order_by_channel_count[num_channels]

    assert ary.strides[-1] == ary.dtype.itemsize

    channel_type_map = (
            DTYPE_TO_CHANNEL_TYPE_NORM if norm_int else DTYPE_TO_CHANNEL_TYPE)
    img_channel_type = channel_type_map[dtype]

    # Shape and pitches are passed in reversed axis order; the first
    # reversed stride is dropped.
    return Image(ctx, mode_flags | mem_flags.COPY_HOST_PTR,
            ImageFormat(img_format, img_channel_type),
            shape=shape[::-1], pitches=strides[::-1][1:],
            hostbuf=ary)
2154
+
2155
+ # }}}
2156
+
2157
+
2158
+ # {{{ enqueue_* compatibility shims
2159
+
2160
def enqueue_marker(queue, wait_for=None):
    """Enqueue a marker on *queue* and return the value of the low-level call.

    If both ``queue._get_cl_version()`` and the CL header version are at
    least 1.2, the wait-list variant is used. Otherwise any events in
    *wait_for* are waited on first and a plain marker is enqueued.
    """
    has_cl_1_2 = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if has_cl_1_2:
        return _cl._enqueue_marker_with_wait_list(queue, wait_for)

    if wait_for:
        _cl._enqueue_wait_for_events(queue, wait_for)
    return _cl._enqueue_marker(queue)
2167
+
2168
+
2169
def enqueue_barrier(queue, wait_for=None):
    """Enqueue a barrier on *queue* and return the value of the low-level call.

    If both ``queue._get_cl_version()`` and the CL header version are at
    least 1.2, the wait-list variant is used. Otherwise a plain barrier is
    enqueued, any events in *wait_for* are waited on, and a marker is
    enqueued whose result is returned.
    """
    has_cl_1_2 = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if has_cl_1_2:
        return _cl._enqueue_barrier_with_wait_list(queue, wait_for)

    _cl._enqueue_barrier(queue)
    if wait_for:
        _cl._enqueue_wait_for_events(queue, wait_for)
    return _cl._enqueue_marker(queue)
2177
+
2178
+
2179
def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None):
    """Fill *mem* with *pattern* via the low-level fill-buffer call.

    A warning is issued (but the call is still attempted) unless both
    ``queue._get_cl_version()`` and the CL header version are at least 1.2.

    :returns: whatever ``_cl._enqueue_fill_buffer`` returns.
    """
    cl_1_2_available = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if not cl_1_2_available:
        warn(
            "The context for this queue does not declare OpenCL 1.2 support, so "
            "the next thing you might see is a crash",
            stacklevel=2)

    # On PyPy, numpy scalars are converted to arrays before being passed on.
    needs_array_pattern = _PYPY and isinstance(pattern, np.generic)
    if needs_array_pattern:
        pattern = np.asarray(pattern)

    return _cl._enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for)
2190
+
2191
+ # }}}
2192
+
2193
+
2194
+ # {{{ numpy-like svm allocation
2195
+
2196
def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None):
    """Fill shared virtual memory with a pattern.

    :arg dest: a Python buffer object, or any implementation of :class:`SVMPointer`.
        Anything that is not an :class:`SVMPointer` is wrapped in :class:`SVM`.
    :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray`) with the
        fill pattern to be used.
    :arg byte_count: The size of the memory to be filled. Defaults to the
        entirety of *dest*.

    |std-enqueue-blurb|

    .. versionadded:: 2016.2
    """
    svm_dest = dest if isinstance(dest, SVMPointer) else SVM(dest)

    return _cl._enqueue_svm_memfill(
            queue, svm_dest, pattern,
            byte_count=byte_count, wait_for=wait_for)
2215
+
2216
+
2217
def enqueue_svm_migratemem(queue, svms, flags, wait_for=None):
    """Enqueue a migration of the given SVM regions.

    :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy`
        arrays), or any implementation of :class:`SVMPointer`.
    :arg flags: a combination of :class:`mem_migration_flags`

    |std-enqueue-blurb|

    .. versionadded:: 2016.2

    This function requires OpenCL 2.1.
    """
    # Thin wrapper: all arguments are forwarded unchanged.
    evt = _cl._enqueue_svm_migratemem(queue, svms, flags, wait_for)
    return evt
2231
+
2232
+
2233
def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None):
    """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype*
    and *order*. (See :func:`numpy.empty` for the meaning of these arguments.)
    The array will be allocated in shared virtual memory belonging
    to *ctx*.

    :arg ctx: a :class:`Context`
    :arg flags: a combination of flags from :class:`svm_mem_flags`.
    :arg shape: an iterable of dimensions, or a single integer.
    :arg order: one of ``"C"``, ``"c"``, ``"F"`` or ``"f"``.
    :arg alignment: the number of bytes to which the beginning of the memory
        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *dtype*.
    :arg queue: passed through unchanged to the underlying allocation.

    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
        is a :class:`SVMAllocation`.
    :raises TypeError: if *shape* is neither iterable nor an integer.
    :raises ValueError: if *order* is not recognized.

    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
    will likely want to wrap the returned array in an :class:`SVM` tag.

    .. versionadded:: 2016.2

    .. versionchanged:: 2022.2

        *queue* argument added.
    """

    dtype = np.dtype(dtype)

    try:
        # The array interface requires the shape to be a tuple, so any
        # iterable of dimensions is normalized here.
        shape = tuple(shape)
        s = 1
        for dim in shape:
            s *= dim
    except TypeError:
        admissible_types = (int, np.integer)

        if not isinstance(shape, admissible_types):
            raise TypeError("shape must either be iterable or "
                    "castable to an integer")
        s = shape
        shape = (shape,)

    itemsize = dtype.itemsize
    nbytes = s * itemsize

    # Compare against the exact spellings: a substring test such as
    # ``order in "fF"`` would also accept "" and "fF".
    if order in ("f", "F"):
        fortran_order = True
    elif order in ("c", "C"):
        fortran_order = False
    else:
        raise ValueError(f"order not recognized: {order}")

    from pyopencl.compyte.array import c_contiguous_strides, f_contiguous_strides

    if fortran_order:
        strides = f_contiguous_strides(itemsize, shape)
    else:
        strides = c_contiguous_strides(itemsize, shape)

    descr = dtype.descr

    interface = {
        "version": 3,
        "shape": shape,
        "strides": strides,
        }

    if len(descr) == 1:
        interface["typestr"] = descr[0][1]
    else:
        # Multi-field dtypes are described as a void type plus a field list.
        interface["typestr"] = "V%d" % itemsize
        interface["descr"] = descr

    if alignment is None:
        alignment = itemsize

    svm_alloc = _OverriddenArrayInterfaceSVMAllocation(
            ctx, nbytes, alignment, flags, _interface=interface,
            queue=queue)
    return np.asarray(svm_alloc)
2305
+
2306
+
2307
def svm_empty_like(ctx, flags, ary, alignment=None):
    """Allocate an empty :class:`numpy.ndarray` like the existing
    :class:`numpy.ndarray` *ary*. The array will be allocated in shared
    virtual memory belonging to *ctx*.

    The shape and dtype are taken from *ary*; the memory order is C if *ary*
    is C-contiguous, otherwise Fortran if it is Fortran-contiguous.

    :arg ctx: a :class:`Context`
    :arg flags: a combination of flags from :class:`svm_mem_flags`.
    :arg alignment: the number of bytes to which the beginning of the memory
        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *dtype*.

    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
        is a :class:`SVMAllocation`.
    :raises ValueError: if *ary* is neither C- nor Fortran-contiguous.

    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
    will likely want to wrap the returned array in an :class:`SVM` tag.

    .. versionadded:: 2016.2
    """
    if ary.flags.c_contiguous:
        layout = "C"
    elif ary.flags.f_contiguous:
        layout = "F"
    else:
        raise ValueError("array is neither C- nor Fortran-contiguous")

    return svm_empty(
            ctx, flags, ary.shape, ary.dtype, layout, alignment=alignment)
2334
+
2335
+
2336
def csvm_empty(ctx, shape, dtype, order="C", alignment=None):
    """
    Like :func:`svm_empty`, but with *flags* set for a coarse-grain read-write
    buffer.

    .. versionadded:: 2016.2
    """
    coarse_grain_flags = svm_mem_flags.READ_WRITE
    return svm_empty(
            ctx, coarse_grain_flags, shape, dtype,
            order=order, alignment=alignment)
2344
+
2345
+
2346
def csvm_empty_like(ctx, ary, alignment=None):
    """
    Like :func:`svm_empty_like`, but with *flags* set for a coarse-grain
    read-write buffer.

    :arg alignment: forwarded to :func:`svm_empty_like`.

    .. versionadded:: 2016.2
    """
    # *alignment* used to be accepted but dropped; forward it so the
    # caller's request takes effect.
    return svm_empty_like(
            ctx, svm_mem_flags.READ_WRITE, ary, alignment=alignment)
2354
+
2355
+
2356
def fsvm_empty(ctx, shape, dtype, order="C", alignment=None):
    """
    Like :func:`svm_empty`, but with *flags* set for a fine-grain read-write
    buffer.

    .. versionadded:: 2016.2
    """
    fine_grain_flags = (
            svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER)
    return svm_empty(
            ctx, fine_grain_flags, shape, dtype,
            order=order, alignment=alignment)
2366
+
2367
+
2368
def fsvm_empty_like(ctx, ary, alignment=None):
    """
    Like :func:`svm_empty_like`, but with *flags* set for a fine-grain
    read-write buffer.

    :arg alignment: forwarded to :func:`svm_empty_like`.

    .. versionadded:: 2016.2
    """
    # *alignment* used to be accepted but dropped; forward it so the
    # caller's request takes effect.
    return svm_empty_like(
            ctx,
            svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER,
            ary,
            alignment=alignment)
2379
+
2380
+ # }}}
2381
+
2382
+
2383
# Classes collected as kernel argument types.
_KERNEL_ARG_CLASSES: Tuple[type, ...] = (
        MemoryObjectHolder,
        Sampler,
        CommandQueue,
        LocalMemory,
        )
if get_cl_header_version() >= (2, 0):
    # SVM is only added when the CL headers are at least version 2.0.
    _KERNEL_ARG_CLASSES += (SVM,)
2391
+
2392
+
2393
+ # vim: foldmethod=marker