pyopencl 2024.3__cp312-cp312-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (43) hide show
  1. pyopencl/.libs/libOpenCL-1ef0e16e.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +2410 -0
  3. pyopencl/_cl.cpython-312-x86_64-linux-musl.so +0 -0
  4. pyopencl/_cluda.py +54 -0
  5. pyopencl/_mymako.py +14 -0
  6. pyopencl/algorithm.py +1449 -0
  7. pyopencl/array.py +3437 -0
  8. pyopencl/bitonic_sort.py +242 -0
  9. pyopencl/bitonic_sort_templates.py +594 -0
  10. pyopencl/cache.py +535 -0
  11. pyopencl/capture_call.py +177 -0
  12. pyopencl/characterize/__init__.py +456 -0
  13. pyopencl/characterize/performance.py +237 -0
  14. pyopencl/cl/pyopencl-airy.cl +324 -0
  15. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  16. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  17. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  18. pyopencl/cl/pyopencl-complex.h +303 -0
  19. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  20. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  21. pyopencl/cl/pyopencl-random123/array.h +325 -0
  22. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  23. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  24. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  25. pyopencl/clmath.py +280 -0
  26. pyopencl/clrandom.py +409 -0
  27. pyopencl/cltypes.py +137 -0
  28. pyopencl/compyte/.gitignore +21 -0
  29. pyopencl/compyte/__init__.py +0 -0
  30. pyopencl/compyte/array.py +214 -0
  31. pyopencl/compyte/dtypes.py +290 -0
  32. pyopencl/compyte/pyproject.toml +54 -0
  33. pyopencl/elementwise.py +1171 -0
  34. pyopencl/invoker.py +421 -0
  35. pyopencl/ipython_ext.py +68 -0
  36. pyopencl/reduction.py +786 -0
  37. pyopencl/scan.py +1915 -0
  38. pyopencl/tools.py +1527 -0
  39. pyopencl/version.py +9 -0
  40. pyopencl-2024.3.dist-info/METADATA +108 -0
  41. pyopencl-2024.3.dist-info/RECORD +43 -0
  42. pyopencl-2024.3.dist-info/WHEEL +5 -0
  43. pyopencl-2024.3.dist-info/licenses/LICENSE +104 -0
pyopencl/__init__.py ADDED
@@ -0,0 +1,2410 @@
1
+ from __future__ import annotations
2
+
3
+
4
+ __copyright__ = "Copyright (C) 2009-15 Andreas Kloeckner"
5
+
6
+ __license__ = """
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in
15
+ all copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
+ THE SOFTWARE.
24
+ """
25
+
26
+ import logging
27
+ from sys import intern
28
+ from typing import Any, Sequence
29
+ from warnings import warn
30
+
31
+ # must import, otherwise dtype registry will not be fully populated
32
+ import pyopencl.cltypes
33
+ from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa: F401
34
+
35
+
36
+ __version__ = VERSION_TEXT
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+ # This helps ocl-icd find the shipped OpenCL ICDs, cf.
41
+ # https://github.com/isuruf/ocl-icd/commit/3862386b51930f95d9ad1089f7157a98165d5a6b
42
+ # via
43
+ # https://github.com/inducer/pyopencl/blob/0b3d0ef92497e6838eea300b974f385f94cb5100/scripts/build-wheels.sh#L43-L44
44
+ import os
45
+
46
+
47
+ os.environ["PYOPENCL_HOME"] = os.path.dirname(os.path.abspath(__file__))
48
+
49
+ try:
50
+ import pyopencl._cl as _cl
51
+ except ImportError:
52
+ from os.path import dirname, join, realpath
53
+ if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)):
54
+ warn(
55
+ "It looks like you are importing PyOpenCL from "
56
+ "its source directory. This likely won't work.",
57
+ stacklevel=2)
58
+ raise
59
+
60
+ import numpy as np
61
+
62
+ import sys
63
+
64
+ _PYPY = "__pypy__" in sys.builtin_module_names
65
+
66
+ from pyopencl._cl import ( # noqa: F401
67
+ get_cl_header_version,
68
+ program_kind,
69
+ status_code,
70
+ platform_info,
71
+ device_type,
72
+ device_info,
73
+ device_topology_type_amd,
74
+ device_fp_config,
75
+ device_mem_cache_type,
76
+ device_local_mem_type,
77
+ device_exec_capabilities,
78
+ device_svm_capabilities,
79
+
80
+ command_queue_properties,
81
+ context_info,
82
+ gl_context_info,
83
+ context_properties,
84
+ command_queue_info,
85
+ queue_properties,
86
+
87
+ mem_flags,
88
+ svm_mem_flags,
89
+
90
+ channel_order,
91
+ channel_type,
92
+ mem_object_type,
93
+ mem_info,
94
+ image_info,
95
+ pipe_info,
96
+ pipe_properties,
97
+ addressing_mode,
98
+ filter_mode,
99
+ sampler_info,
100
+ sampler_properties,
101
+ map_flags,
102
+ program_info,
103
+ program_build_info,
104
+ program_binary_type,
105
+
106
+ kernel_info,
107
+ kernel_arg_info,
108
+ kernel_arg_address_qualifier,
109
+ kernel_arg_access_qualifier,
110
+ kernel_arg_type_qualifier,
111
+ kernel_work_group_info,
112
+ kernel_sub_group_info,
113
+
114
+ event_info,
115
+ command_type,
116
+ command_execution_status,
117
+ profiling_info,
118
+ mem_migration_flags,
119
+ device_partition_property,
120
+ device_affinity_domain,
121
+ device_atomic_capabilities,
122
+ device_device_enqueue_capabilities,
123
+
124
+ version_bits,
125
+ khronos_vendor_id,
126
+
127
+ Error, MemoryError, LogicError, RuntimeError,
128
+
129
+ Platform,
130
+ get_platforms,
131
+
132
+ Device,
133
+ Context,
134
+ CommandQueue,
135
+ LocalMemory,
136
+ MemoryObjectHolder,
137
+ MemoryObject,
138
+ MemoryMap,
139
+ Buffer,
140
+
141
+ _Program,
142
+ Kernel,
143
+
144
+ Event,
145
+ wait_for_events,
146
+ NannyEvent,
147
+
148
+ enqueue_nd_range_kernel,
149
+
150
+ _enqueue_marker,
151
+
152
+ _enqueue_read_buffer,
153
+ _enqueue_write_buffer,
154
+ _enqueue_copy_buffer,
155
+ _enqueue_read_buffer_rect,
156
+ _enqueue_write_buffer_rect,
157
+ _enqueue_copy_buffer_rect,
158
+
159
+ _enqueue_read_image,
160
+ _enqueue_copy_image,
161
+ _enqueue_write_image,
162
+ _enqueue_copy_image_to_buffer,
163
+ _enqueue_copy_buffer_to_image,
164
+
165
+ have_gl,
166
+
167
+ ImageFormat,
168
+ get_supported_image_formats,
169
+
170
+ Image,
171
+ Sampler,
172
+
173
+ # This class is available unconditionally, even though CL only
174
+ # has it on CL2.0 and newer.
175
+ Pipe,
176
+ )
177
+
178
+
179
+ try:
180
+ from pyopencl._cl import DeviceTopologyAmd # noqa: F401
181
+ from pyopencl._cl import enqueue_copy_buffer_p2p_amd # noqa: F401
182
+ except ImportError:
183
+ pass
184
+
185
+ if not _PYPY:
186
+ # FIXME: Add back to default set when pypy support catches up
187
+ from pyopencl._cl import enqueue_map_buffer # noqa: F401
188
+ from pyopencl._cl import enqueue_map_image # noqa: F401
189
+
190
+ if get_cl_header_version() >= (1, 1):
191
+ from pyopencl._cl import UserEvent # noqa: F401
192
+ if get_cl_header_version() >= (1, 2):
193
+ from pyopencl._cl import ImageDescriptor
194
+ from pyopencl._cl import ( # noqa: F401
195
+ _enqueue_barrier_with_wait_list, _enqueue_fill_buffer,
196
+ _enqueue_marker_with_wait_list, enqueue_fill_image,
197
+ enqueue_migrate_mem_objects, unload_platform_compiler)
198
+
199
+ if get_cl_header_version() >= (2, 0):
200
+ from pyopencl._cl import SVM, SVMAllocation, SVMPointer
201
+
202
+ if _cl.have_gl():
203
+ from pyopencl._cl import ( # noqa: F401
204
+ GLBuffer, GLRenderBuffer, GLTexture, gl_object_type, gl_texture_info)
205
+
206
+ try:
207
+ from pyopencl._cl import get_apple_cgl_share_group # noqa: F401
208
+ except ImportError:
209
+ pass
210
+
211
+ try:
212
+ from pyopencl._cl import enqueue_acquire_gl_objects # noqa: F401
213
+ from pyopencl._cl import enqueue_release_gl_objects # noqa: F401
214
+ except ImportError:
215
+ pass
216
+
217
+ import inspect as _inspect
218
+
219
+
220
+ CONSTANT_CLASSES = tuple(
221
+ getattr(_cl, name) for name in dir(_cl)
222
+ if _inspect.isclass(getattr(_cl, name))
223
+ and name[0].islower() and name not in ["zip", "map", "range"])
224
+
225
+ BITFIELD_CONSTANT_CLASSES = (
226
+ _cl.device_type,
227
+ _cl.device_fp_config,
228
+ _cl.device_exec_capabilities,
229
+ _cl.command_queue_properties,
230
+ _cl.mem_flags,
231
+ _cl.map_flags,
232
+ _cl.kernel_arg_type_qualifier,
233
+ _cl.device_affinity_domain,
234
+ _cl.mem_migration_flags,
235
+ _cl.device_svm_capabilities,
236
+ _cl.queue_properties,
237
+ _cl.svm_mem_flags,
238
+ _cl.device_atomic_capabilities,
239
+ _cl.device_device_enqueue_capabilities,
240
+ _cl.version_bits,
241
+ )
242
+
243
+
244
+ # {{{ diagnostics
245
+
246
class CompilerWarning(UserWarning):
    """Warning category used when reporting OpenCL compiler output."""
248
+
249
+
250
class CommandQueueUsedAfterExit(UserWarning):
    """Warning category for command-queue misuse.

    NOTE(review): presumably issued when a queue is used after its ``with``
    block has exited; the code emitting it is not part of this section.
    """
252
+
253
+
254
def compiler_output(text: str) -> None:
    """Issue a :class:`CompilerWarning` about non-empty compiler output.

    :arg text: the full compiler output. It is only included in the warning
        when the environment variable ``PYOPENCL_COMPILER_OUTPUT`` is set to
        a value that :func:`pytools.strtobool` considers true; otherwise a
        short hint on how to enable it is shown instead.
    """
    from pytools import strtobool

    show_everything = strtobool(
        os.environ.get("PYOPENCL_COMPILER_OUTPUT", "False"))

    if show_everything:
        message = text
    else:
        message = ("Non-empty compiler output encountered. Set the "
                   "environment variable PYOPENCL_COMPILER_OUTPUT=1 "
                   "to see more.")

    warn(message, CompilerWarning, stacklevel=3)
262
+
263
+ # }}}
264
+
265
+
266
+ # {{{ find pyopencl shipped source code
267
+
268
def _find_pyopencl_include_path() -> str:
    """Return the directory holding the OpenCL sources shipped with PyOpenCL.

    The ``cl`` directory next to this file is tried first. If it does not
    exist, the ``cl`` resource of the ``pyopencl`` package is looked up via
    ``importlib.resources`` (or the ``importlib_resources`` backport).

    :returns: the path, wrapped in double quotes if it contains a space and
        does not already start with a double quote.
    :raises OSError: if neither location exists.
    """
    import os.path as osp

    candidate = osp.join(osp.abspath(osp.dirname(__file__)), "cl")

    if not osp.exists(candidate):
        try:
            # NOTE: only available in Python >=3.9
            from importlib.resources import files
        except ImportError:
            from importlib_resources import files  # type: ignore[no-redef]

        candidate = str(files("pyopencl") / "cl")
        if not osp.exists(candidate):
            raise OSError("Unable to find PyOpenCL include path")

    # A path with a space must be quoted to survive as a single build option,
    # see https://github.com/inducer/pyopencl/issues/250 for discussion.
    needs_quotes = " " in candidate and not candidate.startswith('"')
    return '"' + candidate + '"' if needs_quotes else candidate
290
+
291
+ # }}}
292
+
293
+
294
+ # {{{ build option munging
295
+
296
def _split_options_if_necessary(options):
    """Return *options* as a sequence of individual option tokens.

    A string is split with :func:`shlex.split`; any other value is returned
    unchanged.
    """
    if not isinstance(options, str):
        return options

    import shlex
    return shlex.split(options)
303
+
304
+
305
def _find_include_path(options):
    """Collect the include directories named by ``-I``/``/I`` in *options*.

    :arg options: a sequence of option strings, one token each.
    :returns: a list that always starts with ``"."``, followed by the
        include directories in order of appearance.

    Both the joined (``-Ipath``) and the split (``-I``, ``path``) spellings
    are recognized. One pair of surrounding double quotes is removed from
    each directory.
    """

    def strip_quotes(path):
        if path.startswith('"') and path.endswith('"'):
            return path[1:-1]
        return path

    search_dirs = ["."]
    nothing_left = object()

    tokens = iter(options)
    for token in tokens:
        flag = token.strip()
        if not flag.startswith(("-I", "/I")):
            continue

        if len(flag) > 2:
            # Joined form: the directory is the remainder of the token.
            search_dirs.append(strip_quotes(flag[2:].lstrip()))
            continue

        # Split form: the directory is the following token. It is consumed
        # here, so it is never examined as an option in its own right.
        # A bare flag at the very end contributes nothing.
        operand = next(tokens, nothing_left)
        if operand is not nothing_left:
            search_dirs.append(strip_quotes(operand))

    return search_dirs
331
+
332
+
333
def _options_to_bytestring(options):
    """Join *options* into one space-separated :class:`bytes` string.

    :class:`str` entries are encoded as UTF-8; all other entries are passed
    to :meth:`bytes.join` unchanged.
    """
    return b" ".join(
        opt.encode("utf-8") if isinstance(opt, str) else opt
        for opt in options)
341
+
342
+
343
+ # }}}
344
+
345
+
346
+ # {{{ Program (wrapper around _Program, adds caching support)
347
+
348
+ from pytools import strtobool
349
+
350
+
351
+ _PYOPENCL_NO_CACHE = strtobool(os.environ.get("PYOPENCL_NO_CACHE", "false"))
352
+
353
+ _DEFAULT_BUILD_OPTIONS: list[str] = []
354
+ _DEFAULT_INCLUDE_OPTIONS: list[str] = ["-I", _find_pyopencl_include_path()]
355
+
356
+ # map of platform.name to build options list
357
+ _PLAT_BUILD_OPTIONS: dict[str, list[str]] = {
358
+ "Oclgrind": ["-D", "PYOPENCL_USING_OCLGRIND"],
359
+ }
360
+
361
+
362
def enable_debugging(platform_or_context):
    """Turn on debugging for code subsequently compiled by PyOpenCL.

    :arg platform_or_context: the platform to enable debugging on. If a
        :class:`Context` is passed, the platform of its first device is used.

    Only platforms whose name contains ``"AMD Accelerated"`` are handled:
    ``-g -O0`` is added to their build options and the environment variable
    ``CPU_MAX_COMPUTE_UNITS`` is set to ``"1"``. For any other platform a
    warning is issued and nothing is changed.
    """
    platform = (
        platform_or_context.devices[0].platform
        if isinstance(platform_or_context, Context)
        else platform_or_context)

    plat_name = platform.name

    if "AMD Accelerated" not in plat_name:
        warn(f"Do not know how to enable debugging on '{plat_name}'",
             stacklevel=2)
        return

    _PLAT_BUILD_OPTIONS.setdefault(plat_name, []).extend(["-g", "-O0"])
    os.environ["CPU_MAX_COMPUTE_UNITS"] = "1"
380
+
381
+
382
class Program:
    """Wrapper around the low-level ``_cl._Program`` that adds build caching.

    Three construction forms are accepted:

    * ``Program(prg)`` wraps an existing low-level program object.
    * ``Program(context, source)`` stores *source*; the low-level program is
      only created by :meth:`build` or by the first pre-build attribute
      access. Input recognized by :func:`pyopencl.tools.is_spirv` is turned
      into a low-level program right away and never goes through the cache.
    * ``Program(context, device, binaries)`` creates the low-level program
      from binaries.

    Attributes that are not found on the wrapper are looked up as kernel
    names, see :meth:`__getattr__`.
    """

    def __init__(self, arg1, arg2=None, arg3=None):
        # Set first so that every construction form, including the early
        # SPIR-V return below, leaves this attribute defined. __getattr__
        # reads it on every kernel lookup.
        self._build_duration_info = None

        if arg2 is None:
            # 1-argument form: program
            self._prg = arg1
            self._context = self._prg.get_info(program_info.CONTEXT)

        elif arg3 is None:
            # 2-argument form: context, source
            context, source = arg1, arg2

            from pyopencl.tools import is_spirv
            if is_spirv(source):
                # FIXME no caching in SPIR-V case
                self._context = context
                self._prg = _cl._create_program_with_il(context, source)
                return

            self._context = context
            self._source = source
            self._prg = None

        else:
            # 3-argument form: context, device, binaries
            context, device, binaries = arg1, arg2, arg3
            self._context = context
            self._prg = _cl._Program(context, device, binaries)

    def _get_prg(self):
        """Return the low-level program, creating it from source if needed."""
        if self._prg is not None:
            return self._prg
        else:
            # "no program" can only happen in from-source case.
            warn("Pre-build attribute access defeats compiler caching.",
                    stacklevel=3)

            self._prg = _cl._Program(self._context, self._source)
            return self._prg

    def get_info(self, arg):
        """Forward to ``get_info`` of the low-level program."""
        return self._get_prg().get_info(arg)

    def get_build_info(self, *args, **kwargs):
        """Forward to ``get_build_info`` of the low-level program."""
        return self._get_prg().get_build_info(*args, **kwargs)

    def all_kernels(self):
        """Forward to ``all_kernels`` of the low-level program."""
        return self._get_prg().all_kernels()

    @property
    def int_ptr(self):
        return self._get_prg().int_ptr
    int_ptr.__doc__ = _cl._Program.int_ptr.__doc__

    @staticmethod
    def from_int_ptr(int_ptr_value, retain=True):
        return Program(_cl._Program.from_int_ptr(int_ptr_value, retain))
    from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__

    def __getattr__(self, attr):
        """Look up *attr* as a kernel name and return the :class:`Kernel`.

        :raises AttributeError: if creating or querying the kernel raises
            :class:`LogicError`.
        """
        try:
            knl = Kernel(self, attr)
            # Nvidia does not raise errors even for invalid names,
            # but this will give an error if the kernel is invalid.
            knl.num_args  # noqa: B018

            if self._build_duration_info is not None:
                build_descr, _was_cached, duration = self._build_duration_info
                if duration > 0.2:
                    logger.info(
                        "build program: kernel '%s' was part of a "
                        "lengthy %s (%.2f s)", attr, build_descr, duration)

                # don't whine about build times more than once.
                self._build_duration_info = None

            return knl
        except LogicError as err:
            raise AttributeError("'%s' was not found as a program "
                    "info attribute or as a kernel name" % attr) from err

    # {{{ build

    @classmethod
    def _process_build_options(cls, context, options, _add_include_path=False):
        """Assemble the effective build options.

        :arg options: *None*, a string (split shell-style), a tuple or a list.
        :returns: a tuple ``(options_bytes, include_path)`` with the options
            joined into one byte string and the include directories found
            in them.

        The user's options are followed by the module-level default build
        and include options, the options registered for the platform of the
        context's first device, and finally the whitespace-split contents
        of the environment variable ``PYOPENCL_BUILD_OPTIONS``, if set.
        """
        if options is None:
            options = []
        if isinstance(options, tuple):
            options = list(options)

        options = _split_options_if_necessary(options)

        options = (options
                + _DEFAULT_BUILD_OPTIONS
                + _DEFAULT_INCLUDE_OPTIONS
                + _PLAT_BUILD_OPTIONS.get(
                    context.devices[0].platform.name, []))

        forced_options = os.environ.get("PYOPENCL_BUILD_OPTIONS")
        if forced_options:
            options = options + forced_options.split()

        return (
                _options_to_bytestring(options),
                _find_include_path(options))

    def build(self, options=None, devices=None, cache_dir=None):
        """Build the program and return *self*.

        :arg options: build options, see :meth:`_process_build_options`.
        :arg devices: passed on to the underlying build routine.
        :arg cache_dir: directory for the binary cache. If *None*, the
            ``cache_dir`` attribute of the context is used when present.

        If a low-level program already exists, or one is created here because
        caching is disabled or the device is reported to cache source builds
        itself, it is built directly. Otherwise the build goes through
        :func:`pyopencl.cache.create_built_program_from_source_cached`.
        """
        options_bytes, include_path = self._process_build_options(
                self._context, options)

        if cache_dir is None:
            cache_dir = getattr(self._context, "cache_dir", None)

        build_descr = None
        from pyopencl.characterize import has_src_build_cache

        if (
                (_PYOPENCL_NO_CACHE or has_src_build_cache(self._context.devices[0]))
                and self._prg is None):
            if _PYOPENCL_NO_CACHE:
                build_descr = "uncached source build (cache disabled by user)"
            else:
                build_descr = "uncached source build (assuming cached by ICD)"

            self._prg = _cl._Program(self._context, self._source)

        from time import time
        start_time = time()
        was_cached = False

        if self._prg is not None:
            # uncached

            if build_descr is None:
                build_descr = "uncached source build"

            self._build_and_catch_errors(
                    lambda: self._prg.build(options_bytes, devices),
                    options_bytes=options_bytes)

        else:
            # cached

            from pyopencl.cache import create_built_program_from_source_cached
            self._prg, was_cached = self._build_and_catch_errors(
                    lambda: create_built_program_from_source_cached(
                        self._context, self._source, options_bytes, devices,
                        cache_dir=cache_dir, include_path=include_path),
                    options_bytes=options_bytes, source=self._source)

            if was_cached:
                build_descr = "cache retrieval"
            else:
                build_descr = "source build resulting from a binary cache miss"

            # NOTE(review): the context reference is dropped here, so a
            # later build()/compile() on this object would fail when reading
            # self._context -- confirm that this is intended.
            del self._context

        end_time = time()

        self._build_duration_info = (build_descr, was_cached, end_time-start_time)

        return self

    def _build_and_catch_errors(self, build_func, options_bytes, source=None):
        """Call *build_func* and return its result.

        A :class:`RuntimeError` raised by *build_func* is replaced by a new
        :class:`RuntimeError` with the same ``code`` and ``routine`` whose
        message additionally lists the build options and, if *source* is
        given, the name of a temporary ``.cl`` file the source was saved to.
        """
        try:
            return build_func()
        except RuntimeError as e:
            msg = str(e)
            if options_bytes:
                msg = msg + "\n(options: %s)" % options_bytes.decode("utf-8")

            if source is not None:
                from tempfile import NamedTemporaryFile

                # delete=False: the file has to outlive this call so that
                # the user can inspect it.
                with NamedTemporaryFile(
                        mode="wt", delete=False, suffix=".cl") as srcfile:
                    srcfile.write(source)

                msg = msg + "\n(source saved as %s)" % srcfile.name

            code = e.code
            routine = e.routine

            err = RuntimeError(
                    _cl._ErrorRecord(
                        msg=msg,
                        code=code,
                        routine=routine))

        # Python 3.2 outputs the whole list of currently active exceptions
        # This serves to remove one (redundant) level from that nesting.
        raise err

    # }}}

    def compile(self, options=None, devices=None, headers=None):
        """Compile the program without linking and return *self*.

        :arg headers: an iterable of ``(name, program)`` pairs; *None* is
            treated as an empty list.
        """
        if headers is None:
            headers = []

        options_bytes, _ = self._process_build_options(self._context, options)

        self._get_prg().compile(options_bytes, devices,
                [(name, prg._get_prg()) for name, prg in headers])
        return self

    def __eq__(self, other):
        return self._get_prg() == other._get_prg()

    def __ne__(self, other):
        # Must be the exact negation of __eq__.
        return not (self._get_prg() == other._get_prg())

    def __hash__(self):
        return hash(self._get_prg())
596
+
597
+
598
def create_program_with_built_in_kernels(context, devices, kernel_names):
    """Return a :class:`Program` made from built-in kernels.

    :arg kernel_names: either a single string, passed on unchanged, or an
        iterable of strings, which are joined with ``":"``.
    """
    joined_names = (
        kernel_names
        if isinstance(kernel_names, str)
        else ":".join(kernel_names))

    raw_prg = _Program.create_with_built_in_kernels(
        context, devices, joined_names)
    return Program(raw_prg)
604
+
605
+
606
def link_program(context, programs, options=None, devices=None):
    """Link *programs* and return the result as a :class:`Program`.

    :arg programs: an iterable of :class:`Program` instances.
    :arg options: *None*, a string (split shell-style) or a sequence of
        option strings.
    """
    option_tokens = _split_options_if_necessary(
        [] if options is None else options)
    raw_inputs = [prg._get_prg() for prg in programs]

    return Program(
        _Program.link(
            context, raw_inputs, _options_to_bytestring(option_tokens),
            devices))
614
+
615
+ # }}}
616
+
617
+
618
+ # {{{ monkeypatch C++ wrappers to add functionality
619
+
620
+ def _add_functionality():
621
def generic_get_cl_version(self):
    """Parse ``self.version`` and return ``(major, minor)`` as integers.

    :raises RuntimeError: if the version string does not have the form
        ``"OpenCL <major>.<minor> <anything>"``.
    """
    import re

    version_string = self.version
    parsed = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string)

    if parsed is not None:
        major, minor = parsed.groups()
        return int(major), int(minor)

    raise RuntimeError("%s %s returned non-conformant "
                       "platform version string '%s'" %
                       (type(self).__name__, self, version_string))
631
+
632
+ # {{{ Platform
633
+
634
def platform_repr(self):
    """Return a representation showing the platform name and ``int_ptr``."""
    return "<pyopencl.Platform '{}' at 0x{:x}>".format(self.name, self.int_ptr)
636
+
637
+ Platform.__repr__ = platform_repr
638
+ Platform._get_cl_version = generic_get_cl_version
639
+
640
+ # }}}
641
+
642
+ # {{{ Device
643
+
644
def device_repr(self):
    """Return a representation with the stripped device and platform names."""
    dev_name = self.name.strip()
    plat_name = self.platform.name.strip()
    return f"<pyopencl.Device '{dev_name}' on '{plat_name}' at 0x{self.int_ptr:x}>"
647
+
648
def device_hashable_model_and_version_identifier(self):
    """Return a tuple of identifying device attributes.

    The tuple starts with the tag ``"v1"``, followed by the device's
    vendor, vendor id, name and version.
    """
    scheme_tag = "v1"
    return (
        scheme_tag,
        self.vendor,
        self.vendor_id,
        self.name,
        self.version,
    )
650
+
651
def device_persistent_unique_id(self):
    """Deprecated alias for ``hashable_model_and_version_identifier``.

    Issues a :class:`DeprecationWarning` and returns the same value.
    """
    deprecation_note = (
        "Device.persistent_unique_id is deprecated. "
        "Use Device.hashable_model_and_version_identifier instead.")
    warn(deprecation_note, DeprecationWarning, stacklevel=2)

    return device_hashable_model_and_version_identifier(self)
656
+
657
+ Device.__repr__ = device_repr
658
+
659
+ # undocumented for now:
660
+ Device._get_cl_version = generic_get_cl_version
661
+ Device.hashable_model_and_version_identifier = property(
662
+ device_hashable_model_and_version_identifier)
663
+ Device.persistent_unique_id = property(device_persistent_unique_id)
664
+
665
+ # }}}
666
+
667
+ # {{{ Context
668
+
669
def context_repr(self):
    """Return a representation showing ``int_ptr`` and the context's devices."""
    address = self.int_ptr
    device_list = ", ".join([repr(dev) for dev in self.devices])
    return f"<pyopencl.Context at 0x{address:x} on {device_list}>"
672
+
673
def context_get_cl_version(self):
    """Return the CL version reported by the platform of the first device."""
    first_device = self.devices[0]
    return first_device.platform._get_cl_version()
675
+
676
+ Context.__repr__ = context_repr
677
+ from pytools import memoize_method
678
+ Context._get_cl_version = memoize_method(context_get_cl_version)
679
+
680
+ # }}}
681
+
682
+ # {{{ CommandQueue
683
+
684
def command_queue_enter(self):
    """Context-manager entry: the queue itself is the managed object."""
    return self
686
+
687
def command_queue_exit(self, exc_type, exc_val, exc_tb):
    """Context-manager exit: call ``finish()`` and then ``_finalize()``.

    The exception arguments are ignored; nothing is returned, so an
    exception raised inside the ``with`` block is not suppressed.
    """
    for step in (self.finish, self._finalize):
        step()
690
+
691
def command_queue_get_cl_version(self):
    """Return the CL version reported by the queue's device."""
    queue_device = self.device
    return queue_device._get_cl_version()
693
+
694
+ CommandQueue.__enter__ = command_queue_enter
695
+ CommandQueue.__exit__ = command_queue_exit
696
+ CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version)
697
+
698
+ # }}}
699
+
700
+ # {{{ _Program (the internal, non-caching version)
701
+
702
def program_get_build_logs(self):
    """Return a list of ``(device, log)`` pairs, one per program device.

    If retrieving the log for a device raises an exception, the placeholder
    string ``"<error retrieving log>"`` is used for that device.
    """

    def log_for(dev):
        try:
            return self.get_build_info(dev, program_build_info.LOG)
        except Exception:
            return "<error retrieving log>"

    return [
        (dev, log_for(dev))
        for dev in self.get_info(_cl.program_info.DEVICES)]
713
+
714
def program_build(self, options_bytes, devices=None):
    """Build the low-level program and return *self*.

    :arg options_bytes: the build options as a byte string.
    :arg devices: passed on to the underlying ``_build``.
    :raises RuntimeError: (the ``_cl`` one) if ``_build`` raises
        :class:`Error`. The message is extended by the per-device build logs.

    If the build succeeds but some device produced a non-blank log, the
    logs are reported through :func:`compiler_output`.
    """
    separator = 75*"=" + "\n"

    failure = None
    try:
        self._build(options=options_bytes, devices=devices)
    except Error as e:
        detailed_msg = str(e) + "\n\n" + separator.join(
            f"Build on {dev}:\n\n{log}"
            for dev, log in self._get_build_logs())

        failure = _cl.RuntimeError(
            _cl._ErrorRecord(
                msg=detailed_msg,
                code=e.code,
                routine=e.routine))

    if failure is not None:
        # Raised outside of the handler on purpose: this removes one
        # (redundant) level from the nesting of reported exceptions.
        raise failure

    chatty_logs = [
        f"Build on {dev} succeeded, but said:\n\n{log}"
        for dev, log in self._get_build_logs()
        if log is not None and log.strip()]

    if not chatty_logs:
        return self

    kind = self.kind()
    if kind == program_kind.SOURCE:
        build_type = "From-source build"
    elif kind == program_kind.BINARY:
        build_type = "From-binary build"
    elif kind == program_kind.IL:
        build_type = "From-IL build"
    else:
        build_type = "Build"

    compiler_output("%s succeeded, but resulted in non-empty logs:\n%s"
                    % (build_type, separator.join(chatty_logs)))

    return self
755
+
756
+ _cl._Program._get_build_logs = program_get_build_logs
757
+ _cl._Program.build = program_build
758
+
759
+ # }}}
760
+
761
+ # {{{ Event
762
class ProfilingInfoGetter:
    """Attribute-style access to the profiling information of an event.

    Accessing ``getter.<name>`` looks up ``<NAME>`` (upper-cased) in
    ``_cl.profiling_info`` and returns the event's ``get_profiling_info``
    result for that constant.
    """

    def __init__(self, event):
        self.event = event

    def __getattr__(self, name):
        constants = _cl.profiling_info

        try:
            which = getattr(constants, name.upper())
        except AttributeError as err:
            raise AttributeError("%s has no attribute '%s'"
                                 % (type(self), name)) from err

        return self.event.get_profiling_info(which)
776
+
777
+ _cl.Event.profile = property(ProfilingInfoGetter)
778
+
779
+ # }}}
780
+
781
+ # {{{ Kernel
782
+
783
+ kernel_old_get_info = Kernel.get_info
784
+ kernel_old_get_work_group_info = Kernel.get_work_group_info
785
+
786
def kernel_set_arg_types(self, arg_types):
    """Generate and install argument-setting code for the given types.

    :arg arg_types: an iterable with one entry per kernel argument; it is
        converted to a tuple and handed to
        :func:`pyopencl.invoker.generate_enqueue_and_set_args`.
    """
    from pytools import single_valued

    from pyopencl.characterize import has_struct_arg_count_bug
    from pyopencl.invoker import generate_enqueue_and_set_args

    arg_types = tuple(arg_types)
    devices = self.context.devices

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    bug_per_device = [
        has_struct_arg_count_bug(dev, self.context) for dev in devices]

    some_affected = any(bug_per_device)
    all_affected = all(bug_per_device)

    # Work around the bug only if every device has it (and they agree on
    # which one); if only some devices have it, just warn.
    work_around = False
    if some_affected and all_affected:
        work_around = single_valued(bug_per_device)
    warn_only = some_affected and not all_affected

    # }}}

    generated = generate_enqueue_and_set_args(
        self.function_name,
        len(arg_types), self.num_args,
        arg_types,
        warn_about_arg_count_bug=warn_only,
        work_around_arg_count_bug=work_around,
        devs=self.context.devices)

    self._set_enqueue_and_set_args(*generated)
822
+
823
def kernel_get_work_group_info(self, param, device):
    """Return work-group info for *param* on *device*, caching the result.

    Results are stored per kernel object in ``self._wg_info_cache``, keyed
    by ``(param, device.int_ptr)``; the original (unpatched)
    ``get_work_group_info`` is only called on a cache miss.
    """
    try:
        cache = self._wg_info_cache
    except AttributeError:
        cache = self._wg_info_cache = {}

    key = (param, device.int_ptr)
    if key not in cache:
        cache[key] = kernel_old_get_work_group_info(self, param, device)

    return cache[key]
838
+
839
def kernel_capture_call(self, output_file, queue, global_size, local_size,
                        *args, **kwargs):
    """Forward to :func:`pyopencl.capture_call.capture_kernel_call`.

    All arguments are passed through unchanged, with this kernel as the
    first argument.
    """
    from pyopencl import capture_call as _capture_module

    _capture_module.capture_kernel_call(
        self, output_file, queue, global_size, local_size, *args, **kwargs)
844
+
845
def kernel_get_info(self, param_name):
    """Return kernel info, wrapping low-level programs in :class:`Program`.

    Any value that is not a ``_Program`` instance is returned as obtained
    from the original (unpatched) ``get_info``.
    """
    raw_value = kernel_old_get_info(self, param_name)
    return Program(raw_value) if isinstance(raw_value, _Program) else raw_value
852
+
853
+ Kernel.get_work_group_info = kernel_get_work_group_info
854
+
855
+ # FIXME: Possibly deprecate this version
856
+ Kernel.set_scalar_arg_dtypes = kernel_set_arg_types
857
+ Kernel.set_arg_types = kernel_set_arg_types
858
+
859
+ Kernel.capture_call = kernel_capture_call
860
+ Kernel.get_info = kernel_get_info
861
+
862
+ # }}}
863
+
864
+ # {{{ ImageFormat
865
+
866
def image_format_repr(self):
    """Return ``ImageFormat(<order>, <type>)`` using the constants' names.

    Unknown values are rendered through the fallback patterns passed to
    ``to_string``.
    """
    order_text = channel_order.to_string(
        self.channel_order, "<unknown channel order 0x%x>")
    type_text = channel_type.to_string(
        self.channel_data_type, "<unknown channel data type 0x%x>")

    return f"ImageFormat({order_text}, {type_text})"
872
+
873
def image_format_eq(self, other):
    """Two formats are equal if channel order and channel data type match."""
    if self.channel_order != other.channel_order:
        return False
    return self.channel_data_type == other.channel_data_type
876
+
877
def image_format_ne(self, other):
    """Return the negation of :func:`image_format_eq`."""
    are_equal = image_format_eq(self, other)
    return not are_equal
879
+
880
def image_format_hash(self):
    """Hash on the type, the channel order and the channel data type."""
    identity = (type(self), self.channel_order, self.channel_data_type)
    return hash(identity)
882
+
883
+ ImageFormat.__repr__ = image_format_repr
884
+ ImageFormat.__eq__ = image_format_eq
885
+ ImageFormat.__ne__ = image_format_ne
886
+ ImageFormat.__hash__ = image_format_hash
887
+
888
+ # }}}
889
+
890
+ # {{{ Image
891
+
892
def image_init(
        self, context, flags, format, shape=None, pitches=None,
        hostbuf=None, is_array=False, buffer=None, *,
        desc: ImageDescriptor | None = None,
        _through_create_image: bool = False,
    ) -> None:
    """Initialize an :class:`Image`.

    :arg flags: memory flags. A warning is issued if *hostbuf* is given
        without ``USE_HOST_PTR`` or ``COPY_HOST_PTR``.
    :arg shape: the image shape with one to three entries. Defaults to
        ``hostbuf.shape`` if *hostbuf* is given.
    :arg pitches: may only be given together with *hostbuf*.
    :arg is_array: create an image array (CL 1.2 and newer only).
    :arg buffer: buffer for a 1D image (CL 1.2 and newer only).
    :arg desc: an image descriptor. If given, the image is created directly
        from it, and *shape*, *pitches*, *is_array* and *buffer* must not
        be passed.
    :arg _through_create_image: internal flag that suppresses the
        deprecation warning for the non-descriptor form.

    :raises TypeError: if *desc* is combined with one of the arguments it
        replaces, or an argument is not supported for the given
        dimensionality or CL version.
    :raises ValueError: if *buffer* and *is_array* are both given, or
        *shape* does not have one to three entries.
    :raises Error: if neither *shape* nor *hostbuf* is given, or *pitches*
        is given without *hostbuf*.
    """
    if hostbuf is not None and not \
            (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)):
        warn("'hostbuf' was passed, but no memory flags to make use of it.",
                stacklevel=2)

    if desc is not None:
        # The descriptor already carries everything these arguments
        # would describe.
        if shape is not None:
            raise TypeError("shape may not be passed when using descriptor")
        if pitches is not None:
            raise TypeError("pitches may not be passed when using descriptor")
        if is_array:
            raise TypeError("is_array may not be passed when using descriptor")
        if buffer is not None:
            raise TypeError("buffer may not be passed when using descriptor")

        Image._custom_init(self, context, flags, format, desc, hostbuf)

        return

    if shape is None and hostbuf is None:
        raise Error("'shape' must be passed if 'hostbuf' is not given")

    if shape is None and hostbuf is not None:
        shape = hostbuf.shape

    if hostbuf is None and pitches is not None:
        raise Error("'pitches' may only be given if 'hostbuf' is given")

    if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2):
        if not _through_create_image:
            warn("Non-descriptor Image constructor called. "
                 "This will stop working in 2026. "
                 "Use create_image instead (with the same arguments).",
                 DeprecationWarning, stacklevel=2)

        if buffer is not None and is_array:
            raise ValueError(
                    "'buffer' and 'is_array' are mutually exclusive")

        # With is_array, the last shape entry becomes the array size, so
        # the image type has one dimension less than the shape.
        if len(shape) == 3:
            if buffer is not None:
                raise TypeError(
                        "'buffer' argument is not supported for 3D arrays")
            elif is_array:
                image_type = mem_object_type.IMAGE2D_ARRAY
            else:
                image_type = mem_object_type.IMAGE3D

        elif len(shape) == 2:
            if buffer is not None:
                raise TypeError(
                        "'buffer' argument is not supported for 2D arrays")
            elif is_array:
                image_type = mem_object_type.IMAGE1D_ARRAY
            else:
                image_type = mem_object_type.IMAGE2D

        elif len(shape) == 1:
            if buffer is not None:
                image_type = mem_object_type.IMAGE1D_BUFFER
            elif is_array:
                raise TypeError("array of zero-dimensional images not supported")
            else:
                image_type = mem_object_type.IMAGE1D

        else:
            raise ValueError("images cannot have more than three dimensions")

        desc = ImageDescriptor() \
            # pylint: disable=possibly-used-before-assignment

        desc.image_type = image_type
        desc.shape = shape  # also sets desc.array_size

        if pitches is None:
            desc.pitches = (0, 0)
        else:
            desc.pitches = pitches

        desc.num_mip_levels = 0  # per CL 1.2 spec
        desc.num_samples = 0  # per CL 1.2 spec
        desc.buffer = buffer

        Image._custom_init(self, context, flags, format, desc, hostbuf)
    else:
        # legacy init for CL 1.1 and older
        if is_array:
            raise TypeError("'is_array=True' is not supported for CL < 1.2")
        # if num_mip_levels is not None:
            # raise TypeError(
            #       "'num_mip_levels' argument is not supported for CL < 1.2")
        # if num_samples is not None:
            # raise TypeError(
            #        "'num_samples' argument is not supported for CL < 1.2")
        if buffer is not None:
            raise TypeError("'buffer' argument is not supported for CL < 1.2")

        Image._custom_init(self, context, flags, format, shape,
                pitches, hostbuf)
997
+
998
+ class _ImageInfoGetter:
999
+ def __init__(self, event):
1000
+ warn(
1001
+ "Image.image.attr is deprecated and will go away in 2021. "
1002
+ "Use Image.attr directly, instead.", stacklevel=2)
1003
+
1004
+ self.event = event
1005
+
1006
+ def __getattr__(self, name):
1007
+ try:
1008
+ inf_attr = getattr(_cl.image_info, name.upper())
1009
+ except AttributeError as err:
1010
+ raise AttributeError("%s has no attribute '%s'"
1011
+ % (type(self), name)) from err
1012
+ else:
1013
+ return self.event.get_image_info(inf_attr)
1014
+
1015
def image_shape(self):
    """Return the extent of a 2D or 3D image as a tuple.

    :returns: ``(width, height)`` for ``IMAGE2D`` objects and
        ``(width, height, depth)`` for ``IMAGE3D`` objects.
    :raises LogicError: for any other memory object type.
    """
    kind = self.type

    if kind == mem_object_type.IMAGE2D:
        extent_attrs = ("width", "height")
    elif kind == mem_object_type.IMAGE3D:
        extent_attrs = ("width", "height", "depth")
    else:
        raise LogicError("only images have shapes")

    return tuple(getattr(self, attr) for attr in extent_attrs)
1022
+
1023
# Install the Python-level constructor and convenience properties on Image.
Image.__init__ = image_init
# Deprecated accessor: constructing the getter emits a warning.
Image.image = property(_ImageInfoGetter)
Image.shape = property(image_shape)
1026
+
1027
+ # }}}
1028
+
1029
+ # {{{ Error
1030
+
1031
def error_str(self):
    """Build the human-readable message for an error instance.

    If the payload in ``self.what`` has no ``routine`` attribute, its plain
    ``str()`` is returned. Otherwise the message is assembled from the
    status-code name (omitted for ``SUCCESS``), the failing routine and the
    payload's own ``what()`` text.
    """
    payload = self.what

    if not hasattr(payload, "routine"):
        # Ordinary exception argument (e.g. a plain string).
        return str(payload)

    code = payload.code()
    message = ""
    if code != status_code.SUCCESS:
        message = status_code.to_string(code, "<unknown error %d>")

    failed_routine = payload.routine()
    if failed_routine:
        message = f"{failed_routine} failed: {message}"

    detail = payload.what()
    if detail:
        if message:
            message = message + " - "
        message = message + detail

    return message
1051
+
1052
def error_code(self):
    """Return ``code()`` of the first exception argument."""
    payload = self.args[0]
    return payload.code()


def error_routine(self):
    """Return ``routine()`` of the first exception argument."""
    payload = self.args[0]
    return payload.routine()


def error_what(self):
    """Return the first exception argument itself."""
    payload = self.args[0]
    return payload
1060
+
1061
# Attach the message formatter and the read-only accessors to Error.
Error.__str__ = error_str
Error.code = property(error_code)
Error.routine = property(error_routine)
Error.what = property(error_what)
1065
+
1066
+ # }}}
1067
+
1068
+ # {{{ MemoryMap
1069
+
1070
def memory_map_enter(self):
    """Context-manager entry: hand back the object itself."""
    return self


def memory_map_exit(self, exc_type, exc_val, exc_tb):
    """Context-manager exit: call ``release()``.

    Returns *None*, so an exception raised inside the ``with`` body
    propagates.
    """
    self.release()
1075
+
1076
+ MemoryMap.__doc__ = """
1077
+ This class may also be used as a context manager in a ``with`` statement.
1078
+ The memory corresponding to this object will be unmapped when
1079
+ this object is deleted or :meth:`release` is called.
1080
+
1081
+ .. automethod:: release
1082
+ """
1083
# Make MemoryMap usable in a ``with`` statement.
MemoryMap.__enter__ = memory_map_enter
MemoryMap.__exit__ = memory_map_exit
1085
+
1086
+ # }}}
1087
+
1088
+ # {{{ SVMPointer
1089
+
1090
+ if get_cl_header_version() >= (2, 0):
1091
+ SVMPointer.__doc__ = """A base class for things that can be passed to
1092
+ functions that allow an SVM pointer, e.g. kernel enqueues and memory
1093
+ copies.
1094
+
1095
+ Objects of this type cannot currently be directly created or
1096
+ implemented in Python. To obtain objects implementing this type,
1097
+ consider its subtypes :class:`SVMAllocation` and :class:`SVM`.
1098
+
1099
+
1100
+ .. property:: svm_ptr
1101
+
1102
+ Gives the SVM pointer as an :class:`int`.
1103
+
1104
+ .. property:: size
1105
+
1106
+ An :class:`int` denoting the size in bytes, or *None*, if the size
1107
+ of the SVM pointed to is not known.
1108
+
1109
+ *Most* objects of this type (e.g. instances of
1110
+ :class:`SVMAllocation` and :class:`SVM`) know their size, so that,
1111
+ for example :func:`enqueue_copy` will automatically copy an entire
1112
+ :class:`SVMAllocation` when a size is not explicitly specified.
1113
+
1114
+ .. automethod:: map
1115
+ .. automethod:: map_ro
1116
+ .. automethod:: map_rw
1117
+ .. automethod:: as_buffer
1118
+ .. property:: buf
1119
+
1120
+ An opaque object implementing the :c:func:`Python buffer protocol
1121
+ <PyObject_GetBuffer>`. It exposes the pointed-to memory as
1122
+ a one-dimensional buffer of bytes, with the size matching
1123
+ :attr:`size`.
1124
+
1125
+ No guarantee is provided that two references to this attribute
1126
+ result in the same object.
1127
+ """
1128
+
1129
def svmptr_map(self, queue: CommandQueue, *, flags: int,
               is_blocking: bool = True,
               wait_for: Sequence[Event] | None = None,
               size: int | None = None) -> SVMMap:
    """
    Enqueue a map of the SVM memory behind *self* and return an
    :class:`SVMMap` that exposes it as a :mod:`numpy` array of ``self.buf``.

    :arg is_blocking: If *False*, subsequent code must wait on
        :attr:`SVMMap.event` in the returned object before accessing the
        mapped memory.
    :arg flags: a combination of :class:`pyopencl.map_flags`.
    :arg size: The size of the map in bytes. If not provided, defaults to
        :attr:`size`.

    |std-enqueue-blurb|
    """
    # NOTE: *size* is a byte count; it was previously annotated as an Event.
    return SVMMap(self,
                  np.asarray(self.buf),
                  queue,
                  _cl._enqueue_svm_map(queue, is_blocking, flags, self, wait_for,
                                       size=size))
1147
+
1148
def svmptr_map_ro(self, queue: CommandQueue, *, is_blocking: bool = True,
                  wait_for: Sequence[Event] | None = None,
                  size: int | None = None) -> SVMMap:
    """Like :meth:`map`, but with *flags* set for a read-only map."""
    read_only = map_flags.READ

    return self.map(
        queue,
        flags=read_only,
        is_blocking=is_blocking,
        wait_for=wait_for,
        size=size)
1156
+
1157
def svmptr_map_rw(self, queue: CommandQueue, *, is_blocking: bool = True,
                  wait_for: Sequence[Event] | None = None,
                  size: int | None = None) -> SVMMap:
    """Like :meth:`map`, but with *flags* set for a read-write map.
    """

    return self.map(queue, flags=map_flags.READ | map_flags.WRITE,
                    is_blocking=is_blocking, wait_for=wait_for, size=size)
1165
+
1166
def svmptr__enqueue_unmap(self, queue, wait_for=None):
    """Enqueue an unmap of *self* on *queue*.

    Thin wrapper that forwards to ``_cl._enqueue_svm_unmap`` and returns
    its result (presumably an event -- confirm against the binding).
    """
    return _cl._enqueue_svm_unmap(queue, self, wait_for)
1168
+
1169
def svmptr_as_buffer(self, ctx: Context, *, flags: int | None = None,
                     size: int | None = None) -> Buffer:
    """
    Wrap the memory behind *self* in a :class:`Buffer`, using ``self.buf``
    as the host buffer.

    :arg ctx: a :class:`Context`
    :arg flags: a combination of :class:`pyopencl.mem_flags`, defaults to
        ``READ_WRITE | USE_HOST_PTR``.
    :arg size: The size of the buffer in bytes. If not provided, defaults to
        :attr:`size`.
    :returns: a :class:`Buffer` corresponding to *self*.

    The memory referred to by this object must not be freed before
    the returned :class:`Buffer` is released.
    """

    if flags is None:
        flags = mem_flags.READ_WRITE | mem_flags.USE_HOST_PTR

    if size is None:
        size = self.size

    return Buffer(ctx, flags, size=size, hostbuf=self.buf)
1190
+
1191
# SVMPointer is only patched when built against CL 2.0+ headers.
if get_cl_header_version() >= (2, 0):
    SVMPointer.map = svmptr_map
    SVMPointer.map_ro = svmptr_map_ro
    SVMPointer.map_rw = svmptr_map_rw
    SVMPointer._enqueue_unmap = svmptr__enqueue_unmap
    SVMPointer.as_buffer = svmptr_as_buffer
1197
+
1198
+ # }}}
1199
+
1200
+ # {{{ SVMAllocation
1201
+
1202
+ if get_cl_header_version() >= (2, 0):
1203
+ SVMAllocation.__doc__ = """
1204
+ Is a :class:`SVMPointer`.
1205
+
1206
+ .. versionadded:: 2016.2
1207
+
1208
+ .. automethod:: __init__
1209
+
1210
+ :arg flags: See :class:`svm_mem_flags`.
1211
+ :arg queue: If not specified, the allocation will be freed
1212
+ eagerly, irrespective of whether pending/enqueued operations
1213
+ are still using this memory.
1214
+
1215
+ If specified, deallocation of the memory will be enqueued
1216
+ with the given queue, and will only be performed
1217
+ after previously-enqueued operations in the queue have
1218
+ completed.
1219
+
1220
+ It is an error to specify an out-of-order queue.
1221
+
1222
+ .. warning::
1223
+
1224
+ Not specifying a queue will typically lead to undesired
1225
+ behavior, including crashes and memory corruption.
1226
+ See the warning in :ref:`svm`.
1227
+
1228
+ .. automethod:: enqueue_release
1229
+
1230
+ Enqueue the release of this allocation into *queue*.
1231
+ If *queue* is not specified, enqueue the deallocation
1232
+ into the queue provided at allocation time or via
1233
+ :meth:`bind_to_queue`.
1234
+
1235
+ .. automethod:: bind_to_queue
1236
+
1237
+ Change the queue used for implicit enqueue of deallocation
1238
+ to *queue*. Sufficient synchronization is ensured by
1239
+ enqueuing a marker into the old queue and waiting on this
1240
+ marker in the new queue.
1241
+
1242
+ .. automethod:: unbind_from_queue
1243
+
1244
+ Configure the allocation to no longer implicitly enqueue
1245
+ memory deallocation. If such a queue was previously provided,
1246
+ :meth:`~CommandQueue.finish` is automatically called on it.
1247
+ """
1248
+
1249
+ # }}}
1250
+
1251
+ # {{{ SVM
1252
+
1253
+ if get_cl_header_version() >= (2, 0):
1254
+ SVM.__doc__ = """Tags an object exhibiting the Python buffer interface
1255
+ (such as a :class:`numpy.ndarray`) as referring to shared virtual
1256
+ memory.
1257
+
1258
+ Is a :class:`SVMPointer`, hence objects of this type may be passed
1259
+ to kernel calls and :func:`enqueue_copy`, and all methods declared
1260
+ there are also available here. Note that :meth:`map` differs
1261
+ slightly from :meth:`SVMPointer.map`.
1262
+
1263
+ Depending on the features of the OpenCL implementation, the following
1264
+ types of objects may be passed to/wrapped in this type:
1265
+
1266
+ * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`,
1267
+ if the implementation supports fine-grained shared virtual memory.
1268
+ This memory may directly be passed to a kernel::
1269
+
1270
+ ary = cl.fsvm_empty(ctx, 1000, np.float32)
1271
+ assert isinstance(ary, np.ndarray)
1272
+
1273
+ prg.twice(queue, ary.shape, None, cl.SVM(ary))
1274
+ queue.finish() # synchronize
1275
+ print(ary) # access from host
1276
+
1277
+ Observe how mapping (as needed in coarse-grain SVM) is no longer
1278
+ necessary.
1279
+
1280
+ * any :class:`numpy.ndarray` (or other Python object with a buffer
1281
+ interface) if the implementation supports fine-grained *system*
1282
+ shared virtual memory.
1283
+
1284
+ This is how plain :mod:`numpy` arrays may directly be passed to a
1285
+ kernel::
1286
+
1287
+ ary = np.zeros(1000, np.float32)
1288
+ prg.twice(queue, ary.shape, None, cl.SVM(ary))
1289
+ queue.finish() # synchronize
1290
+ print(ary) # access from host
1291
+
1292
+ * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty`
1293
+ for any implementation of OpenCL 2.0.
1294
+
1295
+ .. note::
1296
+
1297
+ Applications making use of coarse-grain SVM may be better
1298
+ served by opaque-style SVM. See :ref:`opaque-svm`.
1299
+
1300
+ This is how coarse-grain SVM may be used from both host and device::
1301
+
1302
+ svm_ary = cl.SVM(
1303
+ cl.csvm_empty(ctx, 1000, np.float32, alignment=64))
1304
+ assert isinstance(svm_ary.mem, np.ndarray)
1305
+
1306
+ with svm_ary.map_rw(queue) as ary:
1307
+ ary.fill(17) # use from host
1308
+
1309
+ prg.twice(queue, svm_ary.mem.shape, None, svm_ary)
1310
+
1311
+ Coarse-grain shared-memory *must* be mapped into host address space
1312
+ using :meth:`~SVMPointer.map` before being accessed through the
1313
+ :mod:`numpy` interface.
1314
+
1315
+ .. note::
1316
+
1317
+ This object merely serves as a 'tag' that changes the behavior
1318
+ of functions to which it is passed. It has no special management
1319
+ relationship to the memory it tags. For example, it is permissible
1320
+ to grab a :class:`numpy.ndarray` out of :attr:`SVM.mem` of one
1321
+ :class:`SVM` instance and use the array to construct another.
1322
+ Neither of the tags need to be kept alive.
1323
+
1324
+ .. versionadded:: 2016.2
1325
+
1326
+ .. attribute:: mem
1327
+
1328
+ The wrapped object.
1329
+
1330
+ .. automethod:: __init__
1331
+ .. automethod:: map
1332
+ .. automethod:: map_ro
1333
+ .. automethod:: map_rw
1334
+ """
1335
+
1336
+ # }}}
1337
+
1338
def svm_map(self, queue, flags, is_blocking=True, wait_for=None):
    """
    Enqueue a map of the wrapped object and return an :class:`SVMMap`.

    :arg is_blocking: If *False*, subsequent code must wait on
        :attr:`SVMMap.event` in the returned object before accessing the
        mapped memory.
    :arg flags: a combination of :class:`pyopencl.map_flags`.
    :returns: an :class:`SVMMap` instance

    This differs from the inherited :meth:`SVMPointer.map` in that no size
    can be specified, and that :attr:`mem` is the exact array produced
    when the :class:`SVMMap` is used as a context manager.

    |std-enqueue-blurb|
    """
    wrapped = self.mem
    map_event = _cl._enqueue_svm_map(queue, is_blocking, flags, self, wait_for)

    return SVMMap(self, wrapped, queue, map_event)
1357
+
1358
def svm_map_ro(self, queue, is_blocking=True, wait_for=None):
    """Like :meth:`map`, but with *flags* set for a read-only map."""
    read_only = map_flags.READ

    return self.map(
        queue,
        read_only,
        is_blocking=is_blocking,
        wait_for=wait_for)
1363
+
1364
def svm_map_rw(self, queue, is_blocking=True, wait_for=None):
    """Like :meth:`map`, but with *flags* set for a read-write map."""

    return self.map(queue, map_flags.READ | map_flags.WRITE,
                    is_blocking=is_blocking, wait_for=wait_for)
1369
+
1370
def svm__enqueue_unmap(self, queue, wait_for=None):
    """Enqueue an unmap of *self* on *queue*.

    Forwards to ``_cl._enqueue_svm_unmap`` and returns its result.
    """
    return _cl._enqueue_svm_unmap(queue, self, wait_for)


# SVM is only patched when built against CL 2.0+ headers.
if get_cl_header_version() >= (2, 0):
    SVM.map = svm_map
    SVM.map_ro = svm_map_ro
    SVM.map_rw = svm_map_rw
    SVM._enqueue_unmap = svm__enqueue_unmap
1378
+
1379
+ # }}}
1380
+
1381
+ # ORDER DEPENDENCY: Some of the above may override get_info, the effect needs
1382
+ # to be visible through the attributes. So get_info attr creation needs to happen
1383
+ # after the overriding is complete.
1384
# Maps each class to a 3-tuple:
#   (info getter called as getter(obj, constant),
#    class holding the info constants,
#    names of constants whose values are cached on the instance)
cls_to_info_cls = {
    _cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []),
    _cl.Device: (_cl.Device.get_info, _cl.device_info,
        ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]),
    _cl.Context: (_cl.Context.get_info, _cl.context_info, []),
    _cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info,
        ["CONTEXT", "DEVICE"]),
    _cl.Event: (_cl.Event.get_info, _cl.event_info, []),
    _cl.MemoryObjectHolder:
    (MemoryObjectHolder.get_info, _cl.mem_info, []),
    Image: (_cl.Image.get_image_info, _cl.image_info, []),
    Pipe: (_cl.Pipe.get_pipe_info, _cl.pipe_info, []),
    Program: (Program.get_info, _cl.program_info, []),
    Kernel: (Kernel.get_info, _cl.kernel_info, []),
    _cl.Sampler: (Sampler.get_info, _cl.sampler_info, []),
    }
1400
+
1401
def to_string(cls, value, default_format=None):
    """Return the symbolic name(s) of *value* within the constant class *cls*.

    For bitfield classes (``cls._is_bitfield`` true), every public integer
    attribute that equals *value* or shares at least one bit with it is
    reported, joined by ``" | "``. For other classes, the first public
    attribute equal to *value* is returned.

    :arg default_format: a ``%``-format string applied to *value* when no
        name matches.
    :raises ValueError: if no name matches and *default_format* is *None*.
    """
    if cls._is_bitfield:
        names = []
        for name in dir(cls):
            # Skip private attributes, as the non-bitfield branch does.
            # Without this, the bool ``_is_bitfield`` (an int subclass equal
            # to 1) is reported as a flag name for every odd *value*.
            if name.startswith("_"):
                continue
            attr = getattr(cls, name)
            if not isinstance(attr, int):
                continue
            if attr == value or attr & value:
                names.append(name)
        if names:
            return " | ".join(names)
    else:
        for name in dir(cls):
            if (not name.startswith("_")
                    and getattr(cls, name) == value):
                return name

    if default_format is None:
        raise ValueError("a name for value %d was not found in %s"
                % (value, cls.__name__))
    else:
        return default_format % value
1423
+
1424
# Give every constant class a ``to_string`` classmethod and record whether
# its values are bit flags (which changes how to_string matches names).
for cls in CONSTANT_CLASSES:
    cls._is_bitfield = cls in BITFIELD_CONSTANT_CLASSES
    cls.to_string = classmethod(to_string)
1427
+
1428
+ # {{{ get_info attributes -------------------------------------------------
1429
+
1430
def make_getinfo(info_method, info_name, info_attr):
    """Build a read-only property returning ``info_method(obj, info_attr)``.

    *info_name* is accepted but not used by the generated getter.
    """
    def getter(self):
        return info_method(self, info_attr)

    return property(fget=getter)
1435
+
1436
def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr):
    """Build a read-only property that memoizes ``info_method(obj, info_attr)``.

    The first access stores the value on the instance under *cache_attr*;
    later accesses return the stored value without calling *info_method*.
    *info_name* is accepted but not used by the generated getter.
    """
    not_cached = object()

    def cached_getter(self):
        value = getattr(self, cache_attr, not_cached)
        if value is not_cached:
            value = info_method(self, info_attr)
            setattr(self, cache_attr, value)
        return value

    return property(fget=cached_getter)
1448
+
1449
# For each class, expose every public info constant as a lower-case
# read-only property; constants named in cacheable_attrs get a memoizing one.
for cls, (info_method, info_class, cacheable_attrs) in cls_to_info_cls.items():
    for info_name in info_class.__dict__:
        if info_name.startswith("_") or info_name == "to_string":
            continue

        attr_name = info_name.lower()
        info_constant = getattr(info_class, info_name)

        if info_name in cacheable_attrs:
            info_property = make_cacheable_getinfo(
                info_method, attr_name,
                intern("_info_cache_"+attr_name), info_constant)
        else:
            info_property = make_getinfo(
                info_method, info_name, info_constant)

        setattr(cls, attr_name, info_property)
1464
+
1465
+ # }}}
1466
+
1467
# GL interop helpers are only installed when _cl reports GL support.
if _cl.have_gl():
    def gl_object_get_gl_object(self):
        """Return element 1 of ``get_gl_object_info()``."""
        return self.get_gl_object_info()[1]

    GLBuffer.gl_object = property(gl_object_get_gl_object)
    GLTexture.gl_object = property(gl_object_get_gl_object)
1473
+
1474
+
1475
+ _add_functionality()
1476
+
1477
+ # }}}
1478
+
1479
+
1480
+ # {{{ _OverriddenArrayInterfaceSVMAllocation
1481
+
1482
if get_cl_header_version() >= (2, 0):
    class _OverriddenArrayInterfaceSVMAllocation(SVMAllocation):
        """An :class:`SVMAllocation` that publishes a caller-supplied
        ``__array_interface__`` pointing at its own memory.
        """

        def __init__(self, ctx, size, alignment, flags, *, _interface,
                     queue=None):
            """
            :arg ctx: a :class:`Context`
            :arg flags: some of :class:`svm_mem_flags`.
            :arg _interface: an array-interface dict; its ``"data"`` entry
                is overwritten in place with this allocation's pointer.
            """
            super().__init__(ctx, size, alignment, flags, queue)

            # mem_flags.READ_ONLY applies to kernels, not the host, so the
            # second entry is always False.
            host_read_only = False
            _interface["data"] = (int(self.svm_ptr), host_read_only)

            self.__array_interface__ = _interface
1497
+
1498
+ # }}}
1499
+
1500
+
1501
+ # {{{ create_image
1502
+
1503
def create_image(context, flags, format, shape=None, pitches=None,
        hostbuf=None, is_array=False, buffer=None) -> Image:
    """
    Create an :class:`Image`.

    See :class:`mem_flags` for values of *flags*.
    *format* is an instance of :class:`ImageFormat`.
    *shape* is a tuple with one, two or three entries.
    *pitches* is a 1-tuple for 2D images and a 2-tuple for 3D images, indicating
    the distance in bytes from one scan line to the next, and from one 2D image
    slice to the next. *pitches* may only be given if *hostbuf* is given.

    If *hostbuf* is given and *shape* is *None*, then *hostbuf.shape* is
    used as the *shape* parameter. One of *shape* and *hostbuf* must be given.

    If *is_array* is true, a two-entry *shape* describes an array of 1D images
    and a three-entry *shape* an array of 2D images.

    If *buffer* is given, *shape* must have exactly one entry, and a 1D image
    buffer is created. *buffer* and *is_array* are mutually exclusive.

    *is_array* and *buffer* are not supported for OpenCL versions older
    than 1.2.

    :class:`Image` inherits from :class:`MemoryObject`.

    .. note::

        If you want to load images from :class:`numpy.ndarray` instances or read images
        back into them, be aware that OpenCL images expect the *x* dimension to vary
        fastest, whereas in the default (C) order of :mod:`numpy` arrays, the last index
        varies fastest. If your array is arranged in the wrong order in memory,
        there are two possible fixes for this:

        * Convert the array to Fortran (column-major) order using :func:`numpy.asarray`.

        * Pass *ary.T.copy()* to the image creation function.

    .. versionadded:: 2024.3
    """

    # _through_create_image marks this as the supported entry point, which
    # suppresses the constructor's deprecation warning.
    return Image(context, flags, format, shape=shape, pitches=pitches,
            hostbuf=hostbuf, is_array=is_array, buffer=buffer,
            _through_create_image=True)
1535
+
1536
+ # }}}
1537
+
1538
+
1539
+ # {{{ create_some_context
1540
+
1541
def choose_devices(interactive: bool | None = None,
                   answers: list[str] | None = None) -> list[Device]:
    """
    Choose :class:`Device` instances 'somehow'.

    :arg interactive: If multiple choices for platform and/or device exist,
        *interactive* is ``True`` (or ``None`` and ``sys.stdin.isatty()``
        returns ``True``), then the user is queried about which device should be
        chosen. Otherwise, a device is chosen in an implementation-defined
        manner.
    :arg answers: A sequence of strings that will be used to answer the
        platform/device selection questions.

    :returns: a list of :class:`Device` instances.

    If *answers* is *None* and the environment variable ``PYOPENCL_CTX`` is
    set, its value is split on ``":"`` and used as *answers*. If
    ``PYOPENCL_TEST`` is set, the first device reported by
    ``pyopencl.tools.get_test_platforms_and_devices`` is returned instead.

    :raises Error: if no platforms or no devices are found.
    :raises RuntimeError: if an answer matches no platform or device, or if
        not all provided answers were consumed.
    """

    if answers is None:
        if "PYOPENCL_CTX" in os.environ:
            ctx_spec = os.environ["PYOPENCL_CTX"]
            answers = ctx_spec.split(":")

    if "PYOPENCL_TEST" in os.environ:
        from pyopencl.tools import get_test_platforms_and_devices
        # Returns on the very first device; falls through if there is none.
        for _plat, devs in get_test_platforms_and_devices():
            for dev in devs:
                return [dev]

    if answers is not None:
        pre_provided_answers = answers
        # Work on a copy: get_input() below pops consumed answers.
        answers = answers[:]
    else:
        pre_provided_answers = None

    # Answers typed at the prompt, recorded for the PYOPENCL_CTX hint below.
    user_inputs = []

    if interactive is None:
        interactive = True
        try:
            if not sys.stdin.isatty():
                interactive = False
        except Exception:
            # Any failure querying stdin is treated as non-interactive.
            interactive = False

    def cc_print(s):
        # Only print when running interactively.
        if interactive:
            print(s)

    def get_input(prompt):
        # Pre-supplied answers win; otherwise prompt, or return "" if
        # non-interactive.
        if answers:
            return str(answers.pop(0))
        elif not interactive:
            return ""
        else:
            user_input = input(prompt)
            user_inputs.append(user_input)
            return user_input

    # {{{ pick a platform

    platforms = get_platforms()

    if not platforms:
        raise Error("no platforms found")
    else:
        if not answers:
            cc_print("Choose platform:")
            for i, pf in enumerate(platforms):
                cc_print("[%d] %s" % (i, pf))

        answer = get_input("Choice [0]:")
        if not answer:
            platform = platforms[0]
        else:
            platform = None
            # First try the answer as an index ...
            try:
                int_choice = int(answer)
            except ValueError:
                pass
            else:
                if 0 <= int_choice < len(platforms):
                    platform = platforms[int_choice]

            # ... then as a case-insensitive substring of the platform name.
            if platform is None:
                answer = answer.lower()
                # NOTE(review): no break here, so the LAST matching platform
                # wins, whereas parse_device() returns the FIRST matching
                # device. Confirm whether this asymmetry is intended.
                for pf in platforms:
                    if answer in pf.name.lower():
                        platform = pf
                if platform is None:
                    raise RuntimeError("input did not match any platform")

    # }}}

    # {{{ pick a device

    devices = platform.get_devices()

    def parse_device(choice):
        # Resolve one entry: an index into devices, else a name substring.
        try:
            int_choice = int(choice)
        except ValueError:
            pass
        else:
            if 0 <= int_choice < len(devices):
                return devices[int_choice]

        choice = choice.lower()
        for dev in devices:
            if choice in dev.name.lower():
                return dev
        raise RuntimeError("input did not match any device")

    if not devices:
        raise Error("no devices found")
    elif len(devices) == 1 and not answers:
        # A single device and nothing left to consume: no question needed.
        cc_print(f"Choosing only available device: {devices[0]}")
        pass
    else:
        if not answers:
            cc_print("Choose device(s):")
            for i, dev in enumerate(devices):
                cc_print("[%d] %s" % (i, dev))

        answer = get_input("Choice, comma-separated [0]:")
        if not answer:
            devices = [devices[0]]
        else:
            devices = [parse_device(i) for i in answer.split(",")]

    # }}}

    if user_inputs:
        if pre_provided_answers is not None:
            user_inputs = pre_provided_answers + user_inputs
        cc_print("Set the environment variable PYOPENCL_CTX='%s' to "
                "avoid being asked again." % ":".join(user_inputs))

    # Unconsumed answers mean the caller's specification did not fit.
    if answers:
        raise RuntimeError("not all provided choices were used by "
                "choose_devices. (left over: '%s')" % ":".join(answers))

    return devices
1682
+
1683
+
1684
def create_some_context(interactive: bool | None = None,
                        answers: list[str] | None = None) -> Context:
    """
    Create a :class:`Context` 'somehow'.

    Device selection is delegated to :func:`choose_devices`, which receives
    both arguments unchanged.

    :arg interactive: If multiple choices for platform and/or device exist,
        *interactive* is ``True`` (or ``None`` and ``sys.stdin.isatty()``
        returns ``True``), then the user is queried about which device should be
        chosen. Otherwise, a device is chosen in an implementation-defined
        manner.
    :arg answers: A sequence of strings that will be used to answer the
        platform/device selection questions.

    :returns: an instance of :class:`Context`.
    """
    return Context(choose_devices(interactive, answers))
1702
+
1703
+
1704
+ _csc = create_some_context
1705
+
1706
+ # }}}
1707
+
1708
+
1709
+ # {{{ SVMMap
1710
+
1711
class SVMMap:
    """
    Returned by :func:`SVMPointer.map` and :func:`SVM.map`.
    This class may also be used as a context manager in a ``with`` statement.
    :meth:`release` will be called upon exit from the ``with`` region.
    The value returned to the ``as`` part of the context manager is the
    mapped Python object (e.g. a :mod:`numpy` array).

    .. versionadded:: 2016.2

    .. property:: event

        The :class:`Event` returned when mapping the memory.

    .. automethod:: release

    """
    def __init__(self, svm, array, queue, event):
        # *svm* is reset to None once the map has been released.
        self.svm = svm
        self.array = array
        self.queue = queue
        self.event = event

    def __del__(self):
        # getattr(): __init__ may never have run, so ``svm`` may be unset.
        if getattr(self, "svm", None) is not None:
            self.release()

    def __enter__(self):
        return self.array

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()

    def release(self, queue=None, wait_for=None):
        """
        Enqueue the unmap of the mapped memory.

        :arg queue: a :class:`pyopencl.CommandQueue`. Defaults to the one
            with which the map was created, if not specified.
        :returns: a :class:`pyopencl.Event`, or *None* if the map has
            already been released.

        |std-enqueue-blurb|
        """
        svm = self.svm
        if svm is None:
            # Already released (e.g. explicitly inside a ``with`` block).
            return None

        if queue is None:
            queue = self.queue

        # Honor the caller's queue/wait_for, which were previously ignored.
        evt = svm._enqueue_unmap(queue, wait_for=wait_for)

        # Cleared only after a successful enqueue, so __del__ retries
        # if the call above raised.
        self.svm = None

        return evt
1757
+
1758
+ # }}}
1759
+
1760
+
1761
+ # {{{ enqueue_copy
1762
+
1763
# Memory object types in this list; IMAGE2D_ARRAY is only added when built
# against CL 1.2+ headers.
# NOTE(review): presumably the types enqueue_copy treats as images -- confirm
# against its body.
_IMAGE_MEM_OBJ_TYPES = [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]
if get_cl_header_version() >= (1, 2):
    _IMAGE_MEM_OBJ_TYPES.append(mem_object_type.IMAGE2D_ARRAY)
1766
+
1767
+
1768
+ def enqueue_copy(queue, dest, src, **kwargs):
1769
+ """Copy from :class:`Image`, :class:`Buffer` or the host to
1770
+ :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host
1771
+ copies are unsupported.)
1772
+
1773
+ The following keyword arguments are available:
1774
+
1775
+ :arg wait_for: (optional, default empty)
1776
+ :arg is_blocking: Wait for completion. Defaults to *True*.
1777
+ (Available on any copy involving host memory)
1778
+ :return: A :class:`NannyEvent` if the transfer involved a
1779
+ host-side buffer, otherwise an :class:`Event`.
1780
+
1781
+ .. note::
1782
+
1783
+ Be aware that the deletion of the :class:`NannyEvent` that is
1784
+ returned by the function if the transfer involved a host-side buffer
1785
+ will block until the transfer is complete, so be sure to keep a
1786
+ reference to this :class:`Event` until the
1787
+ transfer has completed.
1788
+
1789
+ .. note::
1790
+
1791
+ Two types of 'buffer' occur in the arguments to this function,
1792
+ :class:`Buffer` and 'host-side buffers'. The latter are
1793
+ defined by Python and commonly called `buffer objects
1794
+ <https://docs.python.org/3/c-api/buffer.html>`__. :mod:`numpy`
1795
+ arrays are a very common example.
1796
+ Make sure to always be clear on whether a :class:`Buffer` or a
1797
+ Python buffer object is needed.
1798
+
1799
+ .. ------------------------------------------------------------------------
1800
+ .. rubric :: Transfer :class:`Buffer` ↔ host
1801
+ .. ------------------------------------------------------------------------
1802
+
1803
+ :arg src_offset: offset in bytes (optional)
1804
+
1805
+ May only be nonzero if applied on the device side.
1806
+
1807
+ :arg dst_offset: offset in bytes (optional)
1808
+
1809
+ May only be nonzero if applied on the device side.
1810
+
1811
+ .. note::
1812
+
1813
+ The size of the transfer is controlled by the size of the
1814
+ host-side buffer. If the host-side buffer
1815
+ is a :class:`numpy.ndarray`, you can control the transfer size by
1816
+ transferring into a smaller 'view' of the target array, like this::
1817
+
1818
+ cl.enqueue_copy(queue, large_dest_numpy_array[:15], src_buffer)
1819
+
1820
+ .. ------------------------------------------------------------------------
1821
+ .. rubric :: Transfer :class:`Buffer` ↔ :class:`Buffer`
1822
+ .. ------------------------------------------------------------------------
1823
+
1824
+ :arg byte_count: (optional) If not specified, defaults to the
1825
+ size of the source in versions 2012.x and earlier,
1826
+ and to the minimum of the size of the source and target
1827
+ from 2013.1 on.
1828
+ :arg src_offset: (optional)
1829
+ :arg dst_offset: (optional)
1830
+
1831
+ .. ------------------------------------------------------------------------
1832
+ .. rubric :: Rectangular :class:`Buffer` ↔ host transfers (CL 1.1 and newer)
1833
+ .. ------------------------------------------------------------------------
1834
+
1835
+ :arg buffer_origin: :class:`tuple` of :class:`int` of length
1836
+ three or shorter. (mandatory)
1837
+ :arg host_origin: :class:`tuple` of :class:`int` of length
1838
+ three or shorter. (mandatory)
1839
+ :arg region: :class:`tuple` of :class:`int` of length
1840
+ three or shorter. (mandatory)
1841
+ :arg buffer_pitches: :class:`tuple` of :class:`int` of length
1842
+ two or shorter. (optional, "tightly-packed" if unspecified)
1843
+ :arg host_pitches: :class:`tuple` of :class:`int` of length
1844
+ two or shorter. (optional, "tightly-packed" if unspecified)
1845
+
1846
+ .. ------------------------------------------------------------------------
1847
+ .. rubric :: Rectangular :class:`Buffer` ↔ :class:`Buffer`
1848
+ transfers (CL 1.1 and newer)
1849
+ .. ------------------------------------------------------------------------
1850
+
1851
+ :arg src_origin: :class:`tuple` of :class:`int` of length
1852
+ three or shorter. (mandatory)
1853
+ :arg dst_origin: :class:`tuple` of :class:`int` of length
1854
+ three or shorter. (mandatory)
1855
+ :arg region: :class:`tuple` of :class:`int` of length
1856
+ three or shorter. (mandatory)
1857
+ :arg src_pitches: :class:`tuple` of :class:`int` of length
1858
+ two or shorter. (optional, "tightly-packed" if unspecified)
1859
+ :arg dst_pitches: :class:`tuple` of :class:`int` of length
1860
+ two or shorter. (optional, "tightly-packed" if unspecified)
1861
+
1862
+ .. ------------------------------------------------------------------------
1863
+ .. rubric :: Transfer :class:`Image` ↔ host
1864
+ .. ------------------------------------------------------------------------
1865
+
1866
+ :arg origin: :class:`tuple` of :class:`int` of length
1867
+ three or shorter. (mandatory)
1868
+ :arg region: :class:`tuple` of :class:`int` of length
1869
+ three or shorter. (mandatory)
1870
+ :arg pitches: :class:`tuple` of :class:`int` of length
1871
+ two or shorter. (optional)
1872
+
1873
+ .. ------------------------------------------------------------------------
1874
+ .. rubric :: Transfer :class:`Buffer` ↔ :class:`Image`
1875
+ .. ------------------------------------------------------------------------
1876
+
1877
+ :arg offset: offset in buffer (mandatory)
1878
+ :arg origin: :class:`tuple` of :class:`int` of length
1879
+ three or shorter. (mandatory)
1880
+ :arg region: :class:`tuple` of :class:`int` of length
1881
+ three or shorter. (mandatory)
1882
+
1883
+ .. ------------------------------------------------------------------------
1884
+ .. rubric :: Transfer :class:`Image` ↔ :class:`Image`
1885
+ .. ------------------------------------------------------------------------
1886
+
1887
+ :arg src_origin: :class:`tuple` of :class:`int` of length
1888
+ three or shorter. (mandatory)
1889
+ :arg dest_origin: :class:`tuple` of :class:`int` of length
1890
+ three or shorter. (mandatory)
1891
+ :arg region: :class:`tuple` of :class:`int` of length
1892
+ three or shorter. (mandatory)
1893
+
1894
+ .. ------------------------------------------------------------------------
1895
+ .. rubric :: Transfer :class:`SVMPointer`/host ↔ :class:`SVMPointer`/host
1896
+ .. ------------------------------------------------------------------------
1897
+
1898
+ :arg byte_count: (optional) If not specified, defaults to the
1899
+ size of the source in versions 2012.x and earlier,
1900
+ and to the minimum of the size of the source and target
1901
+ from 2013.1 on.
1902
+
1903
+ |std-enqueue-blurb|
1904
+
1905
+ .. versionadded:: 2011.1
1906
+ """
1907
+
1908
+ if isinstance(dest, MemoryObjectHolder):
1909
+ if dest.type == mem_object_type.BUFFER:
1910
+ if isinstance(src, MemoryObjectHolder):
1911
+ if src.type == mem_object_type.BUFFER:
1912
+ # {{{ buffer -> buffer
1913
+
1914
+ if "src_origin" in kwargs:
1915
+ # rectangular
1916
+ return _cl._enqueue_copy_buffer_rect(
1917
+ queue, src, dest, **kwargs)
1918
+ else:
1919
+ # linear
1920
+ dest_offset = kwargs.pop("dest_offset", None)
1921
+ if dest_offset is not None:
1922
+ if "dst_offset" in kwargs:
1923
+ raise TypeError("may not specify both 'dst_offset' "
1924
+ "and 'dest_offset'")
1925
+
1926
+ warn("The 'dest_offset' argument of enqueue_copy "
1927
+ "is deprecated. Use 'dst_offset' instead. "
1928
+ "'dest_offset' will stop working in 2023.x.",
1929
+ DeprecationWarning, stacklevel=2)
1930
+
1931
+ kwargs["dst_offset"] = dest_offset
1932
+
1933
+ return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs)
1934
+
1935
+ # }}}
1936
+ elif src.type in _IMAGE_MEM_OBJ_TYPES:
1937
+ return _cl._enqueue_copy_image_to_buffer(
1938
+ queue, src, dest, **kwargs)
1939
+ else:
1940
+ raise ValueError("invalid src mem object type")
1941
+ else:
1942
+ # {{{ host -> buffer
1943
+
1944
+ if "buffer_origin" in kwargs:
1945
+ return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs)
1946
+ else:
1947
+ device_offset = kwargs.pop("device_offset", None)
1948
+ if device_offset is not None:
1949
+ if "dst_offset" in kwargs:
1950
+ raise TypeError("may not specify both 'device_offset' "
1951
+ "and 'dst_offset'")
1952
+
1953
+ warn("The 'device_offset' argument of enqueue_copy "
1954
+ "is deprecated. Use 'dst_offset' instead. "
1955
+ "'dst_offset' will stop working in 2023.x.",
1956
+ DeprecationWarning, stacklevel=2)
1957
+
1958
+ kwargs["dst_offset"] = device_offset
1959
+
1960
+ return _cl._enqueue_write_buffer(queue, dest, src, **kwargs)
1961
+
1962
+ # }}}
1963
+
1964
+ elif dest.type in _IMAGE_MEM_OBJ_TYPES:
1965
+ # {{{ ... -> image
1966
+
1967
+ if isinstance(src, MemoryObjectHolder):
1968
+ if src.type == mem_object_type.BUFFER:
1969
+ return _cl._enqueue_copy_buffer_to_image(
1970
+ queue, src, dest, **kwargs)
1971
+ elif src.type in _IMAGE_MEM_OBJ_TYPES:
1972
+ return _cl._enqueue_copy_image(queue, src, dest, **kwargs)
1973
+ else:
1974
+ raise ValueError("invalid src mem object type")
1975
+ else:
1976
+ # assume from-host
1977
+ origin = kwargs.pop("origin")
1978
+ region = kwargs.pop("region")
1979
+
1980
+ pitches = kwargs.pop("pitches", (0, 0))
1981
+ if len(pitches) == 1:
1982
+ kwargs["row_pitch"], = pitches
1983
+ else:
1984
+ kwargs["row_pitch"], kwargs["slice_pitch"] = pitches
1985
+
1986
+ return _cl._enqueue_write_image(
1987
+ queue, dest, origin, region, src, **kwargs)
1988
+
1989
+ # }}}
1990
+ else:
1991
+ raise ValueError("invalid dest mem object type")
1992
+
1993
+ elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVMPointer):
1994
+ # {{{ ... -> SVM
1995
+
1996
+ if not isinstance(src, SVMPointer):
1997
+ src = SVM(src)
1998
+
1999
+ is_blocking = kwargs.pop("is_blocking", True)
2000
+
2001
+ # These are NOT documented. They only support consistency with the
2002
+ # Buffer-based API for the sake of the Array.
2003
+ if kwargs.pop("src_offset", 0) != 0:
2004
+ raise ValueError("src_offset must be 0")
2005
+ if kwargs.pop("dst_offset", 0) != 0:
2006
+ raise ValueError("dst_offset must be 0")
2007
+
2008
+ return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs)
2009
+
2010
+ # }}}
2011
+
2012
+ else:
2013
+ # assume to-host
2014
+
2015
+ if isinstance(src, MemoryObjectHolder):
2016
+ if src.type == mem_object_type.BUFFER:
2017
+ if "buffer_origin" in kwargs:
2018
+ return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs)
2019
+ else:
2020
+ device_offset = kwargs.pop("device_offset", None)
2021
+ if device_offset is not None:
2022
+ if "src_offset" in kwargs:
2023
+ raise TypeError("may not specify both 'device_offset' "
2024
+ "and 'src_offset'")
2025
+
2026
+ warn("The 'device_offset' argument of enqueue_copy "
2027
+ "is deprecated. Use 'src_offset' instead. "
2028
+ "'dst_offset' will stop working in 2023.x.",
2029
+ DeprecationWarning, stacklevel=2)
2030
+
2031
+ kwargs["src_offset"] = device_offset
2032
+
2033
+ return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)
2034
+
2035
+ elif src.type in _IMAGE_MEM_OBJ_TYPES:
2036
+ origin = kwargs.pop("origin")
2037
+ region = kwargs.pop("region")
2038
+
2039
+ pitches = kwargs.pop("pitches", (0, 0))
2040
+ if len(pitches) == 1:
2041
+ kwargs["row_pitch"], = pitches
2042
+ else:
2043
+ kwargs["row_pitch"], kwargs["slice_pitch"] = pitches
2044
+
2045
+ return _cl._enqueue_read_image(
2046
+ queue, src, origin, region, dest, **kwargs)
2047
+ else:
2048
+ raise ValueError("invalid src mem object type")
2049
+ elif isinstance(src, SVMPointer):
2050
+ # {{{ svm -> host
2051
+
2052
+ # dest is not a SVM instance, otherwise we'd be in the branch above
2053
+
2054
+ # This is NOT documented. They only support consistency with the
2055
+ # Buffer-based API for the sake of the Array.
2056
+ if kwargs.pop("src_offset", 0) != 0:
2057
+ raise ValueError("src_offset must be 0")
2058
+
2059
+ is_blocking = kwargs.pop("is_blocking", True)
2060
+ return _cl._enqueue_svm_memcpy(
2061
+ queue, is_blocking, SVM(dest), src, **kwargs)
2062
+
2063
+ # }}}
2064
+ else:
2065
+ # assume from-host
2066
+ raise TypeError("enqueue_copy cannot perform host-to-host transfers")
2067
+
2068
+ # }}}
2069
+
2070
+
2071
+ # {{{ enqueue_fill
2072
+
2073
def enqueue_fill(queue: CommandQueue,
        dest: MemoryObject | SVMPointer,
        pattern: Any, size: int, *, offset: int = 0,
        wait_for: Sequence[Event] | None = None) -> Event:
    """Fill *dest* with *pattern*, dispatching on the type of *dest*.

    :arg dest: a :class:`MemoryObjectHolder`, which is handed to
        :func:`enqueue_fill_buffer`, or a :class:`SVMPointer`, which is
        handed to :func:`enqueue_svm_memfill`.
    :arg pattern: forwarded unchanged to the selected fill function.
    :arg size: forwarded as *size* to :func:`enqueue_fill_buffer`, or as
        *byte_count* to :func:`enqueue_svm_memfill`.
    :arg offset: forwarded to :func:`enqueue_fill_buffer`. Must be zero
        when *dest* is a :class:`SVMPointer`.
    :arg wait_for: forwarded unchanged to the selected fill function.

    :raises NotImplementedError: if *dest* is a :class:`SVMPointer` and
        *offset* is nonzero.
    :raises TypeError: if *dest* is neither of the supported kinds.

    .. versionadded:: 2022.2
    """
    # Memory objects are checked first, exactly as before.
    if isinstance(dest, MemoryObjectHolder):
        return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for)

    if not isinstance(dest, SVMPointer):
        raise TypeError(f"enqueue_fill does not know how to fill '{type(dest)}'")

    if offset:
        raise NotImplementedError(
                "enqueue_fill with SVM does not yet support offsets")

    return enqueue_svm_memfill(queue, dest, pattern, size, wait_for)
2089
+
2090
+ # }}}
2091
+
2092
+
2093
+ # {{{ image creation
2094
+
2095
# Lookup table from NumPy scalar dtypes to the corresponding
# (non-normalized) image channel types.
DTYPE_TO_CHANNEL_TYPE = {
    np.dtype(scalar_type): cl_channel_type
    for scalar_type, cl_channel_type in (
        (np.float32, channel_type.FLOAT),
        (np.int16, channel_type.SIGNED_INT16),
        (np.int32, channel_type.SIGNED_INT32),
        (np.int8, channel_type.SIGNED_INT8),
        (np.uint16, channel_type.UNSIGNED_INT16),
        (np.uint32, channel_type.UNSIGNED_INT32),
        (np.uint8, channel_type.UNSIGNED_INT8),
    )
}

# Half precision is only registered when this NumPy build exposes it.
if hasattr(np, "float16"):
    DTYPE_TO_CHANNEL_TYPE[np.dtype(np.float16)] = channel_type.HALF_FLOAT

# Lookup table from NumPy integer dtypes to the normalized
# (SNORM/UNORM) image channel types.
DTYPE_TO_CHANNEL_TYPE_NORM = {
    np.dtype(scalar_type): cl_channel_type
    for scalar_type, cl_channel_type in (
        (np.int16, channel_type.SNORM_INT16),
        (np.int8, channel_type.SNORM_INT8),
        (np.uint16, channel_type.UNORM_INT16),
        (np.uint8, channel_type.UNORM_INT8),
    )
}
2117
+
2118
+
2119
def image_from_array(ctx, ary, num_channels=None, mode="r", norm_int=False):
    """Create an image from the C-contiguous array *ary* by calling
    :func:`create_image` with ``COPY_HOST_PTR`` and *ary* as the host buffer.

    :arg ctx: passed on to :func:`create_image`.
    :arg ary: the source array; must be C-contiguous.
    :arg num_channels: the number of image channels. If *None*, it is looked
        up from ``pyopencl.cltypes.vec_type_to_scalar_and_count`` for the
        dtype of *ary*, falling back to one channel for dtypes not found
        there. If greater than one, the last axis of *ary* must have exactly
        this length and is interpreted as the channel axis.
    :arg mode: ``"r"`` for a read-only image, ``"w"`` for a write-only one.
    :arg norm_int: if true, pick the channel type from
        :data:`DTYPE_TO_CHANNEL_TYPE_NORM` rather than
        :data:`DTYPE_TO_CHANNEL_TYPE`.

    :raises ValueError: if *ary* is not C-contiguous or *mode* is invalid.
    :raises RuntimeError: if the last axis of *ary* does not match an
        explicitly given *num_channels*.
    :raises KeyError: if *num_channels* is not 1 through 4, or the scalar
        dtype has no entry in the selected channel-type table.
    """
    if not ary.flags.c_contiguous:
        raise ValueError("array must be C-contiguous")

    dtype = ary.dtype

    # Unless a channel axis is split off below, the image spans every
    # axis of the array.
    shape = ary.shape
    strides = ary.strides

    if num_channels is None:
        try:
            dtype, num_channels = \
                    pyopencl.cltypes.vec_type_to_scalar_and_count[dtype]
        except KeyError:
            # It must be a scalar type then.
            num_channels = 1
    elif num_channels != 1:
        if ary.shape[-1] != num_channels:
            raise RuntimeError("last dimension must be equal to number of channels")

        # The trailing axis holds the channels, not an image dimension.
        shape = ary.shape[:-1]
        strides = ary.strides[:-1]

    if mode == "r":
        mode_flags = mem_flags.READ_ONLY
    elif mode == "w":
        mode_flags = mem_flags.WRITE_ONLY
    else:
        raise ValueError("invalid value '%s' for 'mode'" % mode)

    order_by_channel_count = {
        1: channel_order.R,
        2: channel_order.RG,
        3: channel_order.RGB,
        4: channel_order.RGBA,
    }
    img_order = order_by_channel_count[num_channels]

    assert ary.strides[-1] == ary.dtype.itemsize

    type_table = DTYPE_TO_CHANNEL_TYPE_NORM if norm_int else DTYPE_TO_CHANNEL_TYPE
    img_channel_type = type_table[dtype]

    # Array axes are reversed to obtain the image shape and pitches.
    # The first reversed stride is dropped from the pitches.
    return create_image(ctx, mode_flags | mem_flags.COPY_HOST_PTR,
            ImageFormat(img_order, img_channel_type),
            shape=shape[::-1], pitches=strides[::-1][1:],
            hostbuf=ary)
2171
+
2172
+ # }}}
2173
+
2174
+
2175
+ # {{{ enqueue_* compatibility shims
2176
+
2177
def enqueue_marker(queue, wait_for=None):
    """Enqueue a marker on *queue*, optionally after the events in *wait_for*.

    If both the CL version reported by *queue* and the CL header version
    are at least 1.2, the marker-with-wait-list entry point is used.
    Otherwise, a wait for *wait_for* (if non-empty) is enqueued first,
    followed by a plain marker.

    :returns: whatever the underlying marker call returns.
    """
    has_wait_list_api = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))

    if has_wait_list_api:
        return _cl._enqueue_marker_with_wait_list(queue, wait_for)

    # Pre-1.2 fallback: emulate the wait list with a separate wait.
    if wait_for:
        _cl._enqueue_wait_for_events(queue, wait_for)
    return _cl._enqueue_marker(queue)
2184
+
2185
+
2186
def enqueue_barrier(queue, wait_for=None):
    """Enqueue a barrier on *queue*, optionally after the events in *wait_for*.

    If both the CL version reported by *queue* and the CL header version
    are at least 1.2, the barrier-with-wait-list entry point is used.
    Otherwise a plain barrier is enqueued, then a wait for *wait_for*
    (if non-empty), and finally a marker whose result is returned.

    :returns: whatever the underlying barrier or marker call returns.
    """
    has_wait_list_api = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))

    if has_wait_list_api:
        return _cl._enqueue_barrier_with_wait_list(queue, wait_for)

    # Pre-1.2 fallback. The result of the plain barrier call is discarded,
    # so the trailing marker supplies the return value.
    _cl._enqueue_barrier(queue)
    if wait_for:
        _cl._enqueue_wait_for_events(queue, wait_for)
    return _cl._enqueue_marker(queue)
2194
+
2195
+
2196
def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None):
    """Enqueue a fill of *mem* with *pattern*, starting at *offset* and
    covering *size* (presumably both in bytes -- confirm against the
    underlying ``_cl._enqueue_fill_buffer``).

    A warning is issued (but the call is still attempted) if either the CL
    version reported by *queue* or the CL header version is older than 1.2.

    :arg pattern: the fill pattern. On PyPy, NumPy scalars are converted
        to arrays before being passed on.
    :returns: whatever the underlying fill call returns.
    """
    declares_cl_1_2 = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))

    if not declares_cl_1_2:
        warn(
            "The context for this queue does not declare OpenCL 1.2 support, so "
            "the next thing you might see is a crash",
            stacklevel=2)

    needs_array_pattern = _PYPY and isinstance(pattern, np.generic)
    if needs_array_pattern:
        pattern = np.asarray(pattern)

    return _cl._enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for)
2207
+
2208
+ # }}}
2209
+
2210
+
2211
+ # {{{ numpy-like svm allocation
2212
+
2213
def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None):
    """Fill shared virtual memory with a pattern.

    :arg dest: a Python buffer object, or any implementation of
        :class:`SVMPointer`. Anything that is not already a
        :class:`SVMPointer` is wrapped in :class:`SVM` first.
    :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray`)
        with the fill pattern to be used.
    :arg byte_count: The size of the memory to be filled. Defaults to the
        entirety of *dest*.

    |std-enqueue-blurb|

    .. versionadded:: 2016.2
    """

    svm_target = dest if isinstance(dest, SVMPointer) else SVM(dest)

    return _cl._enqueue_svm_memfill(
            queue, svm_target, pattern,
            byte_count=byte_count, wait_for=wait_for)
2232
+
2233
+
2234
def enqueue_svm_migratemem(queue, svms, flags, wait_for=None):
    """Enqueue a migration of the given SVM regions.

    :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy`
        arrays), or any implementation of :class:`SVMPointer`.
    :arg flags: a combination of :class:`mem_migration_flags`

    |std-enqueue-blurb|

    .. versionadded:: 2016.2

    This function requires OpenCL 2.1.
    """

    # Thin pass-through: all arguments go to the binding unchanged.
    migrate_event = _cl._enqueue_svm_migratemem(queue, svms, flags, wait_for)
    return migrate_event
2248
+
2249
+
2250
def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None):
    """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype*
    and *order*. (See :func:`numpy.empty` for the meaning of these arguments.)
    The array will be allocated in shared virtual memory belonging
    to *ctx*.

    :arg ctx: a :class:`Context`
    :arg flags: a combination of flags from :class:`svm_mem_flags`.
    :arg shape: an iterable of dimension sizes, or a single integer for a
        one-dimensional array.
    :arg order: exactly one of ``"C"``, ``"c"``, ``"F"`` or ``"f"``.
    :arg alignment: the number of bytes to which the beginning of the memory
        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *dtype*.
    :arg queue: passed on unchanged to the underlying SVM allocation.

    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
        is a :class:`SVMAllocation`.

    :raises TypeError: if *shape* is neither iterable nor an integer.
    :raises ValueError: if *order* is not one of the recognized values.

    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
    will likely want to wrap the returned array in an :class:`SVM` tag.

    .. versionadded:: 2016.2

    .. versionchanged:: 2022.2

        *queue* argument added.
    """

    dtype = np.dtype(dtype)

    # Normalize *shape* to a tuple up front. This accepts lists and
    # one-shot iterables, and the array interface built below calls for
    # a tuple-valued "shape" entry.
    try:
        shape = tuple(shape)
    except TypeError as err:
        if not isinstance(shape, (int, np.integer)):
            raise TypeError("shape must either be iterable or "
                    "castable to an integer") from err
        shape = (shape,)

    num_entries = 1
    for dim in shape:
        num_entries *= dim

    # Exact membership, not a substring test: `order in "fF"` would accept
    # "" (silently selecting Fortran order) as well as "fF".
    if order in ("f", "F"):
        fortran_order = True
    elif order in ("c", "C"):
        fortran_order = False
    else:
        raise ValueError(f"order not recognized: {order}")

    itemsize = dtype.itemsize
    nbytes = num_entries * itemsize

    from pyopencl.compyte.array import c_contiguous_strides, f_contiguous_strides

    if fortran_order:
        strides = f_contiguous_strides(itemsize, shape)
    else:
        strides = c_contiguous_strides(itemsize, shape)

    descr = dtype.descr

    interface = {
        "version": 3,
        "shape": shape,
        "strides": strides,
    }

    if len(descr) == 1:
        interface["typestr"] = descr[0][1]
    else:
        # Multi-field dtypes are exposed as an opaque "void" type of the
        # right size, with the field layout carried in "descr".
        interface["typestr"] = "V%d" % itemsize
        interface["descr"] = descr

    if alignment is None:
        alignment = itemsize

    svm_alloc = _OverriddenArrayInterfaceSVMAllocation(
            ctx, nbytes, alignment, flags, _interface=interface,
            queue=queue)
    return np.asarray(svm_alloc)
2322
+
2323
+
2324
def svm_empty_like(ctx, flags, ary, alignment=None):
    """Allocate an empty :class:`numpy.ndarray` with the shape, dtype and
    memory layout (C or Fortran order) of the existing
    :class:`numpy.ndarray` *ary*. The array will be allocated in shared
    virtual memory belonging to *ctx*.

    :arg ctx: a :class:`Context`
    :arg flags: a combination of flags from :class:`svm_mem_flags`.
    :arg alignment: the number of bytes to which the beginning of the memory
        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of the
        dtype of *ary*.

    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
        is a :class:`SVMAllocation`.

    :raises ValueError: if *ary* is neither C- nor Fortran-contiguous.

    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
    will likely want to wrap the returned array in an :class:`SVM` tag.

    .. versionadded:: 2016.2
    """
    layout = ary.flags
    if not (layout.c_contiguous or layout.f_contiguous):
        raise ValueError("array is neither C- nor Fortran-contiguous")

    # C order wins when an array is contiguous in both senses.
    order = "C" if layout.c_contiguous else "F"

    return svm_empty(ctx, flags, ary.shape, ary.dtype, order,
            alignment=alignment)
2351
+
2352
+
2353
def csvm_empty(ctx, shape, dtype, order="C", alignment=None):
    """
    Like :func:`svm_empty`, but with *flags* set for a coarse-grain read-write
    buffer (i.e. ``svm_mem_flags.READ_WRITE`` only).

    .. versionadded:: 2016.2
    """
    coarse_grain_flags = svm_mem_flags.READ_WRITE
    return svm_empty(
            ctx, coarse_grain_flags, shape, dtype,
            order=order, alignment=alignment)
2361
+
2362
+
2363
def csvm_empty_like(ctx, ary, alignment=None):
    """
    Like :func:`svm_empty_like`, but with *flags* set for a coarse-grain
    read-write buffer (i.e. ``svm_mem_flags.READ_WRITE`` only).

    :arg alignment: forwarded to :func:`svm_empty_like`.

    .. versionadded:: 2016.2
    """
    # *alignment* used to be accepted but silently dropped here.
    return svm_empty_like(ctx, svm_mem_flags.READ_WRITE, ary,
            alignment=alignment)
2371
+
2372
+
2373
def fsvm_empty(ctx, shape, dtype, order="C", alignment=None):
    """
    Like :func:`svm_empty`, but with *flags* set for a fine-grain read-write
    buffer (``READ_WRITE`` combined with ``SVM_FINE_GRAIN_BUFFER``).

    .. versionadded:: 2016.2
    """
    fine_grain_flags = (
            svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER)
    return svm_empty(
            ctx, fine_grain_flags, shape, dtype,
            order=order, alignment=alignment)
2383
+
2384
+
2385
def fsvm_empty_like(ctx, ary, alignment=None):
    """
    Like :func:`svm_empty_like`, but with *flags* set for a fine-grain
    read-write buffer (``READ_WRITE`` combined with
    ``SVM_FINE_GRAIN_BUFFER``).

    :arg alignment: forwarded to :func:`svm_empty_like`.

    .. versionadded:: 2016.2
    """
    # *alignment* used to be accepted but silently dropped here.
    return svm_empty_like(
            ctx,
            svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER,
            ary,
            alignment=alignment)
2396
+
2397
+ # }}}
2398
+
2399
+
2400
# Tuple of classes, presumably those accepted as kernel arguments (the
# consumers are outside this part of the file -- confirm). SVM is only
# referenced, and only included, when the CL headers are version 2.0
# or newer.
_KERNEL_ARG_CLASSES: tuple[type, ...] = (
    MemoryObjectHolder,
    Sampler,
    CommandQueue,
    LocalMemory,
    *((SVM,) if get_cl_header_version() >= (2, 0) else ()),
)
2408
+
2409
+
2410
+ # vim: foldmethod=marker