pyopencl 2025.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +2410 -0
- pyopencl/_cl.cp313-win_amd64.pyd +0 -0
- pyopencl/_cluda.py +54 -0
- pyopencl/_mymako.py +14 -0
- pyopencl/algorithm.py +1449 -0
- pyopencl/array.py +3362 -0
- pyopencl/bitonic_sort.py +242 -0
- pyopencl/bitonic_sort_templates.py +594 -0
- pyopencl/cache.py +535 -0
- pyopencl/capture_call.py +177 -0
- pyopencl/characterize/__init__.py +456 -0
- pyopencl/characterize/performance.py +237 -0
- pyopencl/cl/pyopencl-airy.cl +324 -0
- pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
- pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
- pyopencl/cl/pyopencl-bessel-y.cl +435 -0
- pyopencl/cl/pyopencl-complex.h +303 -0
- pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
- pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
- pyopencl/cl/pyopencl-random123/array.h +325 -0
- pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
- pyopencl/cl/pyopencl-random123/philox.cl +486 -0
- pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
- pyopencl/clmath.py +280 -0
- pyopencl/clrandom.py +409 -0
- pyopencl/cltypes.py +137 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/__init__.py +0 -0
- pyopencl/compyte/array.py +214 -0
- pyopencl/compyte/dtypes.py +290 -0
- pyopencl/compyte/pyproject.toml +54 -0
- pyopencl/elementwise.py +1171 -0
- pyopencl/invoker.py +421 -0
- pyopencl/ipython_ext.py +68 -0
- pyopencl/reduction.py +786 -0
- pyopencl/scan.py +1915 -0
- pyopencl/tools.py +1527 -0
- pyopencl/version.py +9 -0
- pyopencl-2025.1.dist-info/METADATA +108 -0
- pyopencl-2025.1.dist-info/RECORD +42 -0
- pyopencl-2025.1.dist-info/WHEEL +5 -0
- pyopencl-2025.1.dist-info/licenses/LICENSE +282 -0
pyopencl/__init__.py
ADDED
|
@@ -0,0 +1,2410 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
__copyright__ = "Copyright (C) 2009-15 Andreas Kloeckner"
|
|
5
|
+
|
|
6
|
+
__license__ = """
|
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
in the Software without restriction, including without limitation the rights
|
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
furnished to do so, subject to the following conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be included in
|
|
15
|
+
all copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
23
|
+
THE SOFTWARE.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
from sys import intern
|
|
28
|
+
from typing import Any, Sequence
|
|
29
|
+
from warnings import warn
|
|
30
|
+
|
|
31
|
+
# must import, otherwise dtype registry will not be fully populated
|
|
32
|
+
import pyopencl.cltypes
|
|
33
|
+
from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa: F401
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
__version__ = VERSION_TEXT
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
# This helps ocl-icd find shipped OpenCL ICDs, cf.
|
|
41
|
+
# https://github.com/isuruf/ocl-icd/commit/3862386b51930f95d9ad1089f7157a98165d5a6b
|
|
42
|
+
# via
|
|
43
|
+
# https://github.com/inducer/pyopencl/blob/0b3d0ef92497e6838eea300b974f385f94cb5100/scripts/build-wheels.sh#L43-L44
|
|
44
|
+
import os
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
os.environ["PYOPENCL_HOME"] = os.path.dirname(os.path.abspath(__file__))
|
|
48
|
+
|
|
49
|
+
try:
|
|
50
|
+
import pyopencl._cl as _cl
|
|
51
|
+
except ImportError:
|
|
52
|
+
from os.path import dirname, join, realpath
|
|
53
|
+
if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)):
|
|
54
|
+
warn(
|
|
55
|
+
"It looks like you are importing PyOpenCL from "
|
|
56
|
+
"its source directory. This likely won't work.",
|
|
57
|
+
stacklevel=2)
|
|
58
|
+
raise
|
|
59
|
+
|
|
60
|
+
import numpy as np
|
|
61
|
+
|
|
62
|
+
import sys
|
|
63
|
+
|
|
64
|
+
_PYPY = "__pypy__" in sys.builtin_module_names
|
|
65
|
+
|
|
66
|
+
from pyopencl._cl import ( # noqa: F401
|
|
67
|
+
get_cl_header_version,
|
|
68
|
+
program_kind,
|
|
69
|
+
status_code,
|
|
70
|
+
platform_info,
|
|
71
|
+
device_type,
|
|
72
|
+
device_info,
|
|
73
|
+
device_topology_type_amd,
|
|
74
|
+
device_fp_config,
|
|
75
|
+
device_mem_cache_type,
|
|
76
|
+
device_local_mem_type,
|
|
77
|
+
device_exec_capabilities,
|
|
78
|
+
device_svm_capabilities,
|
|
79
|
+
|
|
80
|
+
command_queue_properties,
|
|
81
|
+
context_info,
|
|
82
|
+
gl_context_info,
|
|
83
|
+
context_properties,
|
|
84
|
+
command_queue_info,
|
|
85
|
+
queue_properties,
|
|
86
|
+
|
|
87
|
+
mem_flags,
|
|
88
|
+
svm_mem_flags,
|
|
89
|
+
|
|
90
|
+
channel_order,
|
|
91
|
+
channel_type,
|
|
92
|
+
mem_object_type,
|
|
93
|
+
mem_info,
|
|
94
|
+
image_info,
|
|
95
|
+
pipe_info,
|
|
96
|
+
pipe_properties,
|
|
97
|
+
addressing_mode,
|
|
98
|
+
filter_mode,
|
|
99
|
+
sampler_info,
|
|
100
|
+
sampler_properties,
|
|
101
|
+
map_flags,
|
|
102
|
+
program_info,
|
|
103
|
+
program_build_info,
|
|
104
|
+
program_binary_type,
|
|
105
|
+
|
|
106
|
+
kernel_info,
|
|
107
|
+
kernel_arg_info,
|
|
108
|
+
kernel_arg_address_qualifier,
|
|
109
|
+
kernel_arg_access_qualifier,
|
|
110
|
+
kernel_arg_type_qualifier,
|
|
111
|
+
kernel_work_group_info,
|
|
112
|
+
kernel_sub_group_info,
|
|
113
|
+
|
|
114
|
+
event_info,
|
|
115
|
+
command_type,
|
|
116
|
+
command_execution_status,
|
|
117
|
+
profiling_info,
|
|
118
|
+
mem_migration_flags,
|
|
119
|
+
device_partition_property,
|
|
120
|
+
device_affinity_domain,
|
|
121
|
+
device_atomic_capabilities,
|
|
122
|
+
device_device_enqueue_capabilities,
|
|
123
|
+
|
|
124
|
+
version_bits,
|
|
125
|
+
khronos_vendor_id,
|
|
126
|
+
|
|
127
|
+
Error, MemoryError, LogicError, RuntimeError,
|
|
128
|
+
|
|
129
|
+
Platform,
|
|
130
|
+
get_platforms,
|
|
131
|
+
|
|
132
|
+
Device,
|
|
133
|
+
Context,
|
|
134
|
+
CommandQueue,
|
|
135
|
+
LocalMemory,
|
|
136
|
+
MemoryObjectHolder,
|
|
137
|
+
MemoryObject,
|
|
138
|
+
MemoryMap,
|
|
139
|
+
Buffer,
|
|
140
|
+
|
|
141
|
+
_Program,
|
|
142
|
+
Kernel,
|
|
143
|
+
|
|
144
|
+
Event,
|
|
145
|
+
wait_for_events,
|
|
146
|
+
NannyEvent,
|
|
147
|
+
|
|
148
|
+
enqueue_nd_range_kernel,
|
|
149
|
+
|
|
150
|
+
_enqueue_marker,
|
|
151
|
+
|
|
152
|
+
_enqueue_read_buffer,
|
|
153
|
+
_enqueue_write_buffer,
|
|
154
|
+
_enqueue_copy_buffer,
|
|
155
|
+
_enqueue_read_buffer_rect,
|
|
156
|
+
_enqueue_write_buffer_rect,
|
|
157
|
+
_enqueue_copy_buffer_rect,
|
|
158
|
+
|
|
159
|
+
_enqueue_read_image,
|
|
160
|
+
_enqueue_copy_image,
|
|
161
|
+
_enqueue_write_image,
|
|
162
|
+
_enqueue_copy_image_to_buffer,
|
|
163
|
+
_enqueue_copy_buffer_to_image,
|
|
164
|
+
|
|
165
|
+
have_gl,
|
|
166
|
+
|
|
167
|
+
ImageFormat,
|
|
168
|
+
get_supported_image_formats,
|
|
169
|
+
|
|
170
|
+
Image,
|
|
171
|
+
Sampler,
|
|
172
|
+
|
|
173
|
+
# This class is available unconditionally, even though CL only
|
|
174
|
+
# has it on CL2.0 and newer.
|
|
175
|
+
Pipe,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
try:
|
|
180
|
+
from pyopencl._cl import DeviceTopologyAmd # noqa: F401
|
|
181
|
+
from pyopencl._cl import enqueue_copy_buffer_p2p_amd # noqa: F401
|
|
182
|
+
except ImportError:
|
|
183
|
+
pass
|
|
184
|
+
|
|
185
|
+
if not _PYPY:
|
|
186
|
+
# FIXME: Add back to default set when pypy support catches up
|
|
187
|
+
from pyopencl._cl import enqueue_map_buffer # noqa: F401
|
|
188
|
+
from pyopencl._cl import enqueue_map_image # noqa: F401
|
|
189
|
+
|
|
190
|
+
if get_cl_header_version() >= (1, 1):
|
|
191
|
+
from pyopencl._cl import UserEvent # noqa: F401
|
|
192
|
+
if get_cl_header_version() >= (1, 2):
|
|
193
|
+
from pyopencl._cl import ImageDescriptor
|
|
194
|
+
from pyopencl._cl import ( # noqa: F401
|
|
195
|
+
_enqueue_barrier_with_wait_list, _enqueue_fill_buffer,
|
|
196
|
+
_enqueue_marker_with_wait_list, enqueue_fill_image,
|
|
197
|
+
enqueue_migrate_mem_objects, unload_platform_compiler)
|
|
198
|
+
|
|
199
|
+
if get_cl_header_version() >= (2, 0):
|
|
200
|
+
from pyopencl._cl import SVM, SVMAllocation, SVMPointer
|
|
201
|
+
|
|
202
|
+
if _cl.have_gl():
|
|
203
|
+
from pyopencl._cl import ( # noqa: F401
|
|
204
|
+
GLBuffer, GLRenderBuffer, GLTexture, gl_object_type, gl_texture_info)
|
|
205
|
+
|
|
206
|
+
try:
|
|
207
|
+
from pyopencl._cl import get_apple_cgl_share_group # noqa: F401
|
|
208
|
+
except ImportError:
|
|
209
|
+
pass
|
|
210
|
+
|
|
211
|
+
try:
|
|
212
|
+
from pyopencl._cl import enqueue_acquire_gl_objects # noqa: F401
|
|
213
|
+
from pyopencl._cl import enqueue_release_gl_objects # noqa: F401
|
|
214
|
+
except ImportError:
|
|
215
|
+
pass
|
|
216
|
+
|
|
217
|
+
import inspect as _inspect
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# Every class exported by the C++ wrapper module whose name starts with a
# lowercase letter, except the names "zip", "map" and "range".
CONSTANT_CLASSES = tuple(
    candidate
    for attr_name, candidate in (
        (attr_name, getattr(_cl, attr_name)) for attr_name in dir(_cl))
    if _inspect.isclass(candidate)
    and attr_name[0].islower()
    and attr_name not in ("zip", "map", "range"))
|
|
224
|
+
|
|
225
|
+
# Hand-maintained subset of the constant classes exposed by ``_cl``.
# NOTE(review): judging by the name, these are the constant classes whose
# members are bit flags meant to be OR-ed together -- confirm against the
# code that consumes this tuple (not visible in this part of the file).
BITFIELD_CONSTANT_CLASSES = (
    _cl.device_type,
    _cl.device_fp_config,
    _cl.device_exec_capabilities,
    _cl.command_queue_properties,
    _cl.mem_flags,
    _cl.map_flags,
    _cl.kernel_arg_type_qualifier,
    _cl.device_affinity_domain,
    _cl.mem_migration_flags,
    _cl.device_svm_capabilities,
    _cl.queue_properties,
    _cl.svm_mem_flags,
    _cl.device_atomic_capabilities,
    _cl.device_device_enqueue_capabilities,
    _cl.version_bits,
    )
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# {{{ diagnostics
|
|
245
|
+
|
|
246
|
+
class CompilerWarning(UserWarning):
    """Warning category under which :func:`compiler_output` reports
    non-empty compiler output.
    """
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class CommandQueueUsedAfterExit(UserWarning):
    """Warning category for command queue misuse.

    NOTE(review): judging by the name, this is issued when a command queue
    is used after its ``with`` block has exited; the code that emits it is
    not in this part of the file -- confirm.
    """
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def compiler_output(text: str) -> None:
    """Report compiler output as a :class:`CompilerWarning`.

    If the environment variable ``PYOPENCL_COMPILER_OUTPUT`` is set to a
    true value (as judged by :func:`pytools.strtobool`), *text* itself is the
    warning message. Otherwise only a short hint naming that variable is
    issued.
    """
    from pytools import strtobool

    show_full_output = strtobool(
            os.environ.get("PYOPENCL_COMPILER_OUTPUT", "False"))

    if not show_full_output:
        text = ("Non-empty compiler output encountered. Set the "
                "environment variable PYOPENCL_COMPILER_OUTPUT=1 "
                "to see more.")

    # stacklevel=3 attributes the warning two frames above this function.
    warn(text, CompilerWarning, stacklevel=3)
|
|
262
|
+
|
|
263
|
+
# }}}
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# {{{ find pyopencl shipped source code
|
|
267
|
+
|
|
268
|
+
def _find_pyopencl_include_path() -> str:
    """Return the path of the ``cl`` directory shipped with PyOpenCL.

    The directory is looked up next to this file first, and via
    ``importlib.resources`` (or the ``importlib_resources`` backport) if it
    is not there.

    :returns: the path, wrapped in double quotes if it contains a space and
        does not already start with a double quote.
    :raises OSError: if the directory is found in neither place.
    """
    from os.path import abspath, dirname, exists, join

    # First choice: a "cl" directory sitting next to this file.
    candidate = join(abspath(dirname(__file__)), "cl")

    if not exists(candidate):
        try:
            # NOTE: only available in Python >=3.9
            from importlib.resources import files
        except ImportError:
            from importlib_resources import files  # type: ignore[no-redef]

        candidate = str(files("pyopencl") / "cl")
        if not exists(candidate):
            raise OSError("Unable to find PyOpenCL include path")

    # Quote the path if it contains a space and is not quoted already.
    # See https://github.com/inducer/pyopencl/issues/250 for discussion.
    if candidate.startswith('"') or " " not in candidate:
        return candidate

    return '"' + candidate + '"'
|
|
290
|
+
|
|
291
|
+
# }}}
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# {{{ build option munging
|
|
295
|
+
|
|
296
|
+
def _split_options_if_necessary(options):
    """Turn a build-option string into a list of options.

    A :class:`str` is split with :func:`shlex.split`; anything else is
    returned unchanged (the very same object).
    """
    if not isinstance(options, str):
        return options

    from shlex import split

    return split(options)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _find_include_path(options):
    """Collect the include directories named in a list of build options.

    Both ``-I`` and ``/I`` are recognized, either fused with the directory
    (``-Idir``, ``-I dir`` as a single entry) or as a bare flag followed by
    the directory in the next entry. One pair of surrounding double quotes
    is removed from each directory.

    :arg options: a sequence of options. Entries may be :class:`str` or
        :class:`bytes`; bytes are decoded as UTF-8, mirroring the encoding
        used by :func:`_options_to_bytestring`, which is fed the same list.
    :returns: a list of directories as :class:`str`, always starting
        with ``"."``.
    """
    def as_text(option):
        # bytes.startswith(str) raises TypeError, so normalize to str first.
        if isinstance(option, bytes):
            return option.decode("utf-8")
        return option

    def unquote(path):
        if path.startswith('"') and path.endswith('"'):
            return path[1:-1]
        return path

    include_path = ["."]

    option_idx = 0
    while option_idx < len(options):
        option = as_text(options[option_idx]).strip()

        if option.startswith(("-I", "/I")):
            if len(option) == 2:
                # Bare flag: the directory is the next entry, if there is
                # one. That entry is consumed either way.
                if option_idx + 1 < len(options):
                    include_path.append(
                            unquote(as_text(options[option_idx + 1])))
                option_idx += 2
            else:
                include_path.append(unquote(option[2:].lstrip()))
                option_idx += 1
        else:
            option_idx += 1

    return include_path
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def _options_to_bytestring(options):
    """Join build options into a single space-separated byte string.

    :class:`str` entries are encoded as UTF-8; all other entries are passed
    to :meth:`bytes.join` unchanged.
    """
    encoded = [
        opt.encode("utf-8") if isinstance(opt, str) else opt
        for opt in options]

    return b" ".join(encoded)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
# }}}
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
# {{{ Program (wrapper around _Program, adds caching support)
|
|
347
|
+
|
|
348
|
+
from pytools import strtobool


# Read once at import time from the PYOPENCL_NO_CACHE environment variable.
# When true, Program.build() always takes the "uncached source build" path.
_PYOPENCL_NO_CACHE = strtobool(os.environ.get("PYOPENCL_NO_CACHE", "false"))

# Appended to the caller's options by Program._process_build_options().
_DEFAULT_BUILD_OPTIONS: list[str] = []
# Puts the "cl" directory shipped with PyOpenCL on the include path.
_DEFAULT_INCLUDE_OPTIONS: list[str] = ["-I", _find_pyopencl_include_path()]

# map of platform.name to build options list
# (enable_debugging() adds entries to this at run time)
_PLAT_BUILD_OPTIONS: dict[str, list[str]] = {
    "Oclgrind": ["-D", "PYOPENCL_USING_OCLGRIND"],
}
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def enable_debugging(platform_or_context):
    """Enables debugging for all code subsequently compiled by
    PyOpenCL on the passed *platform*. Alternatively, a context
    may be passed.

    Only platforms whose name contains ``"AMD Accelerated"`` are handled:
    ``-g -O0`` is added to that platform's build options and the environment
    variable ``CPU_MAX_COMPUTE_UNITS`` is set to ``"1"``. For any other
    platform a warning is issued and nothing is changed.
    """
    platform = (
        platform_or_context.devices[0].platform
        if isinstance(platform_or_context, Context)
        else platform_or_context)

    if "AMD Accelerated" not in platform.name:
        warn(f"Do not know how to enable debugging on '{platform.name}'",
                stacklevel=2)
        return

    platform_options = _PLAT_BUILD_OPTIONS.setdefault(platform.name, [])
    platform_options.extend(["-g", "-O0"])
    os.environ["CPU_MAX_COMPUTE_UNITS"] = "1"
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class Program:
    """Wrapper around the low-level ``_cl._Program`` that adds caching
    support for from-source builds.

    For a program created from source, creation of the underlying
    ``_Program`` is deferred until :meth:`build` (or until something needs
    the low-level object, see :meth:`_get_prg`), so that :meth:`build` can
    go through :mod:`pyopencl.cache`.

    Attribute access for names that are not otherwise found is interpreted
    as a kernel lookup, see :meth:`__getattr__`.
    """

    def __init__(self, arg1, arg2=None, arg3=None):
        """Create a program. Three call forms are supported:

        * ``Program(prg)``: wrap an existing low-level program object.
        * ``Program(context, source)``: create from source. If
          :func:`pyopencl.tools.is_spirv` recognizes *source*, the program
          is created right away via ``_cl._create_program_with_il``.
          Otherwise creation is deferred.
        * ``Program(context, device, binaries)``: create from binaries.
        """
        # Set first so that every construction path, including the early
        # return for SPIR-V below, leaves the attribute defined. Otherwise
        # reading it in __getattr__ would recurse into a kernel lookup.
        self._build_duration_info = None

        if arg2 is None:
            # 1-argument form: program
            self._prg = arg1
            self._context = self._prg.get_info(program_info.CONTEXT)

        elif arg3 is None:
            # 2-argument form: context, source
            context, source = arg1, arg2

            from pyopencl.tools import is_spirv
            if is_spirv(source):
                # FIXME no caching in SPIR-V case
                self._context = context
                self._prg = _cl._create_program_with_il(context, source)
                return

            self._context = context
            self._source = source
            # Created lazily, see _get_prg() and build().
            self._prg = None

        else:
            context, device, binaries = arg1, arg2, arg3
            self._context = context
            self._prg = _cl._Program(context, device, binaries)

    def _get_prg(self):
        """Return the underlying low-level program, creating it from the
        stored source (with a warning) if it does not exist yet.
        """
        if self._prg is not None:
            return self._prg
        else:
            # "no program" can only happen in from-source case.
            warn("Pre-build attribute access defeats compiler caching.",
                    stacklevel=3)

            self._prg = _cl._Program(self._context, self._source)
            return self._prg

    def get_info(self, arg):
        """Forward to ``get_info`` of the underlying program."""
        return self._get_prg().get_info(arg)

    def get_build_info(self, *args, **kwargs):
        """Forward to ``get_build_info`` of the underlying program."""
        return self._get_prg().get_build_info(*args, **kwargs)

    def all_kernels(self):
        """Forward to ``all_kernels`` of the underlying program."""
        return self._get_prg().all_kernels()

    # The docstrings of the next two members are copied from the low-level
    # class right after their definition.
    @property
    def int_ptr(self):
        return self._get_prg().int_ptr
    int_ptr.__doc__ = _cl._Program.int_ptr.__doc__

    @staticmethod
    def from_int_ptr(int_ptr_value, retain=True):
        return Program(_cl._Program.from_int_ptr(int_ptr_value, retain))
    from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__

    def __getattr__(self, attr):
        """Look up *attr* as a kernel name and return the :class:`Kernel`.

        The first successful lookup after a :meth:`build` that took longer
        than 0.2 s logs the build duration; the stored build timing is
        cleared by the first successful lookup in any case.

        :raises AttributeError: if creating or querying the kernel raises
            :class:`LogicError`.
        """
        try:
            knl = Kernel(self, attr)
            # Nvidia does not raise errors even for invalid names,
            # but this will give an error if the kernel is invalid.
            knl.num_args  # noqa: B018

            if self._build_duration_info is not None:
                build_descr, _was_cached, duration = self._build_duration_info
                if duration > 0.2:
                    logger.info(
                        "build program: kernel '%s' was part of a "
                        "lengthy %s (%.2f s)", attr, build_descr, duration)

                # don't whine about build times more than once.
                self._build_duration_info = None

            return knl
        except LogicError as err:
            raise AttributeError("'%s' was not found as a program "
                    "info attribute or as a kernel name" % attr) from err

    # {{{ build

    @classmethod
    def _process_build_options(cls, context, options, _add_include_path=False):
        """Assemble the complete option list for a build or compile.

        *options* may be *None*, a string (split via
        :func:`_split_options_if_necessary`), a tuple or a list. The module
        default options, the default include options, the options registered
        for the platform of ``context.devices[0]`` and finally the
        whitespace-split contents of the ``PYOPENCL_BUILD_OPTIONS``
        environment variable are appended, in that order.

        *_add_include_path* is accepted but not used.

        :returns: a tuple ``(options_bytes, include_path)``.
        """
        if options is None:
            options = []
        if isinstance(options, tuple):
            options = list(options)

        options = _split_options_if_necessary(options)

        options = (options
                + _DEFAULT_BUILD_OPTIONS
                + _DEFAULT_INCLUDE_OPTIONS
                + _PLAT_BUILD_OPTIONS.get(
                    context.devices[0].platform.name, []))

        forced_options = os.environ.get("PYOPENCL_BUILD_OPTIONS")
        if forced_options:
            options = options + forced_options.split()

        return (
                _options_to_bytestring(options),
                _find_include_path(options))

    def build(self, options=None, devices=None, cache_dir=None):
        """Build the program and return *self*.

        If a low-level program already exists, or is created here, it is
        built directly. It is created here when ``PYOPENCL_NO_CACHE`` is set
        or ``has_src_build_cache`` reports true for the first device.
        Otherwise the build goes through
        :func:`pyopencl.cache.create_built_program_from_source_cached`.

        :arg options: see :meth:`_process_build_options`.
        :arg cache_dir: defaults to the ``cache_dir`` attribute of the
            context, if it has one.
        """
        options_bytes, include_path = self._process_build_options(
                self._context, options)

        if cache_dir is None:
            cache_dir = getattr(self._context, "cache_dir", None)

        build_descr = None
        from pyopencl.characterize import has_src_build_cache

        if (
                (_PYOPENCL_NO_CACHE or has_src_build_cache(self._context.devices[0]))
                and self._prg is None):
            if _PYOPENCL_NO_CACHE:
                build_descr = "uncached source build (cache disabled by user)"
            else:
                build_descr = "uncached source build (assuming cached by ICD)"

            self._prg = _cl._Program(self._context, self._source)

        from time import time
        start_time = time()
        was_cached = False

        if self._prg is not None:
            # uncached

            if build_descr is None:
                build_descr = "uncached source build"

            self._build_and_catch_errors(
                    lambda: self._prg.build(options_bytes, devices),
                    options_bytes=options_bytes)

        else:
            # cached

            from pyopencl.cache import create_built_program_from_source_cached
            self._prg, was_cached = self._build_and_catch_errors(
                    lambda: create_built_program_from_source_cached(
                        self._context, self._source, options_bytes, devices,
                        cache_dir=cache_dir, include_path=include_path),
                    options_bytes=options_bytes, source=self._source)

            if was_cached:
                build_descr = "cache retrieval"
            else:
                build_descr = "source build resulting from a binary cache miss"

            # NOTE(review): from here on self._context no longer exists, so
            # a later build()/compile() on this object cannot read it --
            # confirm that this is intended.
            del self._context

        end_time = time()

        # Consumed (and cleared) by the first kernel lookup in __getattr__.
        self._build_duration_info = (build_descr, was_cached, end_time-start_time)

        return self

    def _build_and_catch_errors(self, build_func, options_bytes, source=None):
        """Call *build_func* and return its result.

        A raised :class:`RuntimeError` (the PyOpenCL error class imported
        from ``_cl`` at the top of this module, which shadows the builtin)
        is re-raised as a new :class:`RuntimeError` with the same ``code``
        and ``routine``. The build options are appended to its message. If
        *source* is given, it is saved to a ``.cl`` temporary file that is
        not deleted, and the file name is appended as well.
        """
        try:
            return build_func()
        except RuntimeError as e:
            msg = str(e)
            if options_bytes:
                msg = msg + "\n(options: %s)" % options_bytes.decode("utf-8")

            if source is not None:
                from tempfile import NamedTemporaryFile

                # delete=False: the file must outlive this block, since the
                # error message points the user at it.
                with NamedTemporaryFile(
                        mode="wt", delete=False, suffix=".cl") as srcfile:
                    srcfile.write(source)

                msg = msg + "\n(source saved as %s)" % srcfile.name

            code = e.code
            routine = e.routine

            err = RuntimeError(
                    _cl._ErrorRecord(
                        msg=msg,
                        code=code,
                        routine=routine))

            # Python 3.2 outputs the whole list of currently active exceptions
            # This serves to remove one (redundant) level from that nesting.
            raise err

    # }}}

    def compile(self, options=None, devices=None, headers=None):
        """Compile the program and return *self*.

        :arg options: see :meth:`_process_build_options`.
        :arg headers: an iterable of ``(name, program)`` pairs, where each
            *program* is a :class:`Program`. Defaults to no headers.
        """
        if headers is None:
            headers = []

        options_bytes, _ = self._process_build_options(self._context, options)

        self._get_prg().compile(options_bytes, devices,
                [(name, prg._get_prg()) for name, prg in headers])
        return self

    def __eq__(self, other):
        """Two programs are equal if their underlying programs are."""
        return self._get_prg() == other._get_prg()

    def __ne__(self, other):
        """Negation of :meth:`__eq__`."""
        return self._get_prg() != other._get_prg()

    def __hash__(self):
        """Hash of the underlying program, consistent with :meth:`__eq__`."""
        return hash(self._get_prg())
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def create_program_with_built_in_kernels(context, devices, kernel_names):
    """Return a :class:`Program` made of built-in kernels.

    :arg kernel_names: either a single string, passed on unchanged, or an
        iterable of strings, which is joined with ``":"``.
    """
    joined_names = (
        kernel_names if isinstance(kernel_names, str)
        else ":".join(kernel_names))

    raw_prg = _Program.create_with_built_in_kernels(
            context, devices, joined_names)
    return Program(raw_prg)
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def link_program(context, programs, options=None, devices=None):
    """Link *programs* and return the result as a new :class:`Program`.

    :arg programs: an iterable of :class:`Program` instances.
    :arg options: *None*, a string (split via
        :func:`_split_options_if_necessary`) or a sequence of options.
    """
    option_list = _split_options_if_necessary(
            [] if options is None else options)
    options_bytes = _options_to_bytestring(option_list)

    # The low-level link call works on the underlying program objects.
    raw_programs = [prg._get_prg() for prg in programs]

    linked = _Program.link(context, raw_programs, options_bytes, devices)
    return Program(linked)
|
|
614
|
+
|
|
615
|
+
# }}}
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
# {{{ monkeypatch C++ wrappers to add functionality
|
|
619
|
+
|
|
620
|
+
def _add_functionality():
|
|
621
|
+
def generic_get_cl_version(self):
|
|
622
|
+
import re
|
|
623
|
+
version_string = self.version
|
|
624
|
+
match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string)
|
|
625
|
+
if match is None:
|
|
626
|
+
raise RuntimeError("%s %s returned non-conformant "
|
|
627
|
+
"platform version string '%s'" %
|
|
628
|
+
(type(self).__name__, self, version_string))
|
|
629
|
+
|
|
630
|
+
return int(match.group(1)), int(match.group(2))
|
|
631
|
+
|
|
632
|
+
# {{{ Platform
|
|
633
|
+
|
|
634
|
+
def platform_repr(self):
|
|
635
|
+
return f"<pyopencl.Platform '{self.name}' at 0x{self.int_ptr:x}>"
|
|
636
|
+
|
|
637
|
+
Platform.__repr__ = platform_repr
|
|
638
|
+
Platform._get_cl_version = generic_get_cl_version
|
|
639
|
+
|
|
640
|
+
# }}}
|
|
641
|
+
|
|
642
|
+
# {{{ Device
|
|
643
|
+
|
|
644
|
+
def device_repr(self):
|
|
645
|
+
return "<pyopencl.Device '{}' on '{}' at 0x{:x}>".format(
|
|
646
|
+
self.name.strip(), self.platform.name.strip(), self.int_ptr)
|
|
647
|
+
|
|
648
|
+
def device_hashable_model_and_version_identifier(self):
|
|
649
|
+
return ("v1", self.vendor, self.vendor_id, self.name, self.version)
|
|
650
|
+
|
|
651
|
+
def device_persistent_unique_id(self):
|
|
652
|
+
warn("Device.persistent_unique_id is deprecated. "
|
|
653
|
+
"Use Device.hashable_model_and_version_identifier instead.",
|
|
654
|
+
DeprecationWarning, stacklevel=2)
|
|
655
|
+
return device_hashable_model_and_version_identifier(self)
|
|
656
|
+
|
|
657
|
+
Device.__repr__ = device_repr
|
|
658
|
+
|
|
659
|
+
# undocumented for now:
|
|
660
|
+
Device._get_cl_version = generic_get_cl_version
|
|
661
|
+
Device.hashable_model_and_version_identifier = property(
|
|
662
|
+
device_hashable_model_and_version_identifier)
|
|
663
|
+
Device.persistent_unique_id = property(device_persistent_unique_id)
|
|
664
|
+
|
|
665
|
+
# }}}
|
|
666
|
+
|
|
667
|
+
# {{{ Context
|
|
668
|
+
|
|
669
|
+
def context_repr(self):
|
|
670
|
+
return "<pyopencl.Context at 0x{:x} on {}>".format(self.int_ptr,
|
|
671
|
+
", ".join(repr(dev) for dev in self.devices))
|
|
672
|
+
|
|
673
|
+
def context_get_cl_version(self):
|
|
674
|
+
return self.devices[0].platform._get_cl_version()
|
|
675
|
+
|
|
676
|
+
Context.__repr__ = context_repr
|
|
677
|
+
from pytools import memoize_method
|
|
678
|
+
Context._get_cl_version = memoize_method(context_get_cl_version)
|
|
679
|
+
|
|
680
|
+
# }}}
|
|
681
|
+
|
|
682
|
+
# {{{ CommandQueue
|
|
683
|
+
|
|
684
|
+
def command_queue_enter(self):
|
|
685
|
+
return self
|
|
686
|
+
|
|
687
|
+
def command_queue_exit(self, exc_type, exc_val, exc_tb):
|
|
688
|
+
self.finish()
|
|
689
|
+
self._finalize()
|
|
690
|
+
|
|
691
|
+
def command_queue_get_cl_version(self):
|
|
692
|
+
return self.device._get_cl_version()
|
|
693
|
+
|
|
694
|
+
CommandQueue.__enter__ = command_queue_enter
|
|
695
|
+
CommandQueue.__exit__ = command_queue_exit
|
|
696
|
+
CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version)
|
|
697
|
+
|
|
698
|
+
# }}}
|
|
699
|
+
|
|
700
|
+
# {{{ _Program (the internal, non-caching version)
|
|
701
|
+
|
|
702
|
+
def program_get_build_logs(self):
|
|
703
|
+
build_logs = []
|
|
704
|
+
for dev in self.get_info(_cl.program_info.DEVICES):
|
|
705
|
+
try:
|
|
706
|
+
log = self.get_build_info(dev, program_build_info.LOG)
|
|
707
|
+
except Exception:
|
|
708
|
+
log = "<error retrieving log>"
|
|
709
|
+
|
|
710
|
+
build_logs.append((dev, log))
|
|
711
|
+
|
|
712
|
+
return build_logs
|
|
713
|
+
|
|
714
|
+
def program_build(self, options_bytes, devices=None):
|
|
715
|
+
err = None
|
|
716
|
+
try:
|
|
717
|
+
self._build(options=options_bytes, devices=devices)
|
|
718
|
+
except Error as e:
|
|
719
|
+
msg = str(e) + "\n\n" + (75*"="+"\n").join(
|
|
720
|
+
f"Build on {dev}:\n\n{log}"
|
|
721
|
+
for dev, log in self._get_build_logs())
|
|
722
|
+
code = e.code
|
|
723
|
+
routine = e.routine
|
|
724
|
+
|
|
725
|
+
err = _cl.RuntimeError(
|
|
726
|
+
_cl._ErrorRecord(
|
|
727
|
+
msg=msg,
|
|
728
|
+
code=code,
|
|
729
|
+
routine=routine))
|
|
730
|
+
|
|
731
|
+
if err is not None:
|
|
732
|
+
# Python 3.2 outputs the whole list of currently active exceptions
|
|
733
|
+
# This serves to remove one (redundant) level from that nesting.
|
|
734
|
+
raise err
|
|
735
|
+
|
|
736
|
+
message = (75*"="+"\n").join(
|
|
737
|
+
f"Build on {dev} succeeded, but said:\n\n{log}"
|
|
738
|
+
for dev, log in self._get_build_logs()
|
|
739
|
+
if log is not None and log.strip())
|
|
740
|
+
|
|
741
|
+
if message:
|
|
742
|
+
if self.kind() == program_kind.SOURCE:
|
|
743
|
+
build_type = "From-source build"
|
|
744
|
+
elif self.kind() == program_kind.BINARY:
|
|
745
|
+
build_type = "From-binary build"
|
|
746
|
+
elif self.kind() == program_kind.IL:
|
|
747
|
+
build_type = "From-IL build"
|
|
748
|
+
else:
|
|
749
|
+
build_type = "Build"
|
|
750
|
+
|
|
751
|
+
compiler_output("%s succeeded, but resulted in non-empty logs:\n%s"
|
|
752
|
+
% (build_type, message))
|
|
753
|
+
|
|
754
|
+
return self
|
|
755
|
+
|
|
756
|
+
_cl._Program._get_build_logs = program_get_build_logs
|
|
757
|
+
_cl._Program.build = program_build
|
|
758
|
+
|
|
759
|
+
# }}}
|
|
760
|
+
|
|
761
|
+
# {{{ Event
|
|
762
|
+
class ProfilingInfoGetter:
|
|
763
|
+
def __init__(self, event):
|
|
764
|
+
self.event = event
|
|
765
|
+
|
|
766
|
+
def __getattr__(self, name):
|
|
767
|
+
info_cls = _cl.profiling_info
|
|
768
|
+
|
|
769
|
+
try:
|
|
770
|
+
inf_attr = getattr(info_cls, name.upper())
|
|
771
|
+
except AttributeError as err:
|
|
772
|
+
raise AttributeError("%s has no attribute '%s'"
|
|
773
|
+
% (type(self), name)) from err
|
|
774
|
+
else:
|
|
775
|
+
return self.event.get_profiling_info(inf_attr)
|
|
776
|
+
|
|
777
|
+
_cl.Event.profile = property(ProfilingInfoGetter)
|
|
778
|
+
|
|
779
|
+
# }}}
|
|
780
|
+
|
|
781
|
+
# {{{ Kernel
|
|
782
|
+
|
|
783
|
+
kernel_old_get_info = Kernel.get_info
|
|
784
|
+
kernel_old_get_work_group_info = Kernel.get_work_group_info
|
|
785
|
+
|
|
786
|
+
def kernel_set_arg_types(self, arg_types):
|
|
787
|
+
arg_types = tuple(arg_types)
|
|
788
|
+
|
|
789
|
+
# {{{ arg counting bug handling
|
|
790
|
+
|
|
791
|
+
# For example:
|
|
792
|
+
# https://github.com/pocl/pocl/issues/197
|
|
793
|
+
# (but Apple CPU has a similar bug)
|
|
794
|
+
|
|
795
|
+
work_around_arg_count_bug = False
|
|
796
|
+
warn_about_arg_count_bug = False
|
|
797
|
+
|
|
798
|
+
from pyopencl.characterize import has_struct_arg_count_bug
|
|
799
|
+
|
|
800
|
+
count_bug_per_dev = [
|
|
801
|
+
has_struct_arg_count_bug(dev, self.context)
|
|
802
|
+
for dev in self.context.devices]
|
|
803
|
+
|
|
804
|
+
from pytools import single_valued
|
|
805
|
+
if any(count_bug_per_dev):
|
|
806
|
+
if all(count_bug_per_dev):
|
|
807
|
+
work_around_arg_count_bug = single_valued(count_bug_per_dev)
|
|
808
|
+
else:
|
|
809
|
+
warn_about_arg_count_bug = True
|
|
810
|
+
|
|
811
|
+
# }}}
|
|
812
|
+
|
|
813
|
+
from pyopencl.invoker import generate_enqueue_and_set_args
|
|
814
|
+
self._set_enqueue_and_set_args(
|
|
815
|
+
*generate_enqueue_and_set_args(
|
|
816
|
+
self.function_name,
|
|
817
|
+
len(arg_types), self.num_args,
|
|
818
|
+
arg_types,
|
|
819
|
+
warn_about_arg_count_bug=warn_about_arg_count_bug,
|
|
820
|
+
work_around_arg_count_bug=work_around_arg_count_bug,
|
|
821
|
+
devs=self.context.devices))
|
|
822
|
+
|
|
823
|
+
def kernel_get_work_group_info(self, param, device):
|
|
824
|
+
try:
|
|
825
|
+
wg_info_cache = self._wg_info_cache
|
|
826
|
+
except AttributeError:
|
|
827
|
+
wg_info_cache = self._wg_info_cache = {}
|
|
828
|
+
|
|
829
|
+
cache_key = (param, device.int_ptr)
|
|
830
|
+
try:
|
|
831
|
+
return wg_info_cache[cache_key]
|
|
832
|
+
except KeyError:
|
|
833
|
+
pass
|
|
834
|
+
|
|
835
|
+
result = kernel_old_get_work_group_info(self, param, device)
|
|
836
|
+
wg_info_cache[cache_key] = result
|
|
837
|
+
return result
|
|
838
|
+
|
|
839
|
+
def kernel_capture_call(self, output_file, queue, global_size, local_size,
        *args, **kwargs):
    """Forward this kernel and all arguments to
    :func:`pyopencl.capture_call.capture_kernel_call`.

    Nothing is returned; the result of the forwarded call is discarded.
    """
    # Imported at call time rather than at module import time.
    from pyopencl.capture_call import capture_kernel_call
    capture_kernel_call(self, output_file, queue, global_size, local_size,
            *args, **kwargs)
|
|
844
|
+
|
|
845
|
+
def kernel_get_info(self, param_name):
    """Query kernel info, re-wrapping program results.

    The value comes from the wrapped original ``get_info``. If it is a
    ``_Program`` instance it is returned wrapped in :class:`Program`;
    every other value is returned unchanged.
    """
    raw = kernel_old_get_info(self, param_name)
    return Program(raw) if isinstance(raw, _Program) else raw
|
|
852
|
+
|
|
853
|
+
Kernel.get_work_group_info = kernel_get_work_group_info
|
|
854
|
+
|
|
855
|
+
# FIXME: Possibly deprecate this version
|
|
856
|
+
Kernel.set_scalar_arg_dtypes = kernel_set_arg_types
|
|
857
|
+
Kernel.set_arg_types = kernel_set_arg_types
|
|
858
|
+
|
|
859
|
+
Kernel.capture_call = kernel_capture_call
|
|
860
|
+
Kernel.get_info = kernel_get_info
|
|
861
|
+
|
|
862
|
+
# }}}
|
|
863
|
+
|
|
864
|
+
# {{{ ImageFormat
|
|
865
|
+
|
|
866
|
+
def image_format_repr(self):
    """Return ``ImageFormat(<order>, <data type>)`` using symbolic names.

    Values without a known name are rendered through the hexadecimal
    fallback format strings given to ``to_string``.
    """
    order_name = channel_order.to_string(
        self.channel_order,
        "<unknown channel order 0x%x>")
    type_name = channel_type.to_string(
        self.channel_data_type,
        "<unknown channel data type 0x%x>")
    return "ImageFormat({}, {})".format(order_name, type_name)
|
|
872
|
+
|
|
873
|
+
def image_format_eq(self, other):
    """Compare two formats by channel order and channel data type."""
    same_order = self.channel_order == other.channel_order
    if not same_order:
        return same_order
    return self.channel_data_type == other.channel_data_type
|
|
876
|
+
|
|
877
|
+
def image_format_ne(self, other):
    """Return the boolean negation of ``image_format_eq(self, other)``."""
    return not image_format_eq(self, other)
|
|
879
|
+
|
|
880
|
+
def image_format_hash(self):
    """Hash on the object's type, channel order and channel data type."""
    identity = (type(self), self.channel_order, self.channel_data_type)
    return hash(identity)
|
|
882
|
+
|
|
883
|
+
ImageFormat.__repr__ = image_format_repr
|
|
884
|
+
ImageFormat.__eq__ = image_format_eq
|
|
885
|
+
ImageFormat.__ne__ = image_format_ne
|
|
886
|
+
ImageFormat.__hash__ = image_format_hash
|
|
887
|
+
|
|
888
|
+
# }}}
|
|
889
|
+
|
|
890
|
+
# {{{ Image
|
|
891
|
+
|
|
892
|
+
def image_init(
        self, context, flags, format, shape=None, pitches=None,
        hostbuf=None, is_array=False, buffer=None, *,
        desc: ImageDescriptor | None = None,
        _through_create_image: bool = False,
        ) -> None:
    """Initialize an image from an image descriptor or from a shape.

    :arg context: the context to create the image in. Its OpenCL version
        (together with the header version) selects between the
        descriptor-based path and the legacy path.
    :arg flags: memory flags. A warning is issued if *hostbuf* is given
        but neither ``USE_HOST_PTR`` nor ``COPY_HOST_PTR`` is set.
    :arg format: the image format, passed through to ``Image._custom_init``.
    :arg shape: a sequence of one to three extents. If omitted,
        ``hostbuf.shape`` is used.
    :arg pitches: may only be given together with *hostbuf*.
    :arg hostbuf: optional host-side data.
    :arg is_array: request an image array (2D array for a three-entry
        shape, 1D array for a two-entry shape).
    :arg buffer: only accepted for one-entry shapes, selecting
        ``IMAGE1D_BUFFER``.
    :arg desc: a ready-made :class:`ImageDescriptor`. Mutually exclusive
        with *shape*, *pitches*, *is_array* and *buffer*.
    :arg _through_create_image: internal flag; when true, the deprecation
        warning for the non-descriptor constructor is not emitted.

    :raises TypeError: for argument combinations that are not supported
        (see the individual checks below).
    :raises ValueError: if *buffer* and *is_array* are combined, or if
        *shape* does not have one, two or three entries.
    :raises Error: if neither *shape* nor *hostbuf* is given, or if
        *pitches* is given without *hostbuf*.
    """
    if hostbuf is not None and not \
            (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)):
        warn("'hostbuf' was passed, but no memory flags to make use of it.",
                stacklevel=2)

    if desc is not None:
        # Descriptor path: the descriptor carries everything the
        # shape-style arguments would otherwise describe.
        if shape is not None:
            raise TypeError("shape may not be passed when using descriptor")
        if pitches is not None:
            raise TypeError("pitches may not be passed when using descriptor")
        if is_array:
            raise TypeError("is_array may not be passed when using descriptor")
        if buffer is not None:
            # Fixed: this message used to name 'is_array' (copy-paste slip).
            raise TypeError("buffer may not be passed when using descriptor")

        Image._custom_init(self, context, flags, format, desc, hostbuf)

        return

    if shape is None and hostbuf is None:
        raise Error("'shape' must be passed if 'hostbuf' is not given")

    if shape is None and hostbuf is not None:
        shape = hostbuf.shape

    if hostbuf is None and pitches is not None:
        raise Error("'pitches' may only be given if 'hostbuf' is given")

    if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2):
        if not _through_create_image:
            warn("Non-descriptor Image constructor called. "
                 "This will stop working in 2026. "
                 "Use create_image instead (with the same arguments).",
                 DeprecationWarning, stacklevel=2)

        if buffer is not None and is_array:
            raise ValueError(
                    "'buffer' and 'is_array' are mutually exclusive")

        # Derive the image type from the number of shape entries.
        if len(shape) == 3:
            if buffer is not None:
                raise TypeError(
                        "'buffer' argument is not supported for 3D arrays")
            elif is_array:
                image_type = mem_object_type.IMAGE2D_ARRAY
            else:
                image_type = mem_object_type.IMAGE3D

        elif len(shape) == 2:
            if buffer is not None:
                raise TypeError(
                        "'buffer' argument is not supported for 2D arrays")
            elif is_array:
                image_type = mem_object_type.IMAGE1D_ARRAY
            else:
                image_type = mem_object_type.IMAGE2D

        elif len(shape) == 1:
            if buffer is not None:
                image_type = mem_object_type.IMAGE1D_BUFFER
            elif is_array:
                raise TypeError("array of zero-dimensional images not supported")
            else:
                image_type = mem_object_type.IMAGE1D

        else:
            raise ValueError("images cannot have more than three dimensions")

        desc = ImageDescriptor() \
            # pylint: disable=possibly-used-before-assignment

        desc.image_type = image_type
        desc.shape = shape  # also sets desc.array_size

        if pitches is None:
            desc.pitches = (0, 0)
        else:
            desc.pitches = pitches

        desc.num_mip_levels = 0  # per CL 1.2 spec
        desc.num_samples = 0  # per CL 1.2 spec
        desc.buffer = buffer

        Image._custom_init(self, context, flags, format, desc, hostbuf)
    else:
        # legacy init for CL 1.1 and older
        if is_array:
            raise TypeError("'is_array=True' is not supported for CL < 1.2")
        # if num_mip_levels is not None:
            # raise TypeError(
            #       "'num_mip_levels' argument is not supported for CL < 1.2")
        # if num_samples is not None:
            # raise TypeError(
            #        "'num_samples' argument is not supported for CL < 1.2")
        if buffer is not None:
            raise TypeError("'buffer' argument is not supported for CL < 1.2")

        Image._custom_init(self, context, flags, format, shape,
                pitches, hostbuf)
|
|
997
|
+
|
|
998
|
+
class _ImageInfoGetter:
    """Deprecated attribute-style accessor for image info.

    Constructing an instance emits a deprecation notice. Attribute access
    is translated into a ``get_image_info`` query on the wrapped object,
    using the upper-cased attribute name as the ``_cl.image_info`` constant.
    """

    def __init__(self, event):
        warn(
            "Image.image.attr is deprecated and will go away in 2021. "
            "Use Image.attr directly, instead.", stacklevel=2)

        self.event = event

    def __getattr__(self, name):
        constant_name = name.upper()
        try:
            info_key = getattr(_cl.image_info, constant_name)
        except AttributeError as err:
            raise AttributeError("%s has no attribute '%s'"
                    % (type(self), name)) from err

        return self.event.get_image_info(info_key)
|
|
1014
|
+
|
|
1015
|
+
def image_shape(self):
    """Return ``(width, height)`` for 2D images and
    ``(width, height, depth)`` for 3D images.

    :raises LogicError: for every other memory object type.
    """
    if self.type == mem_object_type.IMAGE2D:
        return (self.width, self.height)

    if self.type == mem_object_type.IMAGE3D:
        return (self.width, self.height, self.depth)

    raise LogicError("only images have shapes")
|
|
1022
|
+
|
|
1023
|
+
Image.__init__ = image_init
|
|
1024
|
+
Image.image = property(_ImageInfoGetter)
|
|
1025
|
+
Image.shape = property(image_shape)
|
|
1026
|
+
|
|
1027
|
+
# }}}
|
|
1028
|
+
|
|
1029
|
+
# {{{ Error
|
|
1030
|
+
|
|
1031
|
+
def error_str(self):
    """Render the error as a human-readable string.

    If ``self.what`` has no ``routine`` attribute it is simply converted
    with :func:`str`. Otherwise the message is assembled from the symbolic
    status code (unless it is ``SUCCESS``), the failing routine and the
    detail text, in that order.
    """
    info = self.what

    if not hasattr(info, "routine"):
        return str(info)

    message = ""
    if info.code() != status_code.SUCCESS:
        message = status_code.to_string(
            info.code(), "<unknown error %d>")

    routine_name = info.routine()
    if routine_name:
        message = f"{routine_name} failed: {message}"

    detail = info.what()
    if detail:
        # Only separate with a dash when something precedes the detail.
        separator = " - " if message else ""
        message += separator + detail

    return message
|
|
1051
|
+
|
|
1052
|
+
def error_code(self):
    """Return ``code()`` of the first exception argument."""
    payload = self.args[0]
    return payload.code()
|
|
1054
|
+
|
|
1055
|
+
def error_routine(self):
    """Return ``routine()`` of the first exception argument."""
    payload = self.args[0]
    return payload.routine()
|
|
1057
|
+
|
|
1058
|
+
def error_what(self):
    """Return the first exception argument itself."""
    payload = self.args[0]
    return payload
|
|
1060
|
+
|
|
1061
|
+
Error.__str__ = error_str
|
|
1062
|
+
Error.code = property(error_code)
|
|
1063
|
+
Error.routine = property(error_routine)
|
|
1064
|
+
Error.what = property(error_what)
|
|
1065
|
+
|
|
1066
|
+
# }}}
|
|
1067
|
+
|
|
1068
|
+
# {{{ MemoryMap
|
|
1069
|
+
|
|
1070
|
+
def memory_map_enter(self):
    """Context-manager entry: return the object itself."""
    return self
|
|
1072
|
+
|
|
1073
|
+
def memory_map_exit(self, exc_type, exc_val, exc_tb):
    """Context-manager exit: call ``self.release()``.

    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the ``with`` block is not suppressed.
    """
    self.release()
|
|
1075
|
+
|
|
1076
|
+
MemoryMap.__doc__ = """
|
|
1077
|
+
This class may also be used as a context manager in a ``with`` statement.
|
|
1078
|
+
The memory corresponding to this object will be unmapped when
|
|
1079
|
+
this object is deleted or :meth:`release` is called.
|
|
1080
|
+
|
|
1081
|
+
.. automethod:: release
|
|
1082
|
+
"""
|
|
1083
|
+
MemoryMap.__enter__ = memory_map_enter
|
|
1084
|
+
MemoryMap.__exit__ = memory_map_exit
|
|
1085
|
+
|
|
1086
|
+
# }}}
|
|
1087
|
+
|
|
1088
|
+
# {{{ SVMPointer
|
|
1089
|
+
|
|
1090
|
+
if get_cl_header_version() >= (2, 0):
|
|
1091
|
+
SVMPointer.__doc__ = """A base class for things that can be passed to
|
|
1092
|
+
functions that allow an SVM pointer, e.g. kernel enqueues and memory
|
|
1093
|
+
copies.
|
|
1094
|
+
|
|
1095
|
+
Objects of this type cannot currently be directly created or
|
|
1096
|
+
implemented in Python. To obtain objects implementing this type,
|
|
1097
|
+
consider its subtypes :class:`SVMAllocation` and :class:`SVM`.
|
|
1098
|
+
|
|
1099
|
+
|
|
1100
|
+
.. property:: svm_ptr
|
|
1101
|
+
|
|
1102
|
+
Gives the SVM pointer as an :class:`int`.
|
|
1103
|
+
|
|
1104
|
+
.. property:: size
|
|
1105
|
+
|
|
1106
|
+
An :class:`int` denoting the size in bytes, or *None*, if the size
|
|
1107
|
+
of the SVM pointed to is not known.
|
|
1108
|
+
|
|
1109
|
+
*Most* objects of this type (e.g. instances of
|
|
1110
|
+
:class:`SVMAllocation` and :class:`SVM` know their size, so that,
|
|
1111
|
+
for example :class:`enqueue_copy` will automatically copy an entire
|
|
1112
|
+
:class:`SVMAllocation` when a size is not explicitly specified.
|
|
1113
|
+
|
|
1114
|
+
.. automethod:: map
|
|
1115
|
+
.. automethod:: map_ro
|
|
1116
|
+
.. automethod:: map_rw
|
|
1117
|
+
.. automethod:: as_buffer
|
|
1118
|
+
.. property:: buf
|
|
1119
|
+
|
|
1120
|
+
An opaque object implementing the :c:func:`Python buffer protocol
|
|
1121
|
+
<PyObject_GetBuffer>`. It exposes the pointed-to memory as
|
|
1122
|
+
a one-dimensional buffer of bytes, with the size matching
|
|
1123
|
+
:attr:`size`.
|
|
1124
|
+
|
|
1125
|
+
No guarantee is provided that two references to this attribute
|
|
1126
|
+
result in the same object.
|
|
1127
|
+
"""
|
|
1128
|
+
|
|
1129
|
+
def svmptr_map(self, queue: CommandQueue, *, flags: int, is_blocking: bool =
        True, wait_for: Sequence[Event] | None = None,
        size: int | None = None) -> SVMMap:
    """
    :arg is_blocking: If *False*, subsequent code must wait on
        :attr:`SVMMap.event` in the returned object before accessing the
        mapped memory.
    :arg flags: a combination of :class:`pyopencl.map_flags`.
    :arg size: The size of the map in bytes. If not provided, defaults to
        :attr:`size`.

    |std-enqueue-blurb|
    """
    # The returned SVMMap wraps an array view of self.buf together with the
    # result of the enqueued map operation.
    return SVMMap(self,
            np.asarray(self.buf),
            queue,
            _cl._enqueue_svm_map(queue, is_blocking, flags, self, wait_for,
                size=size))
|
|
1147
|
+
|
|
1148
|
+
def svmptr_map_ro(self, queue: CommandQueue, *, is_blocking: bool = True,
        wait_for: Sequence[Event] | None = None,
        size: int | None = None) -> SVMMap:
    """Like :meth:`map`, but with *flags* set for a read-only map
    (``map_flags.READ``).
    """

    return self.map(queue, flags=map_flags.READ,
            is_blocking=is_blocking, wait_for=wait_for, size=size)
|
|
1156
|
+
|
|
1157
|
+
def svmptr_map_rw(self, queue: CommandQueue, *, is_blocking: bool = True,
        wait_for: Sequence[Event] | None = None,
        size: int | None = None) -> SVMMap:
    """Like :meth:`map`, but with *flags* set for a read-write map
    (``map_flags.READ | map_flags.WRITE``).
    """

    return self.map(queue, flags=map_flags.READ | map_flags.WRITE,
            is_blocking=is_blocking, wait_for=wait_for, size=size)
|
|
1165
|
+
|
|
1166
|
+
def svmptr__enqueue_unmap(self, queue, wait_for=None):
    """Return the result of ``_cl._enqueue_svm_unmap(queue, self, wait_for)``."""
    return _cl._enqueue_svm_unmap(queue, self, wait_for)
|
|
1168
|
+
|
|
1169
|
+
def svmptr_as_buffer(self, ctx: Context, *, flags: int | None = None,
        size: int | None = None) -> Buffer:
    """
    :arg ctx: a :class:`Context`
    :arg flags: a combination of :class:`pyopencl.mem_flags`, defaults to
        ``READ_WRITE | USE_HOST_PTR``.
    :arg size: The size of the buffer in bytes. If not provided, defaults to
        :attr:`size`.
    :returns: a :class:`Buffer` corresponding to *self*.

    The memory referred to by this object must not be freed before
    the returned :class:`Buffer` is released.
    """

    if flags is None:
        flags = mem_flags.READ_WRITE | mem_flags.USE_HOST_PTR

    if size is None:
        size = self.size

    # The buffer is created on top of this object's own memory (self.buf).
    return Buffer(ctx, flags, size=size, hostbuf=self.buf)
|
|
1190
|
+
|
|
1191
|
+
if get_cl_header_version() >= (2, 0):
|
|
1192
|
+
SVMPointer.map = svmptr_map
|
|
1193
|
+
SVMPointer.map_ro = svmptr_map_ro
|
|
1194
|
+
SVMPointer.map_rw = svmptr_map_rw
|
|
1195
|
+
SVMPointer._enqueue_unmap = svmptr__enqueue_unmap
|
|
1196
|
+
SVMPointer.as_buffer = svmptr_as_buffer
|
|
1197
|
+
|
|
1198
|
+
# }}}
|
|
1199
|
+
|
|
1200
|
+
# {{{ SVMAllocation
|
|
1201
|
+
|
|
1202
|
+
if get_cl_header_version() >= (2, 0):
|
|
1203
|
+
SVMAllocation.__doc__ = """
|
|
1204
|
+
Is a :class:`SVMPointer`.
|
|
1205
|
+
|
|
1206
|
+
.. versionadded:: 2016.2
|
|
1207
|
+
|
|
1208
|
+
.. automethod:: __init__
|
|
1209
|
+
|
|
1210
|
+
:arg flags: See :class:`svm_mem_flags`.
|
|
1211
|
+
:arg queue: If not specified, the allocation will be freed
|
|
1212
|
+
eagerly, irrespective of whether pending/enqueued operations
|
|
1213
|
+
are still using this memory.
|
|
1214
|
+
|
|
1215
|
+
If specified, deallocation of the memory will be enqueued
|
|
1216
|
+
with the given queue, and will only be performed
|
|
1217
|
+
after previously-enqueue operations in the queue have
|
|
1218
|
+
completed.
|
|
1219
|
+
|
|
1220
|
+
It is an error to specify an out-of-order queue.
|
|
1221
|
+
|
|
1222
|
+
.. warning::
|
|
1223
|
+
|
|
1224
|
+
Not specifying a queue will typically lead to undesired
|
|
1225
|
+
behavior, including crashes and memory corruption.
|
|
1226
|
+
See the warning in :ref:`svm`.
|
|
1227
|
+
|
|
1228
|
+
.. automethod:: enqueue_release
|
|
1229
|
+
|
|
1230
|
+
Enqueue the release of this allocation into *queue*.
|
|
1231
|
+
If *queue* is not specified, enqueue the deallocation
|
|
1232
|
+
into the queue provided at allocation time or via
|
|
1233
|
+
:class:`bind_to_queue`.
|
|
1234
|
+
|
|
1235
|
+
.. automethod:: bind_to_queue
|
|
1236
|
+
|
|
1237
|
+
Change the queue used for implicit enqueue of deallocation
|
|
1238
|
+
to *queue*. Sufficient synchronization is ensured by
|
|
1239
|
+
enqueuing a marker into the old queue and waiting on this
|
|
1240
|
+
marker in the new queue.
|
|
1241
|
+
|
|
1242
|
+
.. automethod:: unbind_from_queue
|
|
1243
|
+
|
|
1244
|
+
Configure the allocation to no longer implicitly enqueue
|
|
1245
|
+
memory allocation. If such a queue was previously provided,
|
|
1246
|
+
:meth:`~CommandQueue.finish` is automatically called on it.
|
|
1247
|
+
"""
|
|
1248
|
+
|
|
1249
|
+
# }}}
|
|
1250
|
+
|
|
1251
|
+
# {{{ SVM
|
|
1252
|
+
|
|
1253
|
+
if get_cl_header_version() >= (2, 0):
|
|
1254
|
+
SVM.__doc__ = """Tags an object exhibiting the Python buffer interface
|
|
1255
|
+
(such as a :class:`numpy.ndarray`) as referring to shared virtual
|
|
1256
|
+
memory.
|
|
1257
|
+
|
|
1258
|
+
Is a :class:`SVMPointer`, hence objects of this type may be passed
|
|
1259
|
+
to kernel calls and :func:`enqueue_copy`, and all methods declared
|
|
1260
|
+
there are also available there. Note that :meth:`map` differs
|
|
1261
|
+
slightly from :meth:`SVMPointer.map`.
|
|
1262
|
+
|
|
1263
|
+
Depending on the features of the OpenCL implementation, the following
|
|
1264
|
+
types of objects may be passed to/wrapped in this type:
|
|
1265
|
+
|
|
1266
|
+
* fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`,
|
|
1267
|
+
if the implementation supports fine-grained shared virtual memory.
|
|
1268
|
+
This memory may directly be passed to a kernel::
|
|
1269
|
+
|
|
1270
|
+
ary = cl.fsvm_empty(ctx, 1000, np.float32)
|
|
1271
|
+
assert isinstance(ary, np.ndarray)
|
|
1272
|
+
|
|
1273
|
+
prg.twice(queue, ary.shape, None, cl.SVM(ary))
|
|
1274
|
+
queue.finish() # synchronize
|
|
1275
|
+
print(ary) # access from host
|
|
1276
|
+
|
|
1277
|
+
Observe how mapping (as needed in coarse-grain SVM) is no longer
|
|
1278
|
+
necessary.
|
|
1279
|
+
|
|
1280
|
+
* any :class:`numpy.ndarray` (or other Python object with a buffer
|
|
1281
|
+
interface) if the implementation supports fine-grained *system*
|
|
1282
|
+
shared virtual memory.
|
|
1283
|
+
|
|
1284
|
+
This is how plain :mod:`numpy` arrays may directly be passed to a
|
|
1285
|
+
kernel::
|
|
1286
|
+
|
|
1287
|
+
ary = np.zeros(1000, np.float32)
|
|
1288
|
+
prg.twice(queue, ary.shape, None, cl.SVM(ary))
|
|
1289
|
+
queue.finish() # synchronize
|
|
1290
|
+
print(ary) # access from host
|
|
1291
|
+
|
|
1292
|
+
* coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty`
|
|
1293
|
+
for any implementation of OpenCL 2.0.
|
|
1294
|
+
|
|
1295
|
+
.. note::
|
|
1296
|
+
|
|
1297
|
+
Applications making use of coarse-grain SVM may be better
|
|
1298
|
+
served by opaque-style SVM. See :ref:`opaque-svm`.
|
|
1299
|
+
|
|
1300
|
+
This is how coarse-grain SVM may be used from both host and device::
|
|
1301
|
+
|
|
1302
|
+
svm_ary = cl.SVM(
|
|
1303
|
+
cl.csvm_empty(ctx, 1000, np.float32, alignment=64))
|
|
1304
|
+
assert isinstance(svm_ary.mem, np.ndarray)
|
|
1305
|
+
|
|
1306
|
+
with svm_ary.map_rw(queue) as ary:
|
|
1307
|
+
ary.fill(17) # use from host
|
|
1308
|
+
|
|
1309
|
+
prg.twice(queue, svm_ary.mem.shape, None, svm_ary)
|
|
1310
|
+
|
|
1311
|
+
Coarse-grain shared-memory *must* be mapped into host address space
|
|
1312
|
+
using :meth:`~SVMPointer.map` before being accessed through the
|
|
1313
|
+
:mod:`numpy` interface.
|
|
1314
|
+
|
|
1315
|
+
.. note::
|
|
1316
|
+
|
|
1317
|
+
This object merely serves as a 'tag' that changes the behavior
|
|
1318
|
+
of functions to which it is passed. It has no special management
|
|
1319
|
+
relationship to the memory it tags. For example, it is permissible
|
|
1320
|
+
to grab a :class:`numpy.ndarray` out of :attr:`SVM.mem` of one
|
|
1321
|
+
:class:`SVM` instance and use the array to construct another.
|
|
1322
|
+
Neither of the tags need to be kept alive.
|
|
1323
|
+
|
|
1324
|
+
.. versionadded:: 2016.2
|
|
1325
|
+
|
|
1326
|
+
.. attribute:: mem
|
|
1327
|
+
|
|
1328
|
+
The wrapped object.
|
|
1329
|
+
|
|
1330
|
+
.. automethod:: __init__
|
|
1331
|
+
.. automethod:: map
|
|
1332
|
+
.. automethod:: map_ro
|
|
1333
|
+
.. automethod:: map_rw
|
|
1334
|
+
"""
|
|
1335
|
+
|
|
1336
|
+
# }}}
|
|
1337
|
+
|
|
1338
|
+
def svm_map(self, queue, flags, is_blocking=True, wait_for=None):
    """
    :arg is_blocking: If *False*, subsequent code must wait on
        :attr:`SVMMap.event` in the returned object before accessing the
        mapped memory.
    :arg flags: a combination of :class:`pyopencl.map_flags`.
    :returns: an :class:`SVMMap` instance

    This differs from the inherited :class:`SVMPointer.map` in that no size
    can be specified, and that :attr:`mem` is the exact array produced
    when the :class:`SVMMap` is used as a context manager.

    |std-enqueue-blurb|
    """
    wrapped_array = self.mem
    map_event = _cl._enqueue_svm_map(queue, is_blocking, flags, self, wait_for)
    return SVMMap(self, wrapped_array, queue, map_event)
|
|
1357
|
+
|
|
1358
|
+
def svm_map_ro(self, queue, is_blocking=True, wait_for=None):
    """Like :meth:`map`, but with *flags* set for a read-only map
    (``map_flags.READ``).
    """

    return self.map(queue, map_flags.READ,
            is_blocking=is_blocking, wait_for=wait_for)
|
|
1363
|
+
|
|
1364
|
+
def svm_map_rw(self, queue, is_blocking=True, wait_for=None):
    """Like :meth:`map`, but with *flags* set for a read-write map
    (``map_flags.READ | map_flags.WRITE``).
    """

    return self.map(queue, map_flags.READ | map_flags.WRITE,
            is_blocking=is_blocking, wait_for=wait_for)
|
|
1369
|
+
|
|
1370
|
+
def svm__enqueue_unmap(self, queue, wait_for=None):
    """Return the result of ``_cl._enqueue_svm_unmap(queue, self, wait_for)``."""
    return _cl._enqueue_svm_unmap(queue, self, wait_for)
|
|
1372
|
+
|
|
1373
|
+
if get_cl_header_version() >= (2, 0):
|
|
1374
|
+
SVM.map = svm_map
|
|
1375
|
+
SVM.map_ro = svm_map_ro
|
|
1376
|
+
SVM.map_rw = svm_map_rw
|
|
1377
|
+
SVM._enqueue_unmap = svm__enqueue_unmap
|
|
1378
|
+
|
|
1379
|
+
# }}}
|
|
1380
|
+
|
|
1381
|
+
# ORDER DEPENDENCY: Some of the above may override get_info, the effect needs
|
|
1382
|
+
# to be visible through the attributes. So get_info attr creation needs to happen
|
|
1383
|
+
# after the overriding is complete.
|
|
1384
|
+
cls_to_info_cls = {
|
|
1385
|
+
_cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []),
|
|
1386
|
+
_cl.Device: (_cl.Device.get_info, _cl.device_info,
|
|
1387
|
+
["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]),
|
|
1388
|
+
_cl.Context: (_cl.Context.get_info, _cl.context_info, []),
|
|
1389
|
+
_cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info,
|
|
1390
|
+
["CONTEXT", "DEVICE"]),
|
|
1391
|
+
_cl.Event: (_cl.Event.get_info, _cl.event_info, []),
|
|
1392
|
+
_cl.MemoryObjectHolder:
|
|
1393
|
+
(MemoryObjectHolder.get_info, _cl.mem_info, []),
|
|
1394
|
+
Image: (_cl.Image.get_image_info, _cl.image_info, []),
|
|
1395
|
+
Pipe: (_cl.Pipe.get_pipe_info, _cl.pipe_info, []),
|
|
1396
|
+
Program: (Program.get_info, _cl.program_info, []),
|
|
1397
|
+
Kernel: (Kernel.get_info, _cl.kernel_info, []),
|
|
1398
|
+
_cl.Sampler: (Sampler.get_info, _cl.sampler_info, []),
|
|
1399
|
+
}
|
|
1400
|
+
|
|
1401
|
+
def to_string(cls, value, default_format=None):
    """Return the symbolic name(s) of *value* in the constant class *cls*.

    :arg cls: a constant class carrying a ``_is_bitfield`` flag and
        integer-valued public attributes.
    :arg value: the numeric value to look up.
    :arg default_format: optional ``%``-style format string applied to
        *value* when no name is found.

    :returns: for bitfield classes, the names of all public integer
        attributes that equal *value* or share a bit with it, joined by
        ``" | "``; for other classes, the first public attribute (in
        :func:`dir` order) equal to *value*; otherwise
        ``default_format % value``.
    :raises ValueError: if no name is found and *default_format* is *None*.
    """
    if cls._is_bitfield:
        names = []
        for name in dir(cls):
            # Private attributes are bookkeeping, not flags. In particular
            # ``_is_bitfield`` itself is a bool (and therefore an int) and
            # used to be reported as a flag name for any value with bit 0
            # set.
            if name.startswith("_"):
                continue
            attr = getattr(cls, name)
            if not isinstance(attr, int):
                continue
            if attr == value or attr & value:
                names.append(name)
        if names:
            return " | ".join(names)
    else:
        for name in dir(cls):
            if (not name.startswith("_")
                    and getattr(cls, name) == value):
                return name

    if default_format is None:
        raise ValueError("a name for value %d was not found in %s"
                % (value, cls.__name__))
    else:
        return default_format % value
|
|
1423
|
+
|
|
1424
|
+
for cls in CONSTANT_CLASSES:
|
|
1425
|
+
cls._is_bitfield = cls in BITFIELD_CONSTANT_CLASSES
|
|
1426
|
+
cls.to_string = classmethod(to_string)
|
|
1427
|
+
|
|
1428
|
+
# {{{ get_info attributes -------------------------------------------------
|
|
1429
|
+
|
|
1430
|
+
def make_getinfo(info_method, info_name, info_attr):
    """Build a read-only property returning ``info_method(self, info_attr)``.

    *info_name* is accepted but not used by this function.
    """
    def getter(self):
        return info_method(self, info_attr)

    return property(fget=getter)
|
|
1435
|
+
|
|
1436
|
+
def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr):
    """Build a read-only property that memoizes ``info_method(self, info_attr)``.

    The first access stores the result on the instance under *cache_attr*;
    later accesses return that stored attribute without calling
    *info_method* again. *info_name* is accepted but not used.
    """
    not_cached = object()

    def cached_getter(self):
        cached = getattr(self, cache_attr, not_cached)
        if cached is not not_cached:
            return cached

        fresh = info_method(self, info_attr)
        setattr(self, cache_attr, fresh)
        return fresh

    return property(cached_getter)
|
|
1448
|
+
|
|
1449
|
+
for cls, (info_method, info_class, cacheable_attrs) \
|
|
1450
|
+
in cls_to_info_cls.items():
|
|
1451
|
+
for info_name, _info_value in info_class.__dict__.items():
|
|
1452
|
+
if info_name == "to_string" or info_name.startswith("_"):
|
|
1453
|
+
continue
|
|
1454
|
+
|
|
1455
|
+
info_lower = info_name.lower()
|
|
1456
|
+
info_constant = getattr(info_class, info_name)
|
|
1457
|
+
if info_name in cacheable_attrs:
|
|
1458
|
+
cache_attr = intern("_info_cache_"+info_lower)
|
|
1459
|
+
setattr(cls, info_lower, make_cacheable_getinfo(
|
|
1460
|
+
info_method, info_lower, cache_attr, info_constant))
|
|
1461
|
+
else:
|
|
1462
|
+
setattr(cls, info_lower, make_getinfo(
|
|
1463
|
+
info_method, info_name, info_constant))
|
|
1464
|
+
|
|
1465
|
+
# }}}
|
|
1466
|
+
|
|
1467
|
+
if _cl.have_gl():
|
|
1468
|
+
def gl_object_get_gl_object(self):
    """Return the second element of the ``get_gl_object_info()`` result."""
    return self.get_gl_object_info()[1]
|
|
1470
|
+
|
|
1471
|
+
GLBuffer.gl_object = property(gl_object_get_gl_object)
|
|
1472
|
+
GLTexture.gl_object = property(gl_object_get_gl_object)
|
|
1473
|
+
|
|
1474
|
+
|
|
1475
|
+
_add_functionality()
|
|
1476
|
+
|
|
1477
|
+
# }}}
|
|
1478
|
+
|
|
1479
|
+
|
|
1480
|
+
# {{{ _OverriddenArrayInterfaceSVMAllocation
|
|
1481
|
+
|
|
1482
|
+
if get_cl_header_version() >= (2, 0):
    class _OverriddenArrayInterfaceSVMAllocation(SVMAllocation):
        """An :class:`SVMAllocation` that exposes a caller-supplied
        ``__array_interface__`` pointing at the allocated memory.
        """

        def __init__(self, ctx, size, alignment, flags, *, _interface,
                queue=None):
            """
            :arg ctx: a :class:`Context`
            :arg flags: some of :class:`svm_mem_flags`.
            :arg _interface: an array-interface dictionary; its ``"data"``
                entry is overwritten in place with this allocation's
                pointer.
            """
            super().__init__(ctx, size, alignment, flags, queue)

            # mem_flags.READ_ONLY applies to kernels, not the host, so the
            # host-side view is never marked read-only.
            host_read_only = False
            _interface["data"] = (int(self.svm_ptr), host_read_only)

            self.__array_interface__ = _interface
|
|
1497
|
+
|
|
1498
|
+
# }}}
|
|
1499
|
+
|
|
1500
|
+
|
|
1501
|
+
# {{{ create_image
|
|
1502
|
+
|
|
1503
|
+
def create_image(context, flags, format, shape=None, pitches=None,
        hostbuf=None, is_array=False, buffer=None) -> Image:
    """
    Create an :class:`Image` by forwarding all arguments to its constructor.

    See :class:`mem_flags` for values of *flags*.
    *shape* is a 2- or 3-tuple (a 1-tuple is also accepted for
    one-dimensional images on OpenCL 1.2 and newer). *format* is an
    instance of :class:`ImageFormat`.
    *pitches* is a 1-tuple for 2D images and a 2-tuple for 3D images, indicating
    the distance in bytes from one scan line to the next, and from one 2D image
    slice to the next.

    If *hostbuf* is given and *shape* is *None*, then *hostbuf.shape* is
    used as the *shape* parameter.

    :class:`Image` inherits from :class:`MemoryObject`.

    .. note::

        If you want to load images from :class:`numpy.ndarray` instances or read images
        back into them, be aware that OpenCL images expect the *x* dimension to vary
        fastest, whereas in the default (C) order of :mod:`numpy` arrays, the last index
        varies fastest. If your array is arranged in the wrong order in memory,
        there are two possible fixes for this:

        * Convert the array to Fortran (column-major) order using :func:`numpy.asarray`.

        * Pass *ary.T.copy()* to the image creation function.

    .. versionadded:: 2024.3
    """

    # _through_create_image=True marks the call as coming from this function,
    # which keeps the constructor from emitting its deprecation warning.
    return Image(context, flags, format, shape=shape, pitches=pitches,
                 hostbuf=hostbuf, is_array=is_array, buffer=buffer,
                 _through_create_image=True)
|
|
1535
|
+
|
|
1536
|
+
# }}}
|
|
1537
|
+
|
|
1538
|
+
|
|
1539
|
+
# {{{ create_some_context
|
|
1540
|
+
|
|
1541
|
+
def choose_devices(interactive: bool | None = None,
                   answers: list[str] | None = None) -> list[Device]:
    """
    Choose :class:`Device` instances 'somehow'.

    :arg interactive: If multiple choices for platform and/or device exist
        and *interactive* is ``True`` (or ``None`` and ``sys.stdin.isatty()``
        returns ``True``), then the user is queried about which device should be
        chosen. Otherwise, a device is chosen in an implementation-defined
        manner.
    :arg answers: A sequence of strings that will be used to answer the
        platform/device selection questions. If not given, the
        colon-separated value of the ``PYOPENCL_CTX`` environment variable
        is used when that variable is set.

    :returns: a list of :class:`Device` instances.

    :raises Error: if no platforms or no devices are found.
    :raises RuntimeError: if an answer matches no platform/device, or if
        provided answers are left over after selection.
    """

    if answers is None:
        if "PYOPENCL_CTX" in os.environ:
            ctx_spec = os.environ["PYOPENCL_CTX"]
            answers = ctx_spec.split(":")

        if "PYOPENCL_TEST" in os.environ:
            # Test mode: return the first device reported by the test
            # helper. If it reports none, fall through to normal selection.
            from pyopencl.tools import get_test_platforms_and_devices
            for _plat, devs in get_test_platforms_and_devices():
                for dev in devs:
                    return [dev]

    if answers is not None:
        # Keep the caller's list intact; answers are consumed from a copy.
        pre_provided_answers = answers
        answers = answers[:]
    else:
        pre_provided_answers = None

    # Everything typed at a prompt, recorded for the PYOPENCL_CTX hint below.
    user_inputs = []

    if interactive is None:
        interactive = True
        try:
            if not sys.stdin.isatty():
                interactive = False
        except Exception:
            # Any failure while probing stdin disables interactive mode.
            interactive = False

    def cc_print(s):
        # Print only in interactive mode.
        if interactive:
            print(s)

    def get_input(prompt):
        # Pre-provided answers take precedence; otherwise prompt the user
        # (interactive) or return an empty answer (non-interactive).
        if answers:
            return str(answers.pop(0))
        elif not interactive:
            return ""
        else:
            user_input = input(prompt)
            user_inputs.append(user_input)
            return user_input

    # {{{ pick a platform

    platforms = get_platforms()

    if not platforms:
        raise Error("no platforms found")
    else:
        if not answers:
            cc_print("Choose platform:")
            for i, pf in enumerate(platforms):
                cc_print("[%d] %s" % (i, pf))

        answer = get_input("Choice [0]:")
        if not answer:
            platform = platforms[0]
        else:
            platform = None
            # First try the answer as an index into the platform list...
            try:
                int_choice = int(answer)
            except ValueError:
                pass
            else:
                if 0 <= int_choice < len(platforms):
                    platform = platforms[int_choice]

            # ...then as a case-insensitive substring of the platform name.
            # There is no break, so the last matching platform wins.
            if platform is None:
                answer = answer.lower()
                for pf in platforms:
                    if answer in pf.name.lower():
                        platform = pf
                if platform is None:
                    raise RuntimeError("input did not match any platform")

    # }}}

    # {{{ pick a device

    devices = platform.get_devices()

    def parse_device(choice):
        # Resolve one answer to a device: by index if it parses as an
        # in-range integer, else by the first case-insensitive name match.
        try:
            int_choice = int(choice)
        except ValueError:
            pass
        else:
            if 0 <= int_choice < len(devices):
                return devices[int_choice]

        choice = choice.lower()
        for dev in devices:
            if choice in dev.name.lower():
                return dev
        raise RuntimeError("input did not match any device")

    if not devices:
        raise Error("no devices found")
    elif len(devices) == 1 and not answers:
        # Single device and nothing left to consume: no question is asked.
        cc_print(f"Choosing only available device: {devices[0]}")
        pass
    else:
        if not answers:
            cc_print("Choose device(s):")
            for i, dev in enumerate(devices):
                cc_print("[%d] %s" % (i, dev))

        answer = get_input("Choice, comma-separated [0]:")
        if not answer:
            devices = [devices[0]]
        else:
            devices = [parse_device(i) for i in answer.split(",")]

    # }}}

    if user_inputs:
        # Tell the user how to reproduce this selection without prompts.
        if pre_provided_answers is not None:
            user_inputs = pre_provided_answers + user_inputs
        cc_print("Set the environment variable PYOPENCL_CTX='%s' to "
                "avoid being asked again." % ":".join(user_inputs))

    if answers:
        raise RuntimeError("not all provided choices were used by "
                "choose_devices. (left over: '%s')" % ":".join(answers))

    return devices
|
|
1682
|
+
|
|
1683
|
+
|
|
1684
|
+
def create_some_context(interactive: bool | None = None,
                        answers: list[str] | None = None) -> Context:
    """
    Build a :class:`Context` from whatever devices :func:`choose_devices`
    selects.

    :arg interactive: forwarded unchanged to :func:`choose_devices`, which
        decides whether the user may be prompted for a platform/device.
        ``None`` lets :func:`choose_devices` decide based on whether
        standard input is a terminal.
    :arg answers: forwarded unchanged to :func:`choose_devices`; a sequence
        of strings used as pre-supplied answers to the platform/device
        selection questions.

    :returns: an instance of :class:`Context` wrapping the chosen devices.
    """
    return Context(choose_devices(interactive, answers))


# Short alias for create_some_context.
_csc = create_some_context
|
|
1705
|
+
|
|
1706
|
+
# }}}
|
|
1707
|
+
|
|
1708
|
+
|
|
1709
|
+
# {{{ SVMMap
|
|
1710
|
+
|
|
1711
|
+
class SVMMap:
    """
    Returned by :func:`SVMPointer.map` and :func:`SVM.map`.
    This class may also be used as a context manager in a ``with`` statement.
    :meth:`release` will be called upon exit from the ``with`` region
    (unless the map has already been released explicitly).
    The value returned to the ``as`` part of the context manager is the
    mapped Python object (e.g. a :mod:`numpy` array).

    .. versionadded:: 2016.2

    .. property:: event

        The :class:`Event` returned when mapping the memory.

    .. automethod:: release

    """
    def __init__(self, svm, array, queue, event):
        # *svm* is reset to None by release(); it doubles as the
        # "still mapped" flag.
        self.svm = svm
        self.array = array
        self.queue = queue
        self.event = event

    def __del__(self):
        # getattr() guards against finalization of an instance whose
        # __init__ never ran (and therefore has no 'svm' attribute).
        if getattr(self, "svm", None) is not None:
            self.release()

    def __enter__(self):
        return self.array

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Tolerate an explicit release() inside the 'with' body: unmapping
        # twice would otherwise fail on 'self.svm' being None.
        if self.svm is not None:
            self.release()

    def release(self, queue=None, wait_for=None):
        """
        :arg queue: a :class:`pyopencl.CommandQueue`. Defaults to the one
            with which the map was created, if not specified.
        :arg wait_for: events to wait for before unmapping. Only forwarded
            to the underlying unmap call when not *None*.
        :returns: a :class:`pyopencl.Event`

        |std-enqueue-blurb|
        """

        if queue is None:
            queue = self.queue

        if wait_for is None:
            evt = self.svm._enqueue_unmap(queue)
        else:
            # NOTE(review): assumes _enqueue_unmap accepts the wait list as
            # its second positional argument -- confirm against the
            # SVM/SVMPointer implementation.
            evt = self.svm._enqueue_unmap(queue, wait_for)

        # Mark as released so that __del__/__exit__ do not unmap again.
        self.svm = None

        return evt
|
|
1757
|
+
|
|
1758
|
+
# }}}
|
|
1759
|
+
|
|
1760
|
+
|
|
1761
|
+
# {{{ enqueue_copy
|
|
1762
|
+
|
|
1763
|
+
# Memory object types that enqueue_copy treats as images. The 2D-array
# type is only added when the CL headers are version 1.2 or newer.
_IMAGE_MEM_OBJ_TYPES = [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]
_IMAGE_MEM_OBJ_TYPES += (
    [mem_object_type.IMAGE2D_ARRAY]
    if get_cl_header_version() >= (1, 2)
    else [])
|
|
1766
|
+
|
|
1767
|
+
|
|
1768
|
+
def _enqueue_copy_unpack_pitches(kwargs):
    """Replace the *pitches* entry of *kwargs* (in place) with the
    ``row_pitch``/``slice_pitch`` entries used for image<->host transfers.

    A missing or empty *pitches* is treated as ``(0, 0)``. A one-element
    *pitches* sets only ``row_pitch``.

    :raises ValueError: if *pitches* has more than two entries.
    """
    pitches = tuple(kwargs.pop("pitches", (0, 0)))
    if len(pitches) > 2:
        raise ValueError("'pitches' must have length two or shorter")
    if not pitches:
        # Documented as "length two or shorter": treat empty like the default.
        pitches = (0, 0)

    for pitch_name, pitch in zip(("row_pitch", "slice_pitch"), pitches):
        kwargs[pitch_name] = pitch


def enqueue_copy(queue, dest, src, **kwargs):
    """Copy from :class:`Image`, :class:`Buffer` or the host to
    :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host
    copies are unsupported.)

    The following keyword arguments are available:

    :arg wait_for: (optional, default empty)
    :arg is_blocking: Wait for completion. Defaults to *True*.
      (Available on any copy involving host memory)
    :return: A :class:`NannyEvent` if the transfer involved a
        host-side buffer, otherwise an :class:`Event`.

    .. note::

        Be aware that the deletion of the :class:`NannyEvent` that is
        returned by the function if the transfer involved a host-side buffer
        will block until the transfer is complete, so be sure to keep a
        reference to this :class:`Event` until the
        transfer has completed.

    .. note::

        Two types of 'buffer' occur in the arguments to this function,
        :class:`Buffer` and 'host-side buffers'. The latter are
        defined by Python and commonly called `buffer objects
        <https://docs.python.org/3/c-api/buffer.html>`__. :mod:`numpy`
        arrays are a very common example.
        Make sure to always be clear on whether a :class:`Buffer` or a
        Python buffer object is needed.

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ host
    .. ------------------------------------------------------------------------

    :arg src_offset: offset in bytes (optional)

        May only be nonzero if applied on the device side.

    :arg dst_offset: offset in bytes (optional)

        May only be nonzero if applied on the device side.

    .. note::

        The size of the transfer is controlled by the size of the
        host-side buffer. If the host-side buffer
        is a :class:`numpy.ndarray`, you can control the transfer size by
        transferring into a smaller 'view' of the target array, like this::

            cl.enqueue_copy(queue, large_dest_numpy_array[:15], src_buffer)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ :class:`Buffer`
    .. ------------------------------------------------------------------------

    :arg byte_count: (optional) If not specified, defaults to the
        size of the source in versions 2012.x and earlier,
        and to the minimum of the size of the source and target
        from 2013.1 on.
    :arg src_offset: (optional)
    :arg dst_offset: (optional)

    .. ------------------------------------------------------------------------
    .. rubric :: Rectangular :class:`Buffer` ↔  host transfers (CL 1.1 and newer)
    .. ------------------------------------------------------------------------

    :arg buffer_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg host_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg buffer_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)
    :arg host_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)

    .. ------------------------------------------------------------------------
    .. rubric :: Rectangular :class:`Buffer` ↔  :class:`Buffer`
        transfers (CL 1.1 and newer)
    .. ------------------------------------------------------------------------

    :arg src_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg dst_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg src_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)
    :arg dst_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Image` ↔ host
    .. ------------------------------------------------------------------------

    :arg origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ :class:`Image`
    .. ------------------------------------------------------------------------

    :arg offset: offset in buffer (mandatory)
    :arg origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Image` ↔ :class:`Image`
    .. ------------------------------------------------------------------------

    :arg src_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg dest_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`SVMPointer`/host ↔ :class:`SVMPointer`/host
    .. ------------------------------------------------------------------------

    :arg byte_count: (optional) If not specified, defaults to the
        size of the source in versions 2012.x and earlier,
        and to the minimum of the size of the source and target
        from 2013.1 on.

    |std-enqueue-blurb|

    .. versionadded:: 2011.1
    """

    # Dispatch is on the kind of *dest* first, then on the kind of *src*.
    # Anything that is neither a memory object nor an SVM pointer is
    # assumed to be a host-side buffer.

    if isinstance(dest, MemoryObjectHolder):
        if dest.type == mem_object_type.BUFFER:
            if isinstance(src, MemoryObjectHolder):
                if src.type == mem_object_type.BUFFER:
                    # {{{ buffer -> buffer

                    if "src_origin" in kwargs:
                        # rectangular
                        return _cl._enqueue_copy_buffer_rect(
                                queue, src, dest, **kwargs)
                    else:
                        # linear
                        dest_offset = kwargs.pop("dest_offset", None)
                        if dest_offset is not None:
                            if "dst_offset" in kwargs:
                                raise TypeError("may not specify both 'dst_offset' "
                                        "and 'dest_offset'")

                            warn("The 'dest_offset' argument of enqueue_copy "
                                    "is deprecated. Use 'dst_offset' instead. "
                                    "'dest_offset' will stop working in 2023.x.",
                                    DeprecationWarning, stacklevel=2)

                            kwargs["dst_offset"] = dest_offset

                        return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs)

                    # }}}
                elif src.type in _IMAGE_MEM_OBJ_TYPES:
                    return _cl._enqueue_copy_image_to_buffer(
                            queue, src, dest, **kwargs)
                else:
                    raise ValueError("invalid src mem object type")
            else:
                # {{{ host -> buffer

                if "buffer_origin" in kwargs:
                    return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs)
                else:
                    device_offset = kwargs.pop("device_offset", None)
                    if device_offset is not None:
                        if "dst_offset" in kwargs:
                            raise TypeError("may not specify both 'device_offset' "
                                    "and 'dst_offset'")

                        # The deprecated name is 'device_offset'; the message
                        # must say that *it* (not its replacement) goes away.
                        warn("The 'device_offset' argument of enqueue_copy "
                                "is deprecated. Use 'dst_offset' instead. "
                                "'device_offset' will stop working in 2023.x.",
                                DeprecationWarning, stacklevel=2)

                        kwargs["dst_offset"] = device_offset

                    return _cl._enqueue_write_buffer(queue, dest, src, **kwargs)

                # }}}

        elif dest.type in _IMAGE_MEM_OBJ_TYPES:
            # {{{ ... -> image

            if isinstance(src, MemoryObjectHolder):
                if src.type == mem_object_type.BUFFER:
                    return _cl._enqueue_copy_buffer_to_image(
                            queue, src, dest, **kwargs)
                elif src.type in _IMAGE_MEM_OBJ_TYPES:
                    return _cl._enqueue_copy_image(queue, src, dest, **kwargs)
                else:
                    raise ValueError("invalid src mem object type")
            else:
                # assume from-host
                origin = kwargs.pop("origin")
                region = kwargs.pop("region")

                _enqueue_copy_unpack_pitches(kwargs)

                return _cl._enqueue_write_image(
                        queue, dest, origin, region, src, **kwargs)

            # }}}
        else:
            raise ValueError("invalid dest mem object type")

    elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVMPointer):
        # {{{ ... -> SVM

        if not isinstance(src, SVMPointer):
            src = SVM(src)

        is_blocking = kwargs.pop("is_blocking", True)

        # These are NOT documented. They only support consistency with the
        # Buffer-based API for the sake of the Array.
        if kwargs.pop("src_offset", 0) != 0:
            raise ValueError("src_offset must be 0")
        if kwargs.pop("dst_offset", 0) != 0:
            raise ValueError("dst_offset must be 0")

        return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs)

        # }}}

    else:
        # assume to-host

        if isinstance(src, MemoryObjectHolder):
            if src.type == mem_object_type.BUFFER:
                if "buffer_origin" in kwargs:
                    return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs)
                else:
                    device_offset = kwargs.pop("device_offset", None)
                    if device_offset is not None:
                        if "src_offset" in kwargs:
                            raise TypeError("may not specify both 'device_offset' "
                                    "and 'src_offset'")

                        # The deprecated name is 'device_offset'; the message
                        # must say that *it* (not another argument) goes away.
                        warn("The 'device_offset' argument of enqueue_copy "
                                "is deprecated. Use 'src_offset' instead. "
                                "'device_offset' will stop working in 2023.x.",
                                DeprecationWarning, stacklevel=2)

                        kwargs["src_offset"] = device_offset

                    return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)

            elif src.type in _IMAGE_MEM_OBJ_TYPES:
                origin = kwargs.pop("origin")
                region = kwargs.pop("region")

                _enqueue_copy_unpack_pitches(kwargs)

                return _cl._enqueue_read_image(
                        queue, src, origin, region, dest, **kwargs)
            else:
                raise ValueError("invalid src mem object type")
        elif isinstance(src, SVMPointer):
            # {{{ svm -> host

            # dest is not a SVM instance, otherwise we'd be in the branch above

            # This is NOT documented. They only support consistency with the
            # Buffer-based API for the sake of the Array.
            if kwargs.pop("src_offset", 0) != 0:
                raise ValueError("src_offset must be 0")

            is_blocking = kwargs.pop("is_blocking", True)
            return _cl._enqueue_svm_memcpy(
                    queue, is_blocking, SVM(dest), src, **kwargs)

            # }}}
        else:
            # assume from-host
            raise TypeError("enqueue_copy cannot perform host-to-host transfers")
|
|
2067
|
+
|
|
2068
|
+
# }}}
|
|
2069
|
+
|
|
2070
|
+
|
|
2071
|
+
# {{{ enqueue_fill
|
|
2072
|
+
|
|
2073
|
+
def enqueue_fill(queue: CommandQueue,
        dest: MemoryObject | SVMPointer,
        pattern: Any, size: int, *, offset: int = 0,
        wait_for: Sequence[Event] | None = None) -> Event:
    """
    Fill *dest* with *pattern*, dispatching on the kind of *dest*:
    memory objects go through :func:`enqueue_fill_buffer`, SVM pointers
    through :func:`enqueue_svm_memfill`.

    :arg offset: forwarded to :func:`enqueue_fill_buffer`; must be zero
        (falsy) when *dest* is an :class:`SVMPointer`.
    :raises NotImplementedError: if *dest* is an :class:`SVMPointer` and
        *offset* is nonzero.
    :raises TypeError: if *dest* is neither a memory object nor an
        :class:`SVMPointer`.

    .. versionadded:: 2022.2
    """
    if isinstance(dest, MemoryObjectHolder):
        return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for)

    if not isinstance(dest, SVMPointer):
        raise TypeError(f"enqueue_fill does not know how to fill '{type(dest)}'")

    if offset:
        raise NotImplementedError("enqueue_fill with SVM does not yet support "
                "offsets")

    return enqueue_svm_memfill(queue, dest, pattern, size, wait_for)
|
|
2089
|
+
|
|
2090
|
+
# }}}
|
|
2091
|
+
|
|
2092
|
+
|
|
2093
|
+
# {{{ image creation
|
|
2094
|
+
|
|
2095
|
+
# Maps numpy scalar dtypes to the (unnormalized) image channel type used
# by image_from_array.
DTYPE_TO_CHANNEL_TYPE = {
    np.dtype(_scalar_type): _cl_channel_type
    for _scalar_type, _cl_channel_type in (
        (np.float32, channel_type.FLOAT),
        (np.int16, channel_type.SIGNED_INT16),
        (np.int32, channel_type.SIGNED_INT32),
        (np.int8, channel_type.SIGNED_INT8),
        (np.uint16, channel_type.UNSIGNED_INT16),
        (np.uint32, channel_type.UNSIGNED_INT32),
        (np.uint8, channel_type.UNSIGNED_INT8),
    )
}

# Half precision is registered only if this numpy exposes np.float16.
try:
    np.float16  # noqa: B018
except Exception:
    pass
else:
    DTYPE_TO_CHANNEL_TYPE.update({np.dtype(np.float16): channel_type.HALF_FLOAT})

# Maps numpy integer dtypes to normalized-integer image channel types
# (used by image_from_array when norm_int is true).
DTYPE_TO_CHANNEL_TYPE_NORM = {
    np.dtype(_scalar_type): _cl_channel_type
    for _scalar_type, _cl_channel_type in (
        (np.int16, channel_type.SNORM_INT16),
        (np.int8, channel_type.SNORM_INT8),
        (np.uint16, channel_type.UNORM_INT16),
        (np.uint8, channel_type.UNORM_INT8),
    )
}
|
|
2117
|
+
|
|
2118
|
+
|
|
2119
|
+
def image_from_array(ctx, ary, num_channels=None, mode="r", norm_int=False):
    """Create an image in *ctx* initialized with a copy of the C-contiguous
    array *ary* (via :func:`create_image` with ``COPY_HOST_PTR``).

    :arg num_channels: number of image channels. If *None*, it is derived
        from the dtype of *ary*: vector dtypes known to
        ``pyopencl.cltypes.vec_type_to_scalar_and_count`` yield their
        component count, anything else is treated as one channel. If given
        and not equal to 1, the last axis of *ary* must have that length and
        is used as the channel axis.
    :arg mode: ``"r"`` for a read-only image, ``"w"`` for a write-only one.
    :arg norm_int: if true, look up the channel type in
        ``DTYPE_TO_CHANNEL_TYPE_NORM`` instead of ``DTYPE_TO_CHANNEL_TYPE``.

    :raises ValueError: if *ary* is not C-contiguous or *mode* is invalid.
    :raises RuntimeError: if the last axis of *ary* does not match an
        explicitly given *num_channels*.
    """
    if not ary.flags.c_contiguous:
        raise ValueError("array must be C-contiguous")

    # Only an explicitly requested multi-channel layout consumes the last
    # array axis as the channel axis.
    channels_in_last_axis = num_channels is not None and num_channels != 1

    scalar_dtype = ary.dtype
    if num_channels is None:
        try:
            scalar_dtype, num_channels = \
                    pyopencl.cltypes.vec_type_to_scalar_and_count[scalar_dtype]
        except KeyError:
            # It must be a scalar type then.
            num_channels = 1

    if channels_in_last_axis:
        if ary.shape[-1] != num_channels:
            raise RuntimeError("last dimension must be equal to number of channels")

        img_shape = ary.shape[:-1]
        img_strides = ary.strides[:-1]
    else:
        img_shape = ary.shape
        img_strides = ary.strides

    if mode == "r":
        access_flags = mem_flags.READ_ONLY
    elif mode == "w":
        access_flags = mem_flags.WRITE_ONLY
    else:
        raise ValueError("invalid value '%s' for 'mode'" % mode)

    order_by_channel_count = {
            1: channel_order.R,
            2: channel_order.RG,
            3: channel_order.RGB,
            4: channel_order.RGBA,
            }
    img_order = order_by_channel_count[num_channels]

    assert ary.strides[-1] == ary.dtype.itemsize

    channel_type_table = (
            DTYPE_TO_CHANNEL_TYPE_NORM if norm_int else DTYPE_TO_CHANNEL_TYPE)
    img_channel_type = channel_type_table[scalar_dtype]

    # Image dimensions are passed in reverse order relative to numpy axes;
    # the innermost stride is dropped from the pitches.
    return create_image(ctx, access_flags | mem_flags.COPY_HOST_PTR,
            ImageFormat(img_order, img_channel_type),
            shape=img_shape[::-1], pitches=img_strides[::-1][1:],
            hostbuf=ary)
|
|
2171
|
+
|
|
2172
|
+
# }}}
|
|
2173
|
+
|
|
2174
|
+
|
|
2175
|
+
# {{{ enqueue_* compatibility shims
|
|
2176
|
+
|
|
2177
|
+
def enqueue_marker(queue, wait_for=None):
    """Enqueue a marker on *queue* and return the resulting event.

    When both the queue and the CL headers report version 1.2 or newer,
    the marker-with-wait-list entry point is used. Otherwise *wait_for*
    (if non-empty) is enqueued as a separate wait before a plain marker.
    """
    use_wait_list_api = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if use_wait_list_api:
        return _cl._enqueue_marker_with_wait_list(queue, wait_for)

    if wait_for:
        _cl._enqueue_wait_for_events(queue, wait_for)
    return _cl._enqueue_marker(queue)
|
|
2184
|
+
|
|
2185
|
+
|
|
2186
|
+
def enqueue_barrier(queue, wait_for=None):
    """Enqueue a barrier on *queue* and return an event.

    When both the queue and the CL headers report version 1.2 or newer,
    the barrier-with-wait-list entry point is used. Otherwise a plain
    barrier is enqueued, followed by a wait on *wait_for* (if non-empty),
    and the event of a subsequently enqueued marker is returned.
    """
    use_wait_list_api = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if use_wait_list_api:
        return _cl._enqueue_barrier_with_wait_list(queue, wait_for)

    _cl._enqueue_barrier(queue)
    if wait_for:
        _cl._enqueue_wait_for_events(queue, wait_for)
    return _cl._enqueue_marker(queue)
|
|
2194
|
+
|
|
2195
|
+
|
|
2196
|
+
def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None):
    """Fill *size* bytes of *mem*, starting at *offset*, with *pattern*.

    A warning is issued (but the fill is still attempted) when the queue
    or the CL headers do not report version 1.2 or newer.
    """
    has_cl_1_2 = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if not has_cl_1_2:
        warn(
            "The context for this queue does not declare OpenCL 1.2 support, so "
            "the next thing you might see is a crash",
            stacklevel=2)

    # On PyPy, numpy scalars are wrapped in an array before being handed on.
    needs_array_wrapper = _PYPY and isinstance(pattern, np.generic)
    if needs_array_wrapper:
        pattern = np.asarray(pattern)

    return _cl._enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for)
|
|
2207
|
+
|
|
2208
|
+
# }}}
|
|
2209
|
+
|
|
2210
|
+
|
|
2211
|
+
# {{{ numpy-like svm allocation
|
|
2212
|
+
|
|
2213
|
+
def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None):
    """Fill shared virtual memory with a pattern.

    :arg dest: a Python buffer object, or any implementation of
        :class:`SVMPointer`. Anything that is not an :class:`SVMPointer`
        is wrapped in :class:`SVM` first.
    :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray`)
        with the fill pattern to be used.
    :arg byte_count: The size of the memory to be filled. Defaults to the
        entirety of *dest*.

    |std-enqueue-blurb|

    .. versionadded:: 2016.2
    """

    svm_dest = dest if isinstance(dest, SVMPointer) else SVM(dest)

    return _cl._enqueue_svm_memfill(
            queue, svm_dest, pattern, byte_count=byte_count, wait_for=wait_for)
|
|
2232
|
+
|
|
2233
|
+
|
|
2234
|
+
def enqueue_svm_migratemem(queue, svms, flags, wait_for=None):
    """
    Enqueue a migration of the given SVM regions.

    :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy`
        arrays), or any implementation of :class:`SVMPointer`.
    :arg flags: a combination of :class:`mem_migration_flags`

    |std-enqueue-blurb|

    .. versionadded:: 2016.2

    This function requires OpenCL 2.1.
    """

    # Thin pass-through to the extension module.
    migrate_evt = _cl._enqueue_svm_migratemem(queue, svms, flags, wait_for)
    return migrate_evt
|
|
2248
|
+
|
|
2249
|
+
|
|
2250
|
+
def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None):
    """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype*
    and *order*. (See :func:`numpy.empty` for the meaning of these arguments.)
    The array will be allocated in shared virtual memory belonging
    to *ctx*.

    :arg ctx: a :class:`Context`
    :arg flags: a combination of flags from :class:`svm_mem_flags`.
    :arg shape: an iterable of dimensions, or a single integer.
    :arg order: one of ``"C"``, ``"c"``, ``"F"``, ``"f"``.
    :arg alignment: the number of bytes to which the beginning of the memory
        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *dtype*.

    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
        is a :class:`SVMAllocation`.
    :raises TypeError: if *shape* is neither iterable nor an integer.
    :raises ValueError: if *order* is not recognized.

    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
    will likely want to wrap the returned array in an :class:`SVM` tag.

    .. versionadded:: 2016.2

    .. versionchanged:: 2022.2

        *queue* argument added.
    """

    dtype = np.dtype(dtype)

    try:
        # Materialize *shape* once: it is iterated here and reused below,
        # so a one-shot iterable must not be consumed, and the array
        # interface dictionary gets a real tuple.
        shape = tuple(shape)
        nitems = 1
        for dim in shape:
            nitems *= dim
    except TypeError as err:
        # Not iterable (or not multipliable): only a bare integer is
        # accepted as a shorthand for a one-dimensional shape.
        if not isinstance(shape, (int, np.integer)):
            raise TypeError("shape must either be iterable or "
                    "castable to an integer") from err
        nitems = shape
        shape = (shape,)

    itemsize = dtype.itemsize
    nbytes = nitems * itemsize

    # Compare against the exact spellings: a substring test such as
    # 'order in "fF"' would also accept "" and "fF".
    if order in ("f", "F"):
        fortran_order = True
    elif order in ("c", "C"):
        fortran_order = False
    else:
        raise ValueError("order not recognized: %s" % (order,))

    from pyopencl.compyte.array import c_contiguous_strides, f_contiguous_strides

    if fortran_order:
        strides = f_contiguous_strides(itemsize, shape)
    else:
        strides = c_contiguous_strides(itemsize, shape)

    descr = dtype.descr

    interface = {
        "version": 3,
        "shape": shape,
        "strides": strides,
        }

    if len(descr) == 1:
        interface["typestr"] = descr[0][1]
    else:
        # Multi-field (structured) dtype: opaque typestr plus full descr.
        interface["typestr"] = "V%d" % itemsize
        interface["descr"] = descr

    if alignment is None:
        alignment = itemsize

    svm_alloc = _OverriddenArrayInterfaceSVMAllocation(
            ctx, nbytes, alignment, flags, _interface=interface,
            queue=queue)
    return np.asarray(svm_alloc)
|
|
2322
|
+
|
|
2323
|
+
|
|
2324
|
+
def svm_empty_like(ctx, flags, ary, alignment=None):
    """Allocate an empty :class:`numpy.ndarray` with the shape, dtype and
    memory order (C or Fortran) of the existing :class:`numpy.ndarray` *ary*,
    by delegating to :func:`svm_empty`. The array will be allocated in shared
    virtual memory belonging to *ctx*.

    :arg ctx: a :class:`Context`
    :arg flags: a combination of flags from :class:`svm_mem_flags`.
    :arg alignment: forwarded to :func:`svm_empty`.

    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
        is a :class:`SVMAllocation`.
    :raises ValueError: if *ary* is neither C- nor Fortran-contiguous.

    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
    will likely want to wrap the returned array in an :class:`SVM` tag.

    .. versionadded:: 2016.2
    """
    # C order wins when an array is contiguous in both senses.
    if ary.flags.c_contiguous:
        layout = "C"
    elif ary.flags.f_contiguous:
        layout = "F"
    else:
        raise ValueError("array is neither C- nor Fortran-contiguous")

    return svm_empty(
            ctx, flags, ary.shape, ary.dtype, layout, alignment=alignment)
|
|
2351
|
+
|
|
2352
|
+
|
|
2353
|
+
def csvm_empty(ctx, shape, dtype, order="C", alignment=None):
    """
    Like :func:`svm_empty`, but with *flags* fixed to
    ``svm_mem_flags.READ_WRITE``, i.e. a coarse-grain read-write buffer.

    .. versionadded:: 2016.2
    """
    coarse_grain_rw = svm_mem_flags.READ_WRITE
    return svm_empty(ctx, coarse_grain_rw, shape, dtype, order, alignment)
|
|
2361
|
+
|
|
2362
|
+
|
|
2363
|
+
def csvm_empty_like(ctx, ary, alignment=None):
    """
    Like :func:`svm_empty_like`, but with *flags* set for a coarse-grain
    read-write buffer.

    :arg ctx: a :class:`Context`
    :arg ary: the array whose shape, dtype and order are mirrored.
    :arg alignment: forwarded to :func:`svm_empty_like`.

    .. versionadded:: 2016.2
    """
    # *alignment* was previously accepted but silently dropped.
    return svm_empty_like(ctx, svm_mem_flags.READ_WRITE, ary,
            alignment=alignment)
|
|
2371
|
+
|
|
2372
|
+
|
|
2373
|
+
def fsvm_empty(ctx, shape, dtype, order="C", alignment=None):
    """
    Like :func:`svm_empty`, but with *flags* fixed to
    ``READ_WRITE | SVM_FINE_GRAIN_BUFFER``, i.e. a fine-grain read-write
    buffer.

    .. versionadded:: 2016.2
    """
    fine_grain_rw = (
            svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER)
    return svm_empty(ctx, fine_grain_rw, shape, dtype, order, alignment)
|
|
2383
|
+
|
|
2384
|
+
|
|
2385
|
+
def fsvm_empty_like(ctx, ary, alignment=None):
    """
    Like :func:`svm_empty_like`, but with *flags* set for a fine-grain
    read-write buffer.

    :arg ctx: a :class:`Context`
    :arg ary: the array whose shape, dtype and order are mirrored.
    :arg alignment: forwarded to :func:`svm_empty_like`.

    .. versionadded:: 2016.2
    """
    # *alignment* was previously accepted but silently dropped.
    return svm_empty_like(
            ctx,
            svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER,
            ary,
            alignment=alignment)
|
|
2396
|
+
|
|
2397
|
+
# }}}
|
|
2398
|
+
|
|
2399
|
+
|
|
2400
|
+
# Classes collected as kernel-argument types. SVM is appended only when
# the CL headers are version 2.0 or newer.
_KERNEL_ARG_CLASSES: tuple[type, ...] = (
        MemoryObjectHolder, Sampler, CommandQueue, LocalMemory)
if get_cl_header_version() >= (2, 0):
    _KERNEL_ARG_CLASSES += (SVM,)
|
|
2408
|
+
|
|
2409
|
+
|
|
2410
|
+
# vim: foldmethod=marker
|