pyopencl 2026.1.1__cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyopencl/.libs/libOpenCL-34a55fe4.so.1.0.0 +0 -0
- pyopencl/__init__.py +1995 -0
- pyopencl/_cl.cpython-314t-aarch64-linux-gnu.so +0 -0
- pyopencl/_cl.pyi +2009 -0
- pyopencl/_cluda.py +57 -0
- pyopencl/_monkeypatch.py +1104 -0
- pyopencl/_mymako.py +17 -0
- pyopencl/algorithm.py +1454 -0
- pyopencl/array.py +3530 -0
- pyopencl/bitonic_sort.py +245 -0
- pyopencl/bitonic_sort_templates.py +597 -0
- pyopencl/cache.py +553 -0
- pyopencl/capture_call.py +200 -0
- pyopencl/characterize/__init__.py +461 -0
- pyopencl/characterize/performance.py +240 -0
- pyopencl/cl/pyopencl-airy.cl +324 -0
- pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
- pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
- pyopencl/cl/pyopencl-bessel-y.cl +435 -0
- pyopencl/cl/pyopencl-complex.h +303 -0
- pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
- pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
- pyopencl/cl/pyopencl-random123/array.h +325 -0
- pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
- pyopencl/cl/pyopencl-random123/philox.cl +486 -0
- pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
- pyopencl/clmath.py +281 -0
- pyopencl/clrandom.py +412 -0
- pyopencl/cltypes.py +217 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/__init__.py +0 -0
- pyopencl/compyte/array.py +211 -0
- pyopencl/compyte/dtypes.py +314 -0
- pyopencl/compyte/pyproject.toml +49 -0
- pyopencl/elementwise.py +1288 -0
- pyopencl/invoker.py +417 -0
- pyopencl/ipython_ext.py +70 -0
- pyopencl/py.typed +0 -0
- pyopencl/reduction.py +829 -0
- pyopencl/scan.py +1921 -0
- pyopencl/tools.py +1680 -0
- pyopencl/typing.py +61 -0
- pyopencl/version.py +11 -0
- pyopencl-2026.1.1.dist-info/METADATA +108 -0
- pyopencl-2026.1.1.dist-info/RECORD +47 -0
- pyopencl-2026.1.1.dist-info/WHEEL +6 -0
- pyopencl-2026.1.1.dist-info/licenses/LICENSE +104 -0
pyopencl/__init__.py
ADDED
|
@@ -0,0 +1,1995 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
__copyright__ = "Copyright (C) 2009-15 Andreas Kloeckner"
|
|
5
|
+
|
|
6
|
+
__license__ = """
|
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
in the Software without restriction, including without limitation the rights
|
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
furnished to do so, subject to the following conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be included in
|
|
15
|
+
all copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
23
|
+
THE SOFTWARE.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from typing_extensions import override
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
import logging
|
|
29
|
+
from typing import (
|
|
30
|
+
TYPE_CHECKING, Any, Generic, Literal, TypeAlias, TypeVar, cast,
|
|
31
|
+
overload)
|
|
32
|
+
from collections.abc import Callable
|
|
33
|
+
from collections.abc import Sequence
|
|
34
|
+
from warnings import warn
|
|
35
|
+
|
|
36
|
+
# must import, otherwise dtype registry will not be fully populated
|
|
37
|
+
import pyopencl.cltypes
|
|
38
|
+
from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
__version__ = VERSION_TEXT
|
|
42
|
+
|
|
43
|
+
logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
# This tells ocl-icd where to find shipped OpenCL ICDs, cf.
|
|
46
|
+
# https://github.com/isuruf/ocl-icd/commit/3862386b51930f95d9ad1089f7157a98165d5a6b
|
|
47
|
+
# via
|
|
48
|
+
# https://github.com/inducer/pyopencl/blob/0b3d0ef92497e6838eea300b974f385f94cb5100/scripts/build-wheels.sh#L43-L44
|
|
49
|
+
import os
|
|
50
|
+
os.environ["PYOPENCL_HOME"] = os.path.dirname(os.path.abspath(__file__))
|
|
51
|
+
|
|
52
|
+
try:
|
|
53
|
+
import pyopencl._cl as _cl
|
|
54
|
+
except ImportError:
|
|
55
|
+
from os.path import dirname, join, realpath
|
|
56
|
+
if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)):
|
|
57
|
+
warn(
|
|
58
|
+
"It looks like you are importing PyOpenCL from "
|
|
59
|
+
"its source directory. This likely won't work.",
|
|
60
|
+
stacklevel=2)
|
|
61
|
+
raise
|
|
62
|
+
|
|
63
|
+
import numpy as np
|
|
64
|
+
|
|
65
|
+
import sys
|
|
66
|
+
|
|
67
|
+
_PYPY = "__pypy__" in sys.builtin_module_names
|
|
68
|
+
|
|
69
|
+
from pyopencl.typing import (
|
|
70
|
+
DTypeT,
|
|
71
|
+
HasBufferInterface,
|
|
72
|
+
SVMInnerT,
|
|
73
|
+
WaitList,
|
|
74
|
+
)
|
|
75
|
+
from pyopencl._cl import ( # noqa: F401
|
|
76
|
+
get_cl_header_version,
|
|
77
|
+
program_kind,
|
|
78
|
+
status_code,
|
|
79
|
+
platform_info,
|
|
80
|
+
device_type,
|
|
81
|
+
device_info,
|
|
82
|
+
device_topology_type_amd,
|
|
83
|
+
device_fp_config,
|
|
84
|
+
device_mem_cache_type,
|
|
85
|
+
device_local_mem_type,
|
|
86
|
+
device_exec_capabilities,
|
|
87
|
+
device_svm_capabilities,
|
|
88
|
+
|
|
89
|
+
command_queue_properties,
|
|
90
|
+
context_info,
|
|
91
|
+
gl_context_info,
|
|
92
|
+
context_properties,
|
|
93
|
+
command_queue_info,
|
|
94
|
+
queue_properties,
|
|
95
|
+
|
|
96
|
+
mem_flags,
|
|
97
|
+
svm_mem_flags,
|
|
98
|
+
|
|
99
|
+
channel_order,
|
|
100
|
+
channel_type,
|
|
101
|
+
mem_object_type,
|
|
102
|
+
mem_info,
|
|
103
|
+
image_info,
|
|
104
|
+
pipe_info,
|
|
105
|
+
pipe_properties,
|
|
106
|
+
addressing_mode,
|
|
107
|
+
filter_mode,
|
|
108
|
+
sampler_info,
|
|
109
|
+
sampler_properties,
|
|
110
|
+
map_flags,
|
|
111
|
+
program_info,
|
|
112
|
+
program_build_info,
|
|
113
|
+
program_binary_type,
|
|
114
|
+
|
|
115
|
+
kernel_info,
|
|
116
|
+
kernel_arg_info,
|
|
117
|
+
kernel_arg_address_qualifier,
|
|
118
|
+
kernel_arg_access_qualifier,
|
|
119
|
+
kernel_arg_type_qualifier,
|
|
120
|
+
kernel_work_group_info,
|
|
121
|
+
kernel_sub_group_info,
|
|
122
|
+
|
|
123
|
+
event_info,
|
|
124
|
+
command_type,
|
|
125
|
+
command_execution_status,
|
|
126
|
+
profiling_info,
|
|
127
|
+
mem_migration_flags,
|
|
128
|
+
device_partition_property,
|
|
129
|
+
device_affinity_domain,
|
|
130
|
+
device_atomic_capabilities,
|
|
131
|
+
device_device_enqueue_capabilities,
|
|
132
|
+
|
|
133
|
+
version_bits,
|
|
134
|
+
khronos_vendor_id,
|
|
135
|
+
|
|
136
|
+
Error, MemoryError, LogicError, RuntimeError,
|
|
137
|
+
|
|
138
|
+
Platform,
|
|
139
|
+
get_platforms,
|
|
140
|
+
|
|
141
|
+
Device,
|
|
142
|
+
Context,
|
|
143
|
+
CommandQueue,
|
|
144
|
+
LocalMemory,
|
|
145
|
+
MemoryObjectHolder,
|
|
146
|
+
MemoryObject,
|
|
147
|
+
MemoryMap,
|
|
148
|
+
Buffer,
|
|
149
|
+
|
|
150
|
+
_Program,
|
|
151
|
+
Kernel,
|
|
152
|
+
|
|
153
|
+
Event,
|
|
154
|
+
wait_for_events,
|
|
155
|
+
NannyEvent,
|
|
156
|
+
|
|
157
|
+
enqueue_nd_range_kernel,
|
|
158
|
+
|
|
159
|
+
_enqueue_marker,
|
|
160
|
+
|
|
161
|
+
_enqueue_read_buffer,
|
|
162
|
+
_enqueue_write_buffer,
|
|
163
|
+
_enqueue_copy_buffer,
|
|
164
|
+
_enqueue_read_buffer_rect,
|
|
165
|
+
_enqueue_write_buffer_rect,
|
|
166
|
+
_enqueue_copy_buffer_rect,
|
|
167
|
+
|
|
168
|
+
_enqueue_read_image,
|
|
169
|
+
_enqueue_copy_image,
|
|
170
|
+
_enqueue_write_image,
|
|
171
|
+
_enqueue_copy_image_to_buffer,
|
|
172
|
+
_enqueue_copy_buffer_to_image,
|
|
173
|
+
|
|
174
|
+
have_gl,
|
|
175
|
+
|
|
176
|
+
ImageFormat,
|
|
177
|
+
get_supported_image_formats,
|
|
178
|
+
|
|
179
|
+
Image,
|
|
180
|
+
Sampler,
|
|
181
|
+
|
|
182
|
+
# This class is available unconditionally, even though CL only
|
|
183
|
+
# has it on CL2.0 and newer.
|
|
184
|
+
Pipe,
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
if TYPE_CHECKING:
|
|
189
|
+
from numpy.typing import NDArray
|
|
190
|
+
from pyopencl._cl import (
|
|
191
|
+
DeviceTopologyAmd,
|
|
192
|
+
enqueue_copy_buffer_p2p_amd,
|
|
193
|
+
enqueue_map_buffer,
|
|
194
|
+
enqueue_map_image,
|
|
195
|
+
UserEvent, ImageDescriptor,
|
|
196
|
+
SVM, SVMAllocation, SVMPointer,
|
|
197
|
+
# _enqueue_barrier_with_wait_list, _enqueue_fill_buffer,
|
|
198
|
+
# _enqueue_marker_with_wait_list,
|
|
199
|
+
enqueue_fill_image,
|
|
200
|
+
enqueue_migrate_mem_objects, unload_platform_compiler,
|
|
201
|
+
GLBuffer, GLRenderBuffer, GLTexture, gl_object_type, gl_texture_info,
|
|
202
|
+
get_apple_cgl_share_group,
|
|
203
|
+
enqueue_acquire_gl_objects,
|
|
204
|
+
enqueue_release_gl_objects,
|
|
205
|
+
)
|
|
206
|
+
else:
|
|
207
|
+
try:
|
|
208
|
+
from pyopencl._cl import DeviceTopologyAmd
|
|
209
|
+
from pyopencl._cl import enqueue_copy_buffer_p2p_amd
|
|
210
|
+
except ImportError:
|
|
211
|
+
pass
|
|
212
|
+
|
|
213
|
+
if not _PYPY:
|
|
214
|
+
# FIXME: Add back to default set when pypy support catches up
|
|
215
|
+
from pyopencl._cl import enqueue_map_buffer
|
|
216
|
+
from pyopencl._cl import enqueue_map_image
|
|
217
|
+
|
|
218
|
+
if get_cl_header_version() >= (1, 1):
|
|
219
|
+
from pyopencl._cl import UserEvent
|
|
220
|
+
if get_cl_header_version() >= (1, 2):
|
|
221
|
+
from pyopencl._cl import ImageDescriptor
|
|
222
|
+
from pyopencl._cl import ( # noqa: F401
|
|
223
|
+
_enqueue_barrier_with_wait_list, _enqueue_fill_buffer,
|
|
224
|
+
_enqueue_marker_with_wait_list, enqueue_fill_image,
|
|
225
|
+
enqueue_migrate_mem_objects, unload_platform_compiler)
|
|
226
|
+
|
|
227
|
+
if get_cl_header_version() >= (2, 0):
|
|
228
|
+
from pyopencl._cl import SVM, SVMAllocation, SVMPointer
|
|
229
|
+
|
|
230
|
+
if _cl.have_gl():
|
|
231
|
+
from pyopencl._cl import (
|
|
232
|
+
GLBuffer, GLRenderBuffer, GLTexture, gl_object_type, gl_texture_info)
|
|
233
|
+
|
|
234
|
+
try:
|
|
235
|
+
from pyopencl._cl import get_apple_cgl_share_group
|
|
236
|
+
except ImportError:
|
|
237
|
+
pass
|
|
238
|
+
|
|
239
|
+
try:
|
|
240
|
+
from pyopencl._cl import enqueue_acquire_gl_objects
|
|
241
|
+
from pyopencl._cl import enqueue_release_gl_objects
|
|
242
|
+
except ImportError:
|
|
243
|
+
pass
|
|
244
|
+
|
|
245
|
+
import pyopencl._monkeypatch
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# {{{ diagnostics
|
|
249
|
+
|
|
250
|
+
class CompilerWarning(UserWarning):
    """Warning category under which :func:`compiler_output` reports
    non-empty OpenCL compiler output.
    """
    pass
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class CommandQueueUsedAfterExit(UserWarning):
    """Warning category relating to command queue use.

    NOTE(review): presumably issued when a :class:`CommandQueue` is used
    after its ``with`` block has exited -- confirm at the point of use,
    which is not in this part of the module.
    """
    pass
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def compiler_output(text: str) -> None:
    """Report non-empty compiler output as a :class:`CompilerWarning`.

    The full *text* is only shown if the environment variable
    ``PYOPENCL_COMPILER_OUTPUT`` is set to a true value. Otherwise a short
    hint on how to enable the full output is issued instead.

    :arg text: the output produced by the compiler.
    """
    from pytools import strtobool

    show_full_output = strtobool(
            os.environ.get("PYOPENCL_COMPILER_OUTPUT", "False"))

    message = text if show_full_output else (
            "Non-empty compiler output encountered. Set the "
            "environment variable PYOPENCL_COMPILER_OUTPUT=1 "
            "to see more.")

    # stacklevel=3 attributes the warning two frames above this function.
    warn(message, CompilerWarning, stacklevel=3)
|
|
266
|
+
|
|
267
|
+
# }}}
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# {{{ find pyopencl shipped source code
|
|
271
|
+
|
|
272
|
+
def _find_pyopencl_include_path() -> str:
    """Return the directory holding the ``.cl``/``.h`` files shipped with
    the package, wrapped in double quotes if it contains a space.

    The ``cl`` directory next to this file is tried first, followed by the
    ``cl`` resource of the ``pyopencl`` package.

    :raises OSError: if neither location exists.
    """
    from os.path import abspath, dirname, exists, join

    # First candidate: the "cl" directory sitting next to this file.
    candidate = join(abspath(dirname(__file__)), "cl")
    if not exists(candidate):
        from importlib.resources import files

        # Second candidate: ask the import system where the package lives.
        candidate = str(files("pyopencl") / "cl")
        if not exists(candidate):
            raise OSError("Unable to find PyOpenCL include path")

    # Quote the path if it contains a space and is not quoted already.
    # See https://github.com/inducer/pyopencl/issues/250 for discussion.
    needs_quoting = " " in candidate and not candidate.startswith('"')
    return '"' + candidate + '"' if needs_quoting else candidate
|
|
290
|
+
|
|
291
|
+
# }}}
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# {{{ build option munging
|
|
295
|
+
|
|
296
|
+
def _split_options_if_necessary(
            options: str | Sequence[str]
        ) -> Sequence[str]:
    """Return *options* as a sequence of individual option strings.

    A single string is split using shell-like rules (:func:`shlex.split`).
    Anything else is returned unchanged.
    """
    if not isinstance(options, str):
        return options

    import shlex

    return shlex.split(options)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _find_include_path(options: Sequence[str]) -> list[str]:
    """Collect the include directories named by ``-I``/``/I`` in *options*.

    Both the attached form (``-Idir``) and the detached form
    (``-I``, ``dir``) are recognized. One pair of surrounding double quotes
    is removed from each directory.

    :returns: a list starting with ``"."``, followed by the directories in
        the order in which they occur.
    """
    def strip_quotes(path: str):
        is_quoted = path.startswith('"') and path.endswith('"')
        return path[1:-1] if is_quoted else path

    found = ["."]

    remaining = iter(options)
    for raw_option in remaining:
        option = raw_option.strip()
        if not (option.startswith("-I") or option.startswith("/I")):
            continue

        if len(option) > 2:
            # attached form: the directory follows the flag directly
            found.append(strip_quotes(option[2:].lstrip()))
            continue

        # detached form: the directory is the next option (if there is
        # one), which is consumed here and not examined as a flag itself
        directory = next(remaining, None)
        if directory is not None:
            found.append(strip_quotes(directory))

    return found
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _options_to_bytestring(options: Sequence[str | bytes]):
    """Join *options* with single spaces into one :class:`bytes` object.

    :class:`str` entries are encoded as UTF-8, :class:`bytes` entries are
    used as they are.
    """
    return b" ".join(
            opt.encode("utf-8") if isinstance(opt, str) else opt
            for opt in options)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# }}}
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
# {{{ Program (wrapper around _Program, adds caching support)
|
|
349
|
+
|
|
350
|
+
from pytools import strtobool
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# True if the environment variable PYOPENCL_NO_CACHE holds a true value, in
# which case Program.build builds from source without the binary cache.
_PYOPENCL_NO_CACHE = strtobool(os.environ.get("PYOPENCL_NO_CACHE", "false"))

# options that Program._process_build_options appends to every option list
_DEFAULT_BUILD_OPTIONS: list[str] = []
# include option for the directory returned by _find_pyopencl_include_path
_DEFAULT_INCLUDE_OPTIONS: list[str] = ["-I", _find_pyopencl_include_path()]

# map of platform.name to build options list
_PLAT_BUILD_OPTIONS: dict[str, list[str]] = {
    "Oclgrind": ["-D", "PYOPENCL_USING_OCLGRIND"],
    }
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def enable_debugging(platform_or_context: Platform | Context) -> None:
    """Enables debugging for all code subsequently compiled by
    PyOpenCL on the passed *platform*. Alternatively, a context
    may be passed, in which case the platform of its first device is used.

    Only platforms whose name contains ``"AMD Accelerated"`` are handled:
    ``-g -O0`` is added to that platform's build options and the
    environment variable ``CPU_MAX_COMPUTE_UNITS`` is set to ``"1"``.
    For any other platform, a warning is issued and nothing is changed.
    """
    platform = (
            platform_or_context.devices[0].platform
            if isinstance(platform_or_context, Context)
            else platform_or_context)

    if "AMD Accelerated" not in platform.name:
        warn(f"Do not know how to enable debugging on '{platform.name}'",
                stacklevel=2)
        return

    debug_options = ["-g", "-O0"]
    _PLAT_BUILD_OPTIONS.setdefault(platform.name, []).extend(debug_options)
    os.environ["CPU_MAX_COMPUTE_UNITS"] = "1"
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
class RepeatedKernelRetrieval(UserWarning):
    """Warning category used by :class:`Program` when the same kernel name
    is retrieved via attribute access for the second time.
    """
    pass
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
RetT = TypeVar("RetT")
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
class Program:
    """Wrapper around :class:`_Program` that adds build caching support.

    An instance may be created in one of three ways:

    * from an existing :class:`_Program` (one argument),
    * from a context and source code (two arguments). If
      :func:`pyopencl.tools.is_spirv` recognizes the source, the underlying
      program is created immediately. Otherwise its creation is deferred
      (``_prg`` is *None*) until :meth:`build` or another method needs it,
    * from a context, a sequence of devices and a sequence of binaries
      (three arguments).

    Attribute access with a name that is not otherwise found is treated as
    a kernel lookup, see :meth:`__getattr__`.
    """

    # underlying program, None while creation from source is deferred
    _prg: _Program | None
    _context: Context
    # only set in the (non-SPIR-V) from-source case
    _source: str | bytes
    # (description of the build, whether it came from the cache, duration
    # in seconds) of the last build, until it has been logged once
    _build_duration_info: tuple[str, bool, float] | None

    @overload
    def __init__(self, arg1: _Program) -> None: ...

    @overload
    def __init__(self, arg1: Context, arg2: str | bytes) -> None: ...

    @overload
    def __init__(
            self,
            arg1: Context,
            arg2: Sequence[Device],
            arg3: Sequence[bytes]
        ) -> None: ...

    def __init__(self, arg1, arg2=None, arg3=None):
        # Both attributes below are read by __getattr__. They must exist on
        # every path (including the early return for SPIR-V), since a
        # missing instance attribute would re-enter __getattr__ and be
        # looked up as a kernel name.
        self._knl_retrieval_count: dict[str, int] = {}
        self._build_duration_info = None

        if arg2 is None:
            # 1-argument form: program
            self._prg = cast("_Program", arg1)
            self._context = cast("Context", self._prg.get_info(program_info.CONTEXT))

        elif arg3 is None:
            # 2-argument form: context, source
            context, source = cast("tuple[Context, str | bytes]", (arg1, arg2))

            from pyopencl.tools import is_spirv
            if is_spirv(source):
                # FIXME no caching in SPIR-V case
                self._context = context
                self._prg = _cl._create_program_with_il(context, source)
                return

            self._context = context
            self._source = source
            self._prg = None

        else:
            # 3-argument form: context, devices, binaries
            context, devices, binaries = arg1, arg2, arg3
            self._context = context
            self._prg = _cl._Program(context, devices, binaries)

    def _get_prg(self) -> _Program:
        """Return the underlying :class:`_Program`.

        If it does not exist yet, it is created from the stored source,
        and a warning is issued since doing so bypasses the cached build
        path of :meth:`build`.
        """
        if self._prg is not None:
            return self._prg
        else:
            # "no program" can only happen in from-source case.
            warn("Pre-build attribute access defeats compiler caching.",
                    stacklevel=3)

            self._prg = _cl._Program(self._context, self._source)
            return self._prg

    def get_info(self, arg: program_info) -> object:
        """Forward to ``get_info`` of the underlying program."""
        return self._get_prg().get_info(arg)

    def get_build_info(self, *args, **kwargs):
        """Forward to ``get_build_info`` of the underlying program."""
        return self._get_prg().get_build_info(*args, **kwargs)

    def all_kernels(self) -> Sequence[Kernel]:
        """Forward to ``all_kernels`` of the underlying program."""
        return self._get_prg().all_kernels()

    @property
    def int_ptr(self):
        return self._get_prg().int_ptr
    int_ptr.__doc__ = _cl._Program.int_ptr.__doc__

    @staticmethod
    def from_int_ptr(int_ptr_value: int, retain: bool = True):
        return Program(_cl._Program.from_int_ptr(int_ptr_value, retain))
    from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__

    def __getattr__(self, attr: str) -> Kernel:
        """Return a new :class:`Kernel` named *attr* from this program.

        A :class:`RepeatedKernelRetrieval` warning is issued on the second
        retrieval of the same name. If the preceding build took longer than
        0.2 seconds, that fact is logged once.

        :raises AttributeError: if creating or querying the kernel raises
            :class:`LogicError`.
        """
        try:
            knl = Kernel(self, attr)
            # Nvidia does not raise errors even for invalid names,
            # but this will give an error if the kernel is invalid.
            knl.num_args  # noqa: B018

            count = self._knl_retrieval_count[attr] = (
                self._knl_retrieval_count.get(attr, 0) + 1)

            if count == 2:
                # https://github.com/inducer/pyopencl/issues/831
                # https://github.com/inducer/pyopencl/issues/830#issuecomment-2913538384
                warn(f"Kernel '{attr}' has been retrieved more than once. "
                    "Each retrieval creates a new, independent kernel, "
                    "at possibly considerable expense. "
                    "To avoid the expense, reuse the retrieved kernel instance. "
                    "To avoid this warning, use cl.Kernel(prg, name).",
                    RepeatedKernelRetrieval, stacklevel=2)

            if self._build_duration_info is not None:
                build_descr, _was_cached, duration = self._build_duration_info
                if duration > 0.2:
                    logger.info(
                        "build program: kernel '%s' was part of a "
                        "lengthy %s (%.2f s)", attr, build_descr, duration)

                # don't whine about build times more than once.
                self._build_duration_info = None

            return knl
        except LogicError as err:
            raise AttributeError("'%s' was not found as a program "
                    "info attribute or as a kernel name" % attr) from err

    # {{{ build

    @classmethod
    def _process_build_options(cls,
                context: Context,
                options: str | Sequence[str] | None,
                _add_include_path: bool = False
            ) -> tuple[bytes, Sequence[str]]:
        """Assemble the complete build options for *context*.

        The user-supplied *options* come first, followed by the default
        build options, the default include options, the options registered
        for the platform of the first device of *context* and, lastly, the
        whitespace-separated contents of the environment variable
        ``PYOPENCL_BUILD_OPTIONS`` (if set and non-empty).

        :returns: a tuple ``(options_bytes, include_path)`` of the options
            as a single byte string and the list of include directories
            found in them.
        """
        if options is None:
            options = []

        options = _split_options_if_necessary(options)

        options = (
                *options,
                *_DEFAULT_BUILD_OPTIONS,
                *_DEFAULT_INCLUDE_OPTIONS,
                *_PLAT_BUILD_OPTIONS.get(context.devices[0].platform.name, []))

        forced_options = os.environ.get("PYOPENCL_BUILD_OPTIONS")
        if forced_options:
            options = (
                    *options,
                    *forced_options.split())

        return (
            _options_to_bytestring(options),
            _find_include_path(options))

    def build(self,
                options: str | Sequence[str] | None = None,
                devices: Sequence[Device] | None = None,
                cache_dir: str | None = None,
            ):
        """Build the program and return *self*.

        :arg options: build options, as a single string or as a sequence of
            strings, see :meth:`_process_build_options`.
        :arg devices: passed on to the underlying build.
        :arg cache_dir: directory for the binary cache. If *None*, the
            ``cache_dir`` attribute of the context is used, if it has one.

        If the underlying program already exists, it is built directly.
        Otherwise the program is built from source via the binary cache in
        :mod:`pyopencl.cache`, unless caching has been disabled by the user
        or the first device of the context is reported to have its own
        source build cache.
        """
        options_bytes, include_path = self._process_build_options(
                self._context, options)

        if cache_dir is None:
            cache_dir = getattr(self._context, "cache_dir", None)

        build_descr = None
        from pyopencl.characterize import has_src_build_cache

        if (
                (_PYOPENCL_NO_CACHE or has_src_build_cache(self._context.devices[0]))
                and self._prg is None):
            if _PYOPENCL_NO_CACHE:
                build_descr = "uncached source build (cache disabled by user)"
            else:
                build_descr = "uncached source build (assuming cached by ICD)"

            self._prg = _cl._Program(self._context, self._source)

        from time import time
        start_time = time()
        was_cached = False

        if self._prg is not None:
            # uncached

            if build_descr is None:
                build_descr = "uncached source build"

            self._build_and_catch_errors(
                    lambda: self._prg.build(options_bytes, devices),
                    options_bytes=options_bytes)

        else:
            # cached

            from pyopencl.cache import create_built_program_from_source_cached
            self._prg, was_cached = self._build_and_catch_errors(
                    lambda: create_built_program_from_source_cached(
                        self._context, self._source, options_bytes, devices,
                        cache_dir=cache_dir, include_path=include_path),
                    options_bytes=options_bytes, source=self._source)

            if was_cached:
                build_descr = "cache retrieval"
            else:
                build_descr = "source build resulting from a binary cache miss"

            # NOTE(review): the context reference is dropped here, so later
            # reads of self._context on this instance (e.g. a second build
            # or compile) end up in __getattr__ -- confirm this is intended.
            del self._context

        end_time = time()

        self._build_duration_info = (build_descr, was_cached, end_time-start_time)

        return self

    def _build_and_catch_errors(self,
                build_func: Callable[[], RetT],
                options_bytes: bytes,
                source: str | bytes | None = None,
            ):
        """Call *build_func* and return its result, adding context to
        build errors.

        If *build_func* raises :class:`RuntimeError`, a new
        :class:`RuntimeError` with the same code and routine is raised.
        Its message additionally states the build options and, if *source*
        is given, the name of a temporary ``.cl`` file to which the source
        was saved (the file is not deleted).
        """
        try:
            return build_func()
        except RuntimeError as e:
            msg = str(e)
            if options_bytes:
                msg = msg + "\n(options: %s)" % options_bytes.decode("utf-8")

            if source is not None:
                from tempfile import NamedTemporaryFile

                # The source may be bytes as well as str. Writing bytes to
                # a text-mode file would raise a TypeError that hides the
                # actual build error, so pick the mode to match.
                mode = "wt" if isinstance(source, str) else "wb"
                with NamedTemporaryFile(
                        mode=mode, delete=False, suffix=".cl") as srcfile:
                    srcfile.write(source)

                msg = msg + "\n(source saved as %s)" % srcfile.name

            code = e.code
            routine = e.routine

            err = RuntimeError(
                    _cl._ErrorRecord(
                        msg=msg,
                        code=code,
                        routine=routine))

        # Python 3.2 outputs the whole list of currently active exceptions
        # This serves to remove one (redundant) level from that nesting.
        raise err

    # }}}

    def compile(self,
                options: str | Sequence[str] | None = None,
                devices: Sequence[Device] | None = None,
                headers: Sequence[tuple[str, Program]] | None = None
            ):
        """Compile the program (without caching) and return *self*.

        :arg options: build options, see :meth:`_process_build_options`.
        :arg devices: passed on to the underlying compile call.
        :arg headers: a sequence of ``(name, program)`` tuples. The
            underlying program of each entry is passed on.
        """
        if headers is None:
            headers = []

        options_bytes, _ = self._process_build_options(self._context, options)

        self._get_prg().compile(options_bytes, devices,
                [(name, prg._get_prg()) for name, prg in headers])
        return self

    @override
    def __eq__(self, other: object):
        # Programs compare equal if their underlying programs do.
        return (
            isinstance(other, Program)
            and self._get_prg() == other._get_prg())

    @override
    def __hash__(self):
        return hash(self._get_prg())

    reference_count: int  # pyright: ignore[reportUninitializedInstanceVariable]
    context: Context  # pyright: ignore[reportUninitializedInstanceVariable]
    num_devices: int  # pyright: ignore[reportUninitializedInstanceVariable]
    devices: Sequence[Device]  # pyright: ignore[reportUninitializedInstanceVariable]
    source: str  # pyright: ignore[reportUninitializedInstanceVariable]
    binary_sizes: int  # pyright: ignore[reportUninitializedInstanceVariable]
    binaries: Sequence[bytes]  # pyright: ignore[reportUninitializedInstanceVariable]
    num_kernels: int  # pyright: ignore[reportUninitializedInstanceVariable]
    kernel_names: str  # pyright: ignore[reportUninitializedInstanceVariable]
    il: bytes  # pyright: ignore[reportUninitializedInstanceVariable]
    scope_global_ctors_present: bool  # pyright: ignore[reportUninitializedInstanceVariable]
    scope_global_dtors_present: bool  # pyright: ignore[reportUninitializedInstanceVariable]
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
# Register Program.get_info together with the program_info constants.
# NOTE(review): presumably this is what provides the attributes that are only
# annotated at the end of the Program class (reference_count, context, ...)
# -- confirm in pyopencl._monkeypatch.
pyopencl._monkeypatch.add_get_info(Program, Program.get_info, _cl.program_info)
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def create_program_with_built_in_kernels(context, devices, kernel_names):
    """Create a :class:`Program` from kernels built into *devices*.

    :arg context: a :class:`Context`
    :arg devices: the devices providing the built-in kernels.
    :arg kernel_names: either a single string that is passed on unchanged,
        or an iterable of kernel names, which are joined with semicolons.
    :returns: a :class:`Program` wrapping the newly created program.
    """
    if not isinstance(kernel_names, str):
        # OpenCL specifies a semicolon-separated list of kernel names.
        kernel_names = ";".join(kernel_names)

    return Program(_Program.create_with_built_in_kernels(
        context, devices, kernel_names))
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def link_program(context, programs, options=None, devices=None):
    """Link *programs* into a new :class:`Program`.

    :arg context: a :class:`Context`
    :arg programs: an iterable of :class:`Program` instances whose
        underlying programs are linked.
    :arg options: linker options, as a single string or a sequence of
        strings. *None* means no options.
    :arg devices: passed on to the underlying link call.
    """
    option_list = _split_options_if_necessary(
            [] if options is None else options)
    options_bytes = _options_to_bytestring(option_list)

    raw_programs = [prg._get_prg() for prg in programs]

    return Program(
            _Program.link(context, raw_programs, options_bytes, devices))
|
|
690
|
+
|
|
691
|
+
# }}}
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
# {{{ _OverriddenArrayInterfaceSVMAllocation
|
|
695
|
+
|
|
696
|
+
if get_cl_header_version() >= (2, 0):
    class _OverriddenArrayInterfaceSVMAllocation(SVMAllocation):
        """An :class:`SVMAllocation` that exposes a caller-supplied
        ``__array_interface__`` whose ``data`` entry refers to the
        allocation.
        """

        def __init__(self, ctx, size, alignment, flags, *, _interface,
                queue=None):
            """
            :arg ctx: a :class:`Context`
            :arg flags: some of :class:`svm_mem_flags`.
            :arg _interface: an array interface dictionary. Its ``"data"``
                entry is overwritten in place.
            """
            super().__init__(ctx, size, alignment, flags, queue)

            # mem_flags.READ_ONLY applies to kernels, not the host, so the
            # memory is always presented to the host as writable.
            host_read_only = False
            _interface["data"] = (int(self.svm_ptr), host_read_only)

            self.__array_interface__ = _interface
|
|
711
|
+
|
|
712
|
+
# }}}
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
# {{{ create_image
|
|
716
|
+
|
|
717
|
+
def create_image(context, flags, format, shape=None, pitches=None,
        hostbuf=None, is_array=False, buffer=None) -> Image:
    """
    Create and return an :class:`Image`. All arguments are passed on to
    its constructor.

    See :class:`mem_flags` for values of *flags*.
    *shape* is a 2- or 3-tuple. *format* is an instance of :class:`ImageFormat`.
    *pitches* is a 1-tuple for 2D images and a 2-tuple for 3D images, indicating
    the distance in bytes from one scan line to the next, and from one 2D image
    slice to the next.

    If *hostbuf* is given and *shape* is *None*, then *hostbuf.shape* is
    used as the *shape* parameter.

    :class:`Image` inherits from :class:`MemoryObject`.

    .. note::

        If you want to load images from :class:`numpy.ndarray` instances or
        read images back into them, be aware that OpenCL images expect the
        *x* dimension to vary fastest, whereas in the default (C) order of
        :mod:`numpy` arrays, the last index varies fastest. If your array is
        arranged in the wrong order in memory, there are two possible fixes
        for this:

        * Convert the array to Fortran (column-major) order using
          :func:`numpy.asarray`.

        * Pass *ary.T.copy()* to the image creation function.

    .. versionadded:: 2024.3
    """

    image_kwargs = {
        "shape": shape,
        "pitches": pitches,
        "hostbuf": hostbuf,
        "is_array": is_array,
        "buffer": buffer,
        # tells the constructor that it is being called from this function
        "_through_create_image": True,
        }

    return Image(context, flags, format, **image_kwargs)
|
|
749
|
+
|
|
750
|
+
# }}}
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
# {{{ create_some_context
|
|
754
|
+
|
|
755
|
+
def choose_devices(interactive: bool | None = None,
                   answers: list[str] | None = None) -> list[Device]:
    """
    Choose :class:`Device` instances 'somehow'.

    :arg interactive: If multiple choices for platform and/or device exist,
        *interactive* is ``True`` (or ``None`` and ``sys.stdin.isatty()``
        returns ``True``), then the user is queried about which device should be
        chosen. Otherwise, a device is chosen in an implementation-defined
        manner.
    :arg answers: A sequence of strings that will be used to answer the
        platform/device selection questions. Any sequence is accepted (it is
        copied, never modified) and its entries are converted with
        :class:`str`. If *None*, the answers are taken from the
        colon-separated environment variable ``PYOPENCL_CTX``, if set.
        If *answers* is *None* and the environment variable
        ``PYOPENCL_TEST`` is set, the first device reported by
        :func:`pyopencl.tools.get_test_platforms_and_devices` is returned
        without asking any questions.

    :returns: a list of :class:`Device` instances.
    :raises RuntimeError: if the ICD loader reports no platforms, if an
        answer matches no platform/device, or if some of the provided
        *answers* were left unused.
    :raises Error: if no platforms or no devices were found.
    """

    if answers is None:
        if "PYOPENCL_CTX" in os.environ:
            answers = os.environ["PYOPENCL_CTX"].split(":")

        if "PYOPENCL_TEST" in os.environ:
            from pyopencl.tools import get_test_platforms_and_devices
            for _plat, devs in get_test_platforms_and_devices():
                for dev in devs:
                    return [dev]

    if answers is not None:
        # Keep private, stringified list copies: the working copy is consumed
        # with pop(0), and both copies are later passed to ":".join(), so
        # tuples and non-string entries must be normalized here.
        pre_provided_answers = [str(ans) for ans in answers]
        answers = list(pre_provided_answers)
    else:
        pre_provided_answers = None

    user_inputs: list[str] = []

    if interactive is None:
        interactive = True
        try:
            if not sys.stdin.isatty():
                interactive = False
        except Exception:
            # stdin may be missing or replaced by an object without isatty()
            interactive = False

    def cc_print(s):
        # only talk to the user when running interactively
        if interactive:
            print(s)

    def get_input(prompt):
        # Pre-provided answers are used up first; only then is the user asked.
        if answers:
            return answers.pop(0)
        elif not interactive:
            return ""
        else:
            user_input = input(prompt)
            user_inputs.append(user_input)
            return user_input

    # {{{ pick a platform

    try:
        platforms = get_platforms()
    except LogicError as e:
        if "PLATFORM_NOT_FOUND_KHR" in str(e):
            # With the cl_khr_icd extension, clGetPlatformIDs fails if no platform
            # is available:
            # https://registry.khronos.org/OpenCL/sdk/3.0/docs/man/html/clGetPlatformIDs.html
            raise RuntimeError("no CL platforms available to ICD loader. "
                               "Install a CL driver "
                               "('ICD', such as pocl, rocm, Intel CL) to fix this. "
                               "See pyopencl docs for help: "
                               "https://documen.tician.de/pyopencl/"
                               "misc.html#installation") from e
        else:
            raise

    if not platforms:
        raise Error("no platforms found")
    else:
        if not answers:
            cc_print("Choose platform:")
            for i, pf in enumerate(platforms):
                cc_print(f"[{i}] {pf}")

        answer = get_input("Choice [0]:")
        if not answer:
            platform = platforms[0]
        else:
            platform = None
            try:
                int_choice = int(answer)
            except ValueError:
                pass
            else:
                if 0 <= int_choice < len(platforms):
                    platform = platforms[int_choice]

            if platform is None:
                # Fall back to a case-insensitive substring match on the
                # platform name. There is no 'break': if several platforms
                # match, the last one wins.
                answer = answer.lower()
                for pf in platforms:
                    if answer in pf.name.lower():
                        platform = pf
                if platform is None:
                    raise RuntimeError("input did not match any platform")

    # }}}

    # {{{ pick a device

    devices = platform.get_devices()

    def parse_device(choice):
        # A valid integer index takes precedence; otherwise the first device
        # whose name contains *choice* (case-insensitively) is used.
        try:
            int_choice = int(choice)
        except ValueError:
            pass
        else:
            if 0 <= int_choice < len(devices):
                return devices[int_choice]

        choice = choice.lower()
        for dev in devices:
            if choice in dev.name.lower():
                return dev
        raise RuntimeError("input did not match any device")

    if not devices:
        raise Error("no devices found")
    elif len(devices) == 1 and not answers:
        cc_print(f"Choosing only available device: {devices[0]}")
    else:
        if not answers:
            cc_print("Choose device(s):")
            for i, dev in enumerate(devices):
                cc_print(f"[{i}] {dev}")

        answer = get_input("Choice, comma-separated [0]:")
        if not answer:
            devices = [devices[0]]
        else:
            devices = [parse_device(i) for i in answer.split(",")]

    # }}}

    if user_inputs:
        # Tell the user how to reproduce this selection non-interactively.
        all_inputs = user_inputs
        if pre_provided_answers is not None:
            all_inputs = pre_provided_answers + user_inputs
        cc_print("Set the environment variable PYOPENCL_CTX='%s' to "
                "avoid being asked again." % ":".join(all_inputs))

    if answers:
        raise RuntimeError("not all provided choices were used by "
                "choose_devices. (left over: '%s')" % ":".join(answers))

    return devices
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
def create_some_context(interactive: bool | None = None,
                        answers: list[str] | None = None) -> Context:
    """
    Create a :class:`Context` 'somehow'.

    The devices are picked by :func:`choose_devices`, to which both
    arguments are forwarded unchanged.

    :arg interactive: If multiple choices for platform and/or device exist,
        *interactive* is ``True`` (or ``None`` and ``sys.stdin.isatty()``
        returns ``True``), then the user is queried about which device should be
        chosen. Otherwise, a device is chosen in an implementation-defined
        manner.
    :arg answers: A sequence of strings that will be used to answer the
        platform/device selection questions.

    :returns: an instance of :class:`Context`.
    """
    return Context(choose_devices(interactive, answers))


# short alias for create_some_context
_csc = create_some_context
|
|
933
|
+
|
|
934
|
+
# }}}
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
# {{{ SVMMap
|
|
938
|
+
|
|
939
|
+
@dataclass
class SVMMap(Generic[SVMInnerT]):
    """
    Returned by :func:`SVMPointer.map` and :func:`SVM.map`.
    This class may also be used as a context manager in a ``with`` statement.
    :meth:`release` will be called upon exit from the ``with`` region.
    The value returned to the ``as`` part of the context manager is the
    mapped Python object (e.g. a :mod:`numpy` array).

    .. versionadded:: 2016.2

    .. property:: event

        The :class:`Event` returned when mapping the memory.

    .. automethod:: release

    """
    # set to None by release(), which marks the map as already unmapped
    svm: SVM[SVMInnerT] | None
    array: SVMInnerT
    queue: CommandQueue
    event: Event

    def __del__(self):
        # Nothing to do if release() has already run.
        if self.svm is None:
            return
        self.release()

    def __enter__(self):
        return self.array

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()

    def release(self,
                queue: CommandQueue | None = None,
                wait_for: WaitList = None
            ) -> Event:
        """
        :arg queue: a :class:`pyopencl.CommandQueue`. Defaults to the one
            with which the map was created, if not specified.
        :returns: a :class:`pyopencl.Event`

        |std-enqueue-blurb|
        """

        mapped_svm = self.svm
        assert mapped_svm is not None

        unmap_queue = queue or self.queue
        unmap_evt = mapped_svm._enqueue_unmap(unmap_queue, wait_for)

        # forget the SVM so that __del__ does not unmap a second time
        self.svm = None

        return unmap_evt
|
|
989
|
+
|
|
990
|
+
# }}}
|
|
991
|
+
|
|
992
|
+
|
|
993
|
+
# {{{ enqueue_copy
|
|
994
|
+
|
|
995
|
+
# Memory object types that enqueue_copy dispatches to the image transfer paths.
_IMAGE_MEM_OBJ_TYPES = [
    mem_object_type.IMAGE2D,
    mem_object_type.IMAGE3D,
    # IMAGE2D_ARRAY is only looked up when the CL headers are 1.2 or newer.
    *((mem_object_type.IMAGE2D_ARRAY,)
      if get_cl_header_version() >= (1, 2) else ()),
]
|
|
998
|
+
|
|
999
|
+
|
|
1000
|
+
# Buffer <- host (linear)
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: Buffer,
        src: HasBufferInterface,
        *,
        dst_offset: int = 0,
        is_blocking: bool = True,
        wait_for: WaitList = None
    ) -> Event: ...

# host <- Buffer (linear)
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: HasBufferInterface,
        src: Buffer,
        *,
        src_offset: int = 0,
        is_blocking: bool = True,
        wait_for: WaitList = None
    ) -> Event: ...

# Buffer <- Buffer (linear)
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: Buffer,
        src: Buffer,
        *,
        src_offset: int = 0,
        dst_offset: int = 0,
        byte_count: int | None = None,
        wait_for: WaitList = None
    ) -> Event: ...

# NOTE(review): the implementation below selects the rectangular
# Buffer <-> host paths by looking for a 'buffer_origin' keyword (which is
# also the name used in the docstring), while the next two overloads declare
# 'origin'. Confirm which keyword name is intended.

# Buffer <- host (rectangular)
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: Buffer,
        src: HasBufferInterface,
        *,
        origin: tuple[int, ...],
        host_origin: tuple[int, ...],
        region: tuple[int, ...],
        buffer_pitches: tuple[int, ...] | None = None,
        host_pitches: tuple[int, ...] | None = None,
        is_blocking: bool = True,
        wait_for: WaitList = None
    ) -> Event: ...

# host <- Buffer (rectangular)
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: HasBufferInterface,
        src: Buffer,
        *,
        origin: tuple[int, ...],
        host_origin: tuple[int, ...],
        region: tuple[int, ...],
        buffer_pitches: tuple[int, ...] | None = None,
        host_pitches: tuple[int, ...] | None = None,
        is_blocking: bool = True,
        wait_for: WaitList = None
    ) -> Event: ...

# Buffer <- Buffer (rectangular)
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: Buffer,
        src: Buffer,
        *,
        src_origin: tuple[int, ...],
        dst_origin: tuple[int, ...],
        region: tuple[int, ...],
        src_pitches: tuple[int, ...] | None = None,
        dst_pitches: tuple[int, ...] | None = None,
        wait_for: WaitList = None
    ) -> Event: ...

# host <- Image
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: HasBufferInterface,
        src: Image,
        *,
        origin: tuple[int, ...],
        region: tuple[int, ...],
        pitches: tuple[int, ...] | None = None,
        is_blocking: bool = True,
        wait_for: WaitList = None
    ) -> Event: ...

# Image <- host
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: Image,
        src: HasBufferInterface,
        *,
        origin: tuple[int, ...],
        region: tuple[int, ...],
        pitches: tuple[int, ...] | None = None,
        is_blocking: bool = True,
        wait_for: WaitList = None
    ) -> Event: ...

# Image <- Buffer
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: Image,
        src: Buffer,
        *,
        origin: tuple[int, ...],
        region: tuple[int, ...],
        pitches: tuple[int, ...] | None = None,
        wait_for: WaitList = None
    ) -> Event: ...

# Buffer <- Image
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: Buffer,
        src: Image,
        *,
        origin: tuple[int, ...],
        region: tuple[int, ...],
        pitches: tuple[int, ...] | None = None,
        wait_for: WaitList = None
    ) -> Event: ...

# Image <- Image
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: Image,
        src: Image,
        *,
        src_origin: tuple[int, ...],
        dest_origin: tuple[int, ...],
        region: tuple[int, ...],
        wait_for: WaitList = None
    ) -> Event: ...

# SVM/host <- SVM/host
@overload
def enqueue_copy(
        queue: CommandQueue,
        dest: SVMPointer | HasBufferInterface,
        src: SVMPointer | HasBufferInterface,
        *,
        byte_count: int | None = None,

        # do not use, must be zero
        src_offset: int = 0,
        dst_offset: int = 0,

        is_blocking: bool = True,
        wait_for: WaitList = None
    ) -> Event: ...


def enqueue_copy(queue: CommandQueue, dest: Any, src: Any, **kwargs: Any) -> Event:
    """Copy from :class:`Image`, :class:`Buffer` or the host to
    :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host
    copies are unsupported.)

    The following keyword arguments are available:

    :arg wait_for: (optional, default empty)
    :arg is_blocking: Wait for completion. Defaults to *True*.
      (Available on any copy involving host memory)
    :return: A :class:`NannyEvent` if the transfer involved a
        host-side buffer, otherwise an :class:`Event`.

    .. note::

        Be aware that the deletion of the :class:`NannyEvent` that is
        returned by the function if the transfer involved a host-side buffer
        will block until the transfer is complete, so be sure to keep a
        reference to this :class:`Event` until the
        transfer has completed.

    .. note::

        Two types of 'buffer' occur in the arguments to this function,
        :class:`Buffer` and 'host-side buffers'. The latter are
        defined by Python and commonly called `buffer objects
        <https://docs.python.org/3/c-api/buffer.html>`__. :mod:`numpy`
        arrays are a very common example.
        Make sure to always be clear on whether a :class:`Buffer` or a
        Python buffer object is needed.

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ host
    .. ------------------------------------------------------------------------

    :arg src_offset: offset in bytes (optional)

        May only be nonzero if applied on the device side.

    :arg dst_offset: offset in bytes (optional)

        May only be nonzero if applied on the device side.

    .. note::

        The size of the transfer is controlled by the size of the
        host-side buffer. If the host-side buffer
        is a :class:`numpy.ndarray`, you can control the transfer size by
        transferring into a smaller 'view' of the target array, like this::

            cl.enqueue_copy(queue, large_dest_numpy_array[:15], src_buffer)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ :class:`Buffer`
    .. ------------------------------------------------------------------------

    :arg byte_count: (optional) If not specified, defaults to the
        size of the source in versions 2012.x and earlier,
        and to the minimum of the size of the source and target
        from 2013.1 on.
    :arg src_offset: (optional)
    :arg dst_offset: (optional)

    .. ------------------------------------------------------------------------
    .. rubric :: Rectangular :class:`Buffer` ↔  host transfers (CL 1.1 and newer)
    .. ------------------------------------------------------------------------

    :arg buffer_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg host_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg buffer_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)
    :arg host_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)

    .. ------------------------------------------------------------------------
    .. rubric :: Rectangular :class:`Buffer` ↔  :class:`Buffer`
        transfers (CL 1.1 and newer)
    .. ------------------------------------------------------------------------

    :arg src_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg dst_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg src_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)
    :arg dst_pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional, "tightly-packed" if unspecified)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Image` ↔ host
    .. ------------------------------------------------------------------------

    :arg origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg pitches: :class:`tuple` of :class:`int` of length
        two or shorter. (optional)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Buffer` ↔ :class:`Image`
    .. ------------------------------------------------------------------------

    :arg offset: offset in buffer (mandatory)
    :arg origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`Image` ↔ :class:`Image`
    .. ------------------------------------------------------------------------

    :arg src_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg dest_origin: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)
    :arg region: :class:`tuple` of :class:`int` of length
        three or shorter. (mandatory)

    .. ------------------------------------------------------------------------
    .. rubric :: Transfer :class:`SVMPointer`/host ↔ :class:`SVMPointer`/host
    .. ------------------------------------------------------------------------

    :arg byte_count: (optional) If not specified, defaults to the
        size of the source in versions 2012.x and earlier,
        and to the minimum of the size of the source and target
        from 2013.1 on.

    |std-enqueue-blurb|

    .. versionadded:: 2011.1
    """

    # Dispatch on the kind of *dest* first (memory object, SVM, host), then
    # on the kind of *src*. Remaining keyword arguments are forwarded to the
    # low-level wrapper that is selected.

    if isinstance(dest, MemoryObjectHolder):
        if dest.type == mem_object_type.BUFFER:
            if isinstance(src, MemoryObjectHolder):
                if src.type == mem_object_type.BUFFER:
                    # {{{ buffer -> buffer

                    if "src_origin" in kwargs:
                        # rectangular
                        return _cl._enqueue_copy_buffer_rect(
                                queue, src, dest, **kwargs)
                    else:
                        # linear
                        # 'dest_offset' is the deprecated spelling of
                        # 'dst_offset'.
                        dest_offset = kwargs.pop("dest_offset", None)
                        if dest_offset is not None:
                            if "dst_offset" in kwargs:
                                raise TypeError("may not specify both 'dst_offset' "
                                        "and 'dest_offset'")

                            warn("The 'dest_offset' argument of enqueue_copy "
                                    "is deprecated. Use 'dst_offset' instead. "
                                    "'dest_offset' will stop working in 2023.x.",
                                    DeprecationWarning, stacklevel=2)

                            kwargs["dst_offset"] = dest_offset

                        return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs)

                    # }}}
                elif src.type in _IMAGE_MEM_OBJ_TYPES:
                    return _cl._enqueue_copy_image_to_buffer(
                            queue, src, dest, **kwargs)
                else:
                    raise ValueError("invalid src mem object type")
            else:
                # {{{ host -> buffer

                if "buffer_origin" in kwargs:
                    return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs)
                else:
                    # 'device_offset' is the deprecated spelling of
                    # 'dst_offset' for this direction.
                    device_offset = kwargs.pop("device_offset", None)
                    if device_offset is not None:
                        if "dst_offset" in kwargs:
                            raise TypeError("may not specify both 'device_offset' "
                                    "and 'dst_offset'")

                        warn("The 'device_offset' argument of enqueue_copy "
                                "is deprecated. Use 'dst_offset' instead. "
                                "'device_offset' will stop working in 2023.x.",
                                DeprecationWarning, stacklevel=2)

                        kwargs["dst_offset"] = device_offset

                    return _cl._enqueue_write_buffer(queue, dest, src, **kwargs)

                # }}}

        elif dest.type in _IMAGE_MEM_OBJ_TYPES:
            # {{{ ... -> image

            if isinstance(src, MemoryObjectHolder):
                if src.type == mem_object_type.BUFFER:
                    return _cl._enqueue_copy_buffer_to_image(
                            queue, src, dest, **kwargs)
                elif src.type in _IMAGE_MEM_OBJ_TYPES:
                    return _cl._enqueue_copy_image(queue, src, dest, **kwargs)
                else:
                    raise ValueError("invalid src mem object type")
            else:
                # assume from-host
                origin = kwargs.pop("origin")
                region = kwargs.pop("region")

                # An explicit pitches=None means the same as leaving it out.
                pitches = kwargs.pop("pitches", None)
                if pitches is None:
                    pitches = (0, 0)
                if len(pitches) == 1:
                    kwargs["row_pitch"], = pitches
                else:
                    kwargs["row_pitch"], kwargs["slice_pitch"] = pitches

                return _cl._enqueue_write_image(
                        queue, dest, origin, region, src, **kwargs)

            # }}}
        else:
            raise ValueError("invalid dest mem object type")

    elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVMPointer):
        # {{{ ... -> SVM

        if not isinstance(src, SVMPointer):
            src = SVM(src)

        is_blocking = kwargs.pop("is_blocking", True)

        # These are NOT documented. They only support consistency with the
        # Buffer-based API for the sake of the Array.
        if kwargs.pop("src_offset", 0) != 0:
            raise ValueError("src_offset must be 0")
        if kwargs.pop("dst_offset", 0) != 0:
            raise ValueError("dst_offset must be 0")

        return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs)

        # }}}

    else:
        # assume to-host

        if isinstance(src, MemoryObjectHolder):
            if src.type == mem_object_type.BUFFER:
                if "buffer_origin" in kwargs:
                    return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs)
                else:
                    # 'device_offset' is the deprecated spelling of
                    # 'src_offset' for this direction.
                    device_offset = kwargs.pop("device_offset", None)
                    if device_offset is not None:
                        if "src_offset" in kwargs:
                            raise TypeError("may not specify both 'device_offset' "
                                    "and 'src_offset'")

                        warn("The 'device_offset' argument of enqueue_copy "
                                "is deprecated. Use 'src_offset' instead. "
                                "'device_offset' will stop working in 2023.x.",
                                DeprecationWarning, stacklevel=2)

                        kwargs["src_offset"] = device_offset

                    return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)

            elif src.type in _IMAGE_MEM_OBJ_TYPES:
                origin = kwargs.pop("origin")
                region = kwargs.pop("region")

                # An explicit pitches=None means the same as leaving it out.
                pitches = kwargs.pop("pitches", None)
                if pitches is None:
                    pitches = (0, 0)
                if len(pitches) == 1:
                    kwargs["row_pitch"], = pitches
                else:
                    kwargs["row_pitch"], kwargs["slice_pitch"] = pitches

                return _cl._enqueue_read_image(
                        queue, src, origin, region, dest, **kwargs)
            else:
                raise ValueError("invalid src mem object type")
        elif isinstance(src, SVMPointer):
            # {{{ svm -> host

            # dest is not a SVM instance, otherwise we'd be in the branch above

            # This is NOT documented. They only support consistency with the
            # Buffer-based API for the sake of the Array.
            if kwargs.pop("src_offset", 0) != 0:
                raise ValueError("src_offset must be 0")

            is_blocking = kwargs.pop("is_blocking", True)
            return _cl._enqueue_svm_memcpy(
                    queue, is_blocking, SVM(dest), src, **kwargs)

            # }}}
        else:
            # assume from-host
            raise TypeError("enqueue_copy cannot perform host-to-host transfers")
|
|
1456
|
+
|
|
1457
|
+
# }}}
|
|
1458
|
+
|
|
1459
|
+
|
|
1460
|
+
# {{{ enqueue_fill
|
|
1461
|
+
|
|
1462
|
+
def enqueue_fill(queue: CommandQueue,
        dest: MemoryObjectHolder | SVMPointer,
        pattern: HasBufferInterface,
        size: int,
        *, offset: int = 0,
        wait_for: WaitList = None) -> Event:
    """
    Fill *dest* with *pattern*, dispatching on the type of *dest*:
    :class:`MemoryObjectHolder` instances are handed to
    :func:`enqueue_fill_buffer`, :class:`SVMPointer` instances to
    :func:`enqueue_svm_memfill`.

    :arg offset: only supported for :class:`MemoryObjectHolder`
        destinations; a nonzero value with an SVM destination raises
        :exc:`NotImplementedError`.
    :raises TypeError: if *dest* is of neither supported type.

    .. versionadded:: 2022.2
    """
    if isinstance(dest, MemoryObjectHolder):
        return enqueue_fill_buffer(queue, dest, pattern, offset, size, wait_for)

    if not isinstance(dest, SVMPointer):
        raise TypeError(f"enqueue_fill does not know how to fill '{type(dest)}'")

    if offset:
        raise NotImplementedError("enqueue_fill with SVM does not yet support "
                "offsets")

    return enqueue_svm_memfill(queue, dest, pattern, size, wait_for)
|
|
1480
|
+
|
|
1481
|
+
# }}}
|
|
1482
|
+
|
|
1483
|
+
|
|
1484
|
+
# {{{ image creation
|
|
1485
|
+
|
|
1486
|
+
# Image channel data type used for each numpy scalar dtype
# (integers are stored unnormalized).
DTYPE_TO_CHANNEL_TYPE = {
    np.dtype(_scalar_type): _cl_channel_type
    for _scalar_type, _cl_channel_type in [
        (np.float32, channel_type.FLOAT),
        (np.int16, channel_type.SIGNED_INT16),
        (np.int32, channel_type.SIGNED_INT32),
        (np.int8, channel_type.SIGNED_INT8),
        (np.uint16, channel_type.UNSIGNED_INT16),
        (np.uint32, channel_type.UNSIGNED_INT32),
        (np.uint8, channel_type.UNSIGNED_INT8),
    ]
}

# Half-precision floats are only registered if this numpy provides them.
try:
    _np_half = np.float16
except Exception:
    pass
else:
    DTYPE_TO_CHANNEL_TYPE[np.dtype(_np_half)] = channel_type.HALF_FLOAT

# Image channel data type used for each integer dtype when a normalized
# format is requested.
DTYPE_TO_CHANNEL_TYPE_NORM = {
    np.dtype(_scalar_type): _cl_channel_type
    for _scalar_type, _cl_channel_type in [
        (np.int16, channel_type.SNORM_INT16),
        (np.int8, channel_type.SNORM_INT8),
        (np.uint16, channel_type.UNORM_INT16),
        (np.uint8, channel_type.UNORM_INT8),
    ]
}
|
|
1508
|
+
|
|
1509
|
+
|
|
1510
|
+
def image_from_array(
            ctx: Context,
            ary: NDArray[Any],
            num_channels: int | None = None,
            mode: Literal["r"] | Literal["w"] = "r",
            norm_int: bool = False
        ) -> Image:
    """Create an :class:`Image` initialized with a copy of *ary*.

    :arg ary: a C-contiguous array.
    :arg num_channels: number of image channels. If *None*, it is derived
        from the dtype of *ary*: the channel count of a vector dtype known
        to :mod:`pyopencl.cltypes`, otherwise one. If given and greater than
        one, the last axis of *ary* is the channel axis and must have exactly
        this length.
    :arg mode: ``"r"`` for a read-only image, ``"w"`` for a write-only one.
    :arg norm_int: if true, look the channel type up in
        :data:`DTYPE_TO_CHANNEL_TYPE_NORM` instead of
        :data:`DTYPE_TO_CHANNEL_TYPE`.
    :raises ValueError: if *ary* is not C-contiguous or *mode* is invalid.
    :raises RuntimeError: if the last axis does not match *num_channels*.
    """
    if not ary.flags.c_contiguous:
        raise ValueError("array must be C-contiguous")

    scalar_dtype = ary.dtype

    # Only an explicitly requested multi-channel layout stores the channels
    # along a trailing array axis.
    has_channel_axis = num_channels is not None and num_channels != 1

    if num_channels is None:
        try:
            scalar_dtype, num_channels = \
                    pyopencl.cltypes.vec_type_to_scalar_and_count[scalar_dtype]
        except KeyError:
            # It must be a scalar type then.
            num_channels = 1

    if has_channel_axis:
        if ary.shape[-1] != num_channels:
            raise RuntimeError("last dimension must be equal to number of channels")

        img_shape = ary.shape[:-1]
        img_strides = ary.strides[:-1]
    else:
        img_shape = ary.shape
        img_strides = ary.strides

    if mode not in ("r", "w"):
        raise ValueError("invalid value '%s' for 'mode'" % mode)
    access_flags = mem_flags.READ_ONLY if mode == "r" else mem_flags.WRITE_ONLY

    order_by_channel_count = {
            1: channel_order.R,
            2: channel_order.RG,
            3: channel_order.RGB,
            4: channel_order.RGBA,
            }
    img_order = order_by_channel_count[num_channels]

    assert ary.strides[-1] == ary.dtype.itemsize

    type_table = DTYPE_TO_CHANNEL_TYPE_NORM if norm_int else DTYPE_TO_CHANNEL_TYPE
    img_channel_type = type_table[scalar_dtype]

    # Image axes are passed in the reverse of numpy's axis order; the
    # innermost stride is dropped from the pitches.
    reversed_strides = img_strides[::-1]
    return create_image(ctx, access_flags | mem_flags.COPY_HOST_PTR,
            ImageFormat(img_order, img_channel_type),
            shape=img_shape[::-1], pitches=reversed_strides[1:],
            hostbuf=ary)
|
|
1568
|
+
|
|
1569
|
+
# }}}
|
|
1570
|
+
|
|
1571
|
+
|
|
1572
|
+
# {{{ enqueue_* compatibility shims
|
|
1573
|
+
|
|
1574
|
+
def enqueue_marker(queue: CommandQueue, wait_for: WaitList = None) -> Event:
    """Enqueue a marker on *queue* and return its :class:`Event`.

    Uses the wait-list variant when both the queue and the CL headers report
    version 1.2 or newer; otherwise waits for *wait_for* (if non-empty) and
    then enqueues a plain marker.
    """
    have_cl_1_2 = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if have_cl_1_2:
        return _cl._enqueue_marker_with_wait_list(queue, wait_for)

    if wait_for:
        _cl._enqueue_wait_for_events(queue, wait_for)
    return _cl._enqueue_marker(queue)
|
|
1581
|
+
|
|
1582
|
+
|
|
1583
|
+
def enqueue_barrier(queue: CommandQueue, wait_for: WaitList = None) -> Event:
    """Enqueue a barrier on *queue* and return an :class:`Event`.

    Uses the wait-list variant when both the queue and the CL headers report
    version 1.2 or newer. Otherwise a plain barrier is enqueued, followed by
    a wait for *wait_for* (if non-empty) and a marker whose event is
    returned.
    """
    have_cl_1_2 = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if have_cl_1_2:
        return _cl._enqueue_barrier_with_wait_list(queue, wait_for)

    _cl._enqueue_barrier(queue)
    if wait_for:
        _cl._enqueue_wait_for_events(queue, wait_for)
    return _cl._enqueue_marker(queue)
|
|
1591
|
+
|
|
1592
|
+
|
|
1593
|
+
def enqueue_fill_buffer(
            queue: CommandQueue,
            mem: MemoryObjectHolder,
            pattern: HasBufferInterface,
            offset: int,
            size: int,
            wait_for: WaitList = None
        ) -> Event:
    """Fill *size* bytes of *mem*, starting at *offset*, with *pattern*.

    A warning is issued (but the fill is still attempted) unless both the
    queue and the CL headers report version 1.2 or newer.
    """
    declares_cl_1_2 = (
            queue._get_cl_version() >= (1, 2)
            and get_cl_header_version() >= (1, 2))
    if not declares_cl_1_2:
        warn(
            "The context for this queue does not declare OpenCL 1.2 support, so "
            "the next thing you might see is a crash",
            stacklevel=2)

    # On PyPy, numpy scalars are wrapped into arrays before being handed on.
    needs_array_wrapper = _PYPY and isinstance(pattern, np.generic)
    if needs_array_wrapper:
        pattern = np.asarray(pattern)

    return _cl._enqueue_fill_buffer(queue, mem, pattern, offset, size,
            wait_for)
|
|
1612
|
+
|
|
1613
|
+
# }}}
|
|
1614
|
+
|
|
1615
|
+
|
|
1616
|
+
# {{{ numpy-like svm allocation
|
|
1617
|
+
|
|
1618
|
+
def enqueue_svm_memfill(
        queue: CommandQueue,
        dest: SVMPointer | HasBufferInterface,
        pattern: HasBufferInterface,
        byte_count: int | None = None,
        wait_for: WaitList = None
        ) -> Event:
    """Fill shared virtual memory with a pattern.

    :arg dest: a Python buffer object, or any implementation of
        :class:`SVMPointer`. Anything that is not an :class:`SVMPointer`
        is wrapped in :class:`SVM` before being passed on.
    :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray`)
        with the fill pattern to be used.
    :arg byte_count: The size of the memory to be filled. Defaults to the
        entirety of *dest*.

    |std-enqueue-blurb|

    .. versionadded:: 2016.2
    """

    # The low-level call is handed an SVMPointer in every case; plain
    # buffer objects are tagged with SVM here.
    if not isinstance(dest, SVMPointer):
        dest = SVM(dest)

    return _cl._enqueue_svm_memfill(
            queue, dest, pattern, byte_count=byte_count, wait_for=wait_for)
|
|
1643
|
+
|
|
1644
|
+
|
|
1645
|
+
def enqueue_svm_migrate_mem(
        queue: CommandQueue,
        svms: Sequence[SVMPointer | HasBufferInterface],
        flags: mem_migration_flags,
        wait_for: WaitList = None,
        ) -> Event:
    """Enqueue a migration of the given shared-virtual-memory regions.

    :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy`
        arrays), or any implementation of :class:`SVMPointer`. Entries that
        are not already an :class:`SVMPointer` are wrapped in :class:`SVM`,
        matching what :func:`enqueue_svm_memfill` does for its destination.
    :arg flags: a combination of :class:`mem_migration_flags`

    |std-enqueue-blurb|

    .. versionadded:: 2016.2

    This function requires OpenCL 2.1.
    """

    # Wrap plain buffer objects so every entry handed to the low-level call
    # is an SVMPointer, as the documentation of *svms* promises.
    svm_pointers = [
        svm if isinstance(svm, SVMPointer) else SVM(svm)
        for svm in svms]

    return _cl._enqueue_svm_migrate_mem(queue, svm_pointers, flags, wait_for)
|
|
1664
|
+
|
|
1665
|
+
|
|
1666
|
+
def svm_empty(
        ctx: Context,
        flags: svm_mem_flags,
        shape: int | tuple[int, ...],
        dtype: DTypeT,
        order: Literal["F"] | Literal["C"] = "C",
        alignment: int | None = None,
        queue: CommandQueue | None = None,
        ) -> np.ndarray[tuple[int, ...], DTypeT]:
    """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype*
    and *order*. (See :func:`numpy.empty` for the meaning of these arguments.)
    The array will be allocated in shared virtual memory belonging
    to *ctx*.

    :arg ctx: a :class:`Context`
    :arg flags: a combination of flags from :class:`svm_mem_flags`.
    :arg shape: an integer or an iterable of integers.
    :arg order: ``"C"`` or ``"F"`` (either case is accepted).
    :arg alignment: the number of bytes to which the beginning of the memory
        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *dtype*.
    :arg queue: passed on unchanged to the underlying SVM allocation.

    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
        is a :class:`SVMAllocation`.
    :raises TypeError: if *shape* is neither iterable nor an integer.
    :raises ValueError: if *order* is not one of ``"C"``, ``"c"``, ``"F"``,
        ``"f"``.

    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
    will likely want to wrap the returned array in an :class:`SVM` tag.

    .. versionadded:: 2016.2

    .. versionchanged:: 2022.2

        *queue* argument added.
    """

    dtype = np.dtype(dtype)

    # Accept either an iterable of dimensions or a single integer; *s* ends
    # up as the total number of elements.
    try:
        shape = cast("tuple[int, ...]", shape)
        s = 1
        for dim in shape:
            s *= dim
    except TypeError as err:
        admissible_types = (int, np.integer)

        if not isinstance(shape, admissible_types):
            raise TypeError("shape must either be iterable or "
                    "castable to an integer") from err
        s = shape
        shape = (shape,)

    # Compare against the exact admissible spellings. A substring test such
    # as ``order in "fF"`` would also accept "" and "fF".
    if order in ("f", "F"):
        fortran_order = True
    elif order in ("c", "C"):
        fortran_order = False
    else:
        raise ValueError("order not recognized: %s" % (order,))

    itemsize = dtype.itemsize
    nbytes = s * itemsize

    from pyopencl.compyte.array import c_contiguous_strides, f_contiguous_strides

    if fortran_order:
        strides = f_contiguous_strides(itemsize, shape)
    else:
        strides = c_contiguous_strides(itemsize, shape)

    descr = dtype.descr

    # Array-interface dictionary that the allocation presents to numpy.
    interface = {
        "version": 3,
        "shape": shape,
        "strides": strides,
    }

    if len(descr) == 1:
        interface["typestr"] = descr[0][1]
    else:
        # More than one descr entry: describe the item as an opaque block
        # of bytes and pass the field layout along separately.
        interface["typestr"] = "V%d" % itemsize
        interface["descr"] = descr

    if alignment is None:
        alignment = itemsize

    svm_alloc = _OverriddenArrayInterfaceSVMAllocation(
            ctx, nbytes, alignment, flags, _interface=interface,
            queue=queue)
    return np.asarray(svm_alloc)
|
|
1747
|
+
|
|
1748
|
+
|
|
1749
|
+
def svm_empty_like(
        ctx: Context,
        flags: svm_mem_flags,
        ary: np.ndarray[tuple[int, ...], DTypeT],
        alignment: int | None = None,
        ) -> np.ndarray[tuple[int, ...], DTypeT]:
    """Allocate an empty :class:`numpy.ndarray` with the shape, dtype and
    memory order of the existing :class:`numpy.ndarray` *ary*. The array
    will be allocated in shared virtual memory belonging to *ctx*.

    :arg alignment: the number of bytes to which the beginning of the memory
        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of the
        dtype of *ary*.

    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
        is a :class:`SVMAllocation`.
    :raises ValueError: if *ary* is neither C- nor Fortran-contiguous.

    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
    will likely want to wrap the returned array in an :class:`SVM` tag.

    .. versionadded:: 2016.2
    """
    ary_flags = ary.flags

    if not (ary_flags.c_contiguous or ary_flags.f_contiguous):
        raise ValueError("array is neither C- nor Fortran-contiguous")

    # C order wins when an array is contiguous in both senses.
    order = "C" if ary_flags.c_contiguous else "F"

    return svm_empty(
        ctx, flags, ary.shape, ary.dtype, order, alignment=alignment)
|
|
1779
|
+
|
|
1780
|
+
|
|
1781
|
+
def csvm_empty(
        ctx: Context,
        shape: int | tuple[int, ...],
        dtype: DTypeT,
        order: Literal["F"] | Literal["C"] = "C",
        alignment: int | None = None,
        queue: CommandQueue | None = None,
        ) -> np.ndarray[tuple[int, ...], DTypeT]:
    """
    Like :func:`svm_empty`, but with *flags* set for a coarse-grain read-write
    buffer.

    .. versionadded:: 2016.2
    """
    coarse_grain_rw = svm_mem_flags.READ_WRITE
    return svm_empty(
        ctx, coarse_grain_rw, shape, dtype,
        order=order, alignment=alignment, queue=queue)
|
|
1797
|
+
|
|
1798
|
+
|
|
1799
|
+
def csvm_empty_like(
        ctx: Context,
        ary: np.ndarray[tuple[int, ...], DTypeT],
        alignment: int | None = None,
        ) -> np.ndarray[tuple[int, ...], DTypeT]:
    """
    Like :func:`svm_empty_like`, but with *flags* set for a coarse-grain
    read-write buffer.

    .. versionadded:: 2016.2
    """
    coarse_grain_rw = svm_mem_flags.READ_WRITE
    return svm_empty_like(ctx, coarse_grain_rw, ary, alignment=alignment)
|
|
1811
|
+
|
|
1812
|
+
|
|
1813
|
+
def fsvm_empty(
        ctx: Context,
        shape: int | tuple[int, ...],
        dtype: DTypeT,
        order: Literal["F"] | Literal["C"] = "C",
        alignment: int | None = None,
        queue: CommandQueue | None = None,
        ) -> np.ndarray[tuple[int, ...], DTypeT]:
    """
    Like :func:`svm_empty`, but with *flags* set for a fine-grain read-write
    buffer.

    .. versionadded:: 2016.2
    """
    fine_grain_rw = (
        svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER)
    return svm_empty(
        ctx, fine_grain_rw, shape, dtype,
        order=order, alignment=alignment, queue=queue)
|
|
1830
|
+
|
|
1831
|
+
|
|
1832
|
+
def fsvm_empty_like(
        ctx: Context,
        ary: np.ndarray[tuple[int, ...], DTypeT],
        alignment: int | None = None,
        ) -> np.ndarray[tuple[int, ...], DTypeT]:
    """
    Like :func:`svm_empty_like`, but with *flags* set for a fine-grain
    read-write buffer.

    .. versionadded:: 2016.2
    """
    fine_grain_rw = (
        svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER)
    return svm_empty_like(ctx, fine_grain_rw, ary, alignment=alignment)
|
|
1848
|
+
|
|
1849
|
+
# }}}
|
|
1850
|
+
|
|
1851
|
+
|
|
1852
|
+
# SVM is only appended when the OpenCL headers report version 2.0 or newer.
# NOTE(review): judging by the name, this tuple lists the classes accepted
# as kernel arguments -- confirm against its users.
_KERNEL_ARG_CLASSES: tuple[type, ...] = (
    MemoryObjectHolder,
    Sampler,
    CommandQueue,
    LocalMemory,
) + ((SVM,) if get_cl_header_version() >= (2, 0) else ())
|
|
1859
|
+
|
|
1860
|
+
|
|
1861
|
+
# Type alias for a zero-argument callable that returns a Context.
CtxFactory: TypeAlias = Callable[[], Context]
|
|
1862
|
+
|
|
1863
|
+
|
|
1864
|
+
# Public names of this module. Each name appears exactly once.
__all__ = [
    "SVM",
    "VERSION",
    "VERSION_STATUS",
    "VERSION_TEXT",
    "Buffer",
    "CommandQueue",
    "Context",
    "CtxFactory",
    "Device",
    "DeviceTopologyAmd",
    "Error",
    "Event",
    "GLBuffer",
    "GLRenderBuffer",
    "GLTexture",
    "Image",
    "ImageDescriptor",
    "ImageFormat",
    "Kernel",
    "LocalMemory",
    "LogicError",
    "MemoryError",
    "MemoryMap",
    "MemoryObject",
    "MemoryObjectHolder",
    "NannyEvent",
    "Pipe",
    "Platform",
    "Program",
    "RuntimeError",
    "SVMAllocation",
    "SVMMap",
    "SVMPointer",
    "Sampler",
    "UserEvent",
    "WaitList",
    "_csc",
    "addressing_mode",
    "channel_order",
    "channel_type",
    "choose_devices",
    "command_execution_status",
    "command_queue_info",
    "command_queue_properties",
    "command_type",
    "context_info",
    "context_properties",
    "create_image",
    "create_program_with_built_in_kernels",
    "create_some_context",
    "csvm_empty",
    "csvm_empty_like",
    "device_affinity_domain",
    "device_atomic_capabilities",
    "device_device_enqueue_capabilities",
    "device_exec_capabilities",
    "device_fp_config",
    "device_info",
    "device_local_mem_type",
    "device_mem_cache_type",
    "device_partition_property",
    "device_svm_capabilities",
    "device_topology_type_amd",
    "device_type",
    "enable_debugging",
    "enqueue_acquire_gl_objects",
    "enqueue_barrier",
    "enqueue_copy",
    "enqueue_copy_buffer_p2p_amd",
    "enqueue_fill",
    "enqueue_fill_buffer",
    "enqueue_fill_image",
    "enqueue_map_buffer",
    "enqueue_map_image",
    "enqueue_marker",
    "enqueue_migrate_mem_objects",
    "enqueue_nd_range_kernel",
    "enqueue_release_gl_objects",
    "enqueue_svm_memfill",
    "enqueue_svm_migrate_mem",
    "event_info",
    "filter_mode",
    "fsvm_empty",
    "fsvm_empty_like",
    "get_apple_cgl_share_group",
    "get_cl_header_version",
    "get_platforms",
    "get_supported_image_formats",
    "gl_context_info",
    "gl_object_type",
    "gl_texture_info",
    "have_gl",
    "image_from_array",
    "image_info",
    "kernel_arg_access_qualifier",
    "kernel_arg_address_qualifier",
    "kernel_arg_info",
    "kernel_arg_type_qualifier",
    "kernel_info",
    "kernel_sub_group_info",
    "kernel_work_group_info",
    "khronos_vendor_id",
    "link_program",
    "map_flags",
    "mem_flags",
    "mem_info",
    "mem_migration_flags",
    "mem_object_type",
    "pipe_info",
    "pipe_properties",
    "platform_info",
    "profiling_info",
    "program_binary_type",
    "program_build_info",
    "program_info",
    "program_kind",
    "queue_properties",
    "sampler_info",
    "sampler_properties",
    "status_code",
    "svm_empty",
    "svm_empty_like",
    "svm_mem_flags",
    "unload_platform_compiler",
    "version_bits",
    "wait_for_events",
]
|
|
1993
|
+
|
|
1994
|
+
|
|
1995
|
+
# vim: foldmethod=marker
|