pyopencl 2026.1.1__cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyopencl/.libs/libOpenCL-34a55fe4.so.1.0.0 +0 -0
- pyopencl/__init__.py +1995 -0
- pyopencl/_cl.cpython-314t-aarch64-linux-gnu.so +0 -0
- pyopencl/_cl.pyi +2009 -0
- pyopencl/_cluda.py +57 -0
- pyopencl/_monkeypatch.py +1104 -0
- pyopencl/_mymako.py +17 -0
- pyopencl/algorithm.py +1454 -0
- pyopencl/array.py +3530 -0
- pyopencl/bitonic_sort.py +245 -0
- pyopencl/bitonic_sort_templates.py +597 -0
- pyopencl/cache.py +553 -0
- pyopencl/capture_call.py +200 -0
- pyopencl/characterize/__init__.py +461 -0
- pyopencl/characterize/performance.py +240 -0
- pyopencl/cl/pyopencl-airy.cl +324 -0
- pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
- pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
- pyopencl/cl/pyopencl-bessel-y.cl +435 -0
- pyopencl/cl/pyopencl-complex.h +303 -0
- pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
- pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
- pyopencl/cl/pyopencl-random123/array.h +325 -0
- pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
- pyopencl/cl/pyopencl-random123/philox.cl +486 -0
- pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
- pyopencl/clmath.py +281 -0
- pyopencl/clrandom.py +412 -0
- pyopencl/cltypes.py +217 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/__init__.py +0 -0
- pyopencl/compyte/array.py +211 -0
- pyopencl/compyte/dtypes.py +314 -0
- pyopencl/compyte/pyproject.toml +49 -0
- pyopencl/elementwise.py +1288 -0
- pyopencl/invoker.py +417 -0
- pyopencl/ipython_ext.py +70 -0
- pyopencl/py.typed +0 -0
- pyopencl/reduction.py +829 -0
- pyopencl/scan.py +1921 -0
- pyopencl/tools.py +1680 -0
- pyopencl/typing.py +61 -0
- pyopencl/version.py +11 -0
- pyopencl-2026.1.1.dist-info/METADATA +108 -0
- pyopencl-2026.1.1.dist-info/RECORD +47 -0
- pyopencl-2026.1.1.dist-info/WHEEL +6 -0
- pyopencl-2026.1.1.dist-info/licenses/LICENSE +104 -0
pyopencl/_monkeypatch.py
ADDED
|
@@ -0,0 +1,1104 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
__copyright__ = "Copyright (C) 2025 University of Illinois Board of Trustees"
|
|
5
|
+
|
|
6
|
+
__license__ = """
|
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
in the Software without restriction, including without limitation the rights
|
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
furnished to do so, subject to the following conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be included in
|
|
15
|
+
all copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
23
|
+
THE SOFTWARE.
|
|
24
|
+
"""
|
|
25
|
+
import inspect as _inspect
|
|
26
|
+
from sys import intern
|
|
27
|
+
from typing import (
|
|
28
|
+
TYPE_CHECKING,
|
|
29
|
+
Any,
|
|
30
|
+
Literal,
|
|
31
|
+
TextIO,
|
|
32
|
+
TypeVar,
|
|
33
|
+
cast,
|
|
34
|
+
overload,
|
|
35
|
+
)
|
|
36
|
+
from warnings import warn
|
|
37
|
+
|
|
38
|
+
import numpy as np
|
|
39
|
+
|
|
40
|
+
import pyopencl._cl as _cl
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
if TYPE_CHECKING:
|
|
44
|
+
from collections.abc import Callable, Collection, Sequence
|
|
45
|
+
|
|
46
|
+
from numpy.typing import NDArray
|
|
47
|
+
|
|
48
|
+
from pyopencl import SVMMap
|
|
49
|
+
from pyopencl.typing import HasBufferInterface, KernelArg, SVMInnerT, WaitList
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# All classes exported by the native ``_cl`` module whose name starts with a
# lower-case letter, except for a few names that are explicitly excluded.
CONSTANT_CLASSES = tuple(
    member
    for member_name, member in _inspect.getmembers(_cl, _inspect.isclass)
    if member_name[0].islower()
    and member_name not in ["zip", "map", "range"])
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Constant classes from ``_cl`` that are listed as bit fields.
# NOTE(review): presumably these are the classes for which ``to_string``
# takes its "bitfield" branch (via ``cls._is_bitfield``) -- confirm where
# that flag is assigned.
BITFIELD_CONSTANT_CLASSES = (
    _cl.device_type,
    _cl.device_fp_config,
    _cl.device_exec_capabilities,
    _cl.command_queue_properties,
    _cl.mem_flags,
    _cl.map_flags,
    _cl.kernel_arg_type_qualifier,
    _cl.device_affinity_domain,
    _cl.mem_migration_flags,
    _cl.device_svm_capabilities,
    _cl.queue_properties,
    _cl.svm_mem_flags,
    _cl.device_atomic_capabilities,
    _cl.device_device_enqueue_capabilities,
    _cl.version_bits,
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def generic_get_cl_version(self: _cl.Platform):
    """Parse ``self.version`` into a ``(major, minor)`` tuple of ints.

    :raises RuntimeError: if the version string does not have the form
        ``OpenCL <major>.<minor> <anything>``.
    """
    import re

    reported = self.version
    parsed = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", reported)
    if parsed is not None:
        major, minor = parsed.groups()
        return int(major), int(minor)

    raise RuntimeError("%s %s returned non-conformant "
                       "platform version string '%s'" %
                       (type(self).__name__, self, reported))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def platform_repr(self: _cl.Platform):
    """Return a debugging representation with the platform name and handle."""
    return "<pyopencl.Platform '{}' at 0x{:x}>".format(self.name, self.int_ptr)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def device_repr(self: _cl.Device):
    """Return a debugging representation naming the device and its platform.

    Both names are stripped of surrounding whitespace.
    """
    device_name = self.name.strip()
    platform_name = self.platform.name.strip()
    handle = self.int_ptr
    return f"<pyopencl.Device '{device_name}' on '{platform_name}' at 0x{handle:x}>"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def device_hashable_model_and_version_identifier(self: _cl.Device):
    """Return a tuple identifying the device by vendor, name and version.

    The leading ``"v1"`` element tags the layout of the tuple.
    """
    identifying_fields = (self.vendor, self.vendor_id, self.name, self.version)
    return ("v1", *identifying_fields)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def device_persistent_unique_id(self: _cl.Device):
    """Deprecated alias for the hashable model-and-version identifier.

    Emits a :class:`DeprecationWarning` attributed to the caller and then
    delegates to :func:`device_hashable_model_and_version_identifier`.
    """
    deprecation_message = (
        "Device.persistent_unique_id is deprecated. "
        "Use Device.hashable_model_and_version_identifier instead.")
    warn(deprecation_message, DeprecationWarning, stacklevel=2)

    return device_hashable_model_and_version_identifier(self)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def context_repr(self: _cl.Context):
    """Return a debugging representation listing the context's devices."""
    handle = self.int_ptr
    device_list = ", ".join([repr(dev) for dev in self.devices])
    return f"<pyopencl.Context at 0x{handle:x} on {device_list}>"
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def context_get_cl_version(self: _cl.Context):
    """Return the CL version reported by the platform of the first device."""
    first_device = self.devices[0]
    return first_device.platform._get_cl_version()
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def command_queue_enter(self: _cl.CommandQueue):
    """Context-manager entry: the queue itself is the managed object."""
    queue = self
    return queue
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def command_queue_exit(self: _cl.CommandQueue, exc_type, exc_val, exc_tb):
    """Context-manager exit: finish the queue, then finalize it.

    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the ``with`` block is not suppressed.
    """
    for step in (self.finish, self._finalize):
        step()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def command_queue_get_cl_version(self: _cl.CommandQueue):
    """Return the CL version reported by the queue's device."""
    queue_device = self.device
    return queue_device._get_cl_version()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def program_get_build_logs(self: _cl._Program):
    """Return a list of ``(device, log)`` pairs for the program's devices.

    Retrieval is best-effort: if obtaining a device's log raises, a
    placeholder string is recorded for that device instead.
    """
    def fetch_log(dev):
        try:
            return self.get_build_info(dev, _cl.program_build_info.LOG)
        except Exception:
            return "<error retrieving log>"

    return [
        (dev, fetch_log(dev))
        for dev in self.get_info(_cl.program_info.DEVICES)]
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def program_build(
        self: _cl._Program,
        options_bytes: bytes,
        devices: Sequence[_cl.Device] | None = None
        ) -> _cl._Program:
    """Build the program and return *self*.

    On failure, a ``_cl.RuntimeError`` is raised whose message is the
    original error text followed by the build log of every device. On
    success, any non-blank build logs are forwarded to
    ``pyopencl.compiler_output``.
    """
    separator = 75*"="+"\n"

    failure = None
    try:
        self._build(options=options_bytes, devices=devices)
    except _cl.Error as build_exc:
        headline = str(build_exc)
        per_device = separator.join(
            f"Build on {dev}:\n\n{log}"
            for dev, log in self._get_build_logs())

        failure = _cl.RuntimeError(
            _cl._ErrorRecord(
                msg=headline + "\n\n" + per_device,
                code=build_exc.code,
                routine=build_exc.routine))

    # Raised outside the 'except' block so that the original exception is
    # not attached as context (removes one redundant level of nesting).
    if failure is not None:
        raise failure

    noteworthy_logs = [
        f"Build on {dev} succeeded, but said:\n\n{log}"
        for dev, log in self._get_build_logs()
        if log is not None and log.strip()]
    message = separator.join(noteworthy_logs)

    if not message:
        return self

    program_kind = self.kind()
    if program_kind == _cl.program_kind.SOURCE:
        build_type = "From-source build"
    elif program_kind == _cl.program_kind.BINARY:
        build_type = "From-binary build"
    elif program_kind == _cl.program_kind.IL:
        build_type = "From-IL build"
    else:
        build_type = "Build"

    from pyopencl import compiler_output
    compiler_output("%s succeeded, but resulted in non-empty logs:\n%s"
            % (build_type, message))

    return self
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class ProfilingInfoGetter:
    """Attribute-style access to the profiling information of an event.

    Looking up an attribute such as ``start`` queries the wrapped event for
    the ``_cl.profiling_info`` constant with the upper-cased name.
    """

    event: _cl.Event

    # Declared for type checkers only; the values come from __getattr__.
    queued: int  # pyright: ignore[reportUninitializedInstanceVariable]
    submit: int  # pyright: ignore[reportUninitializedInstanceVariable]
    start: int  # pyright: ignore[reportUninitializedInstanceVariable]
    end: int  # pyright: ignore[reportUninitializedInstanceVariable]
    complete: int  # pyright: ignore[reportUninitializedInstanceVariable]

    def __init__(self, event: _cl.Event):
        self.event = event

    def __getattr__(self, name: str):
        info_cls = _cl.profiling_info

        is_lower_case = name.islower()
        if not is_lower_case:
            warn(f"Using non-lower-case attributes with Event.profile "
                    f"is deprecated. Got: '{name}', expected: '{name.lower()}'. "
                    "This will stop working in 2026.",
                    DeprecationWarning, stacklevel=2)

        try:
            info_key = getattr(info_cls, name.upper())
        except AttributeError as err:
            raise AttributeError("%s has no attribute '%s'"
                                 % (type(self), name)) from err

        return self.event.get_profiling_info(info_key)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# Keep references to the native ``Kernel`` methods; the wrappers
# ``kernel_get_info`` and ``kernel_get_work_group_info`` call through to them.
kernel_old_get_info = _cl.Kernel.get_info
kernel_old_get_work_group_info = _cl.Kernel.get_work_group_info
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def kernel_set_arg_types(self: _cl.Kernel, arg_types) -> None:
    """Generate and install argument-setting code for the given *arg_types*.

    Checks every device of the kernel's context for the struct argument
    counting bug. The workaround is requested only if all devices are
    affected; if only some are, a warning is requested instead.
    """
    arg_types = tuple(arg_types)

    # {{{ arg counting bug handling

    # For example:
    # https://github.com/pocl/pocl/issues/197
    # (but Apple CPU has a similar bug)

    from pyopencl.characterize import has_struct_arg_count_bug

    bug_per_device = [
        has_struct_arg_count_bug(dev, self.context)
        for dev in self.context.devices]

    from pytools import single_valued

    affected_somewhere = any(bug_per_device)
    affected_everywhere = affected_somewhere and all(bug_per_device)

    work_around_arg_count_bug = (
        single_valued(bug_per_device) if affected_everywhere else False)
    warn_about_arg_count_bug = affected_somewhere and not affected_everywhere

    # }}}

    from pyopencl.invoker import generate_enqueue_and_set_args
    generated = generate_enqueue_and_set_args(
        self.function_name,
        len(arg_types), self.num_args,
        arg_types,
        warn_about_arg_count_bug=warn_about_arg_count_bug,
        work_around_arg_count_bug=work_around_arg_count_bug,
        devs=self.context.devices)
    self._set_enqueue_and_set_args(*generated)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
@overload
def kernel_get_work_group_info(
        self: _cl.Kernel,
        param: Literal[
            _cl.kernel_work_group_info.WORK_GROUP_SIZE,
            _cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
            _cl.kernel_work_group_info.LOCAL_MEM_SIZE,
            _cl.kernel_work_group_info.PRIVATE_MEM_SIZE,
            ],
        device: _cl.Device
        ) -> int: ...


@overload
def kernel_get_work_group_info(
        self: _cl.Kernel,
        param: Literal[
            _cl.kernel_work_group_info.COMPILE_WORK_GROUP_SIZE,
            _cl.kernel_work_group_info.GLOBAL_WORK_SIZE,
            ],
        device: _cl.Device
        ) -> Sequence[int]: ...


@overload
def kernel_get_work_group_info(
        self: _cl.Kernel,
        param: int,
        device: _cl.Device
        ) -> object: ...


def kernel_get_work_group_info(
        self: _cl.Kernel,
        param: int,
        device: _cl.Device
        ) -> object:
    """Return work-group info for *param* on *device*, memoized per kernel.

    Results are stored in ``self._wg_info_cache`` (created on first use),
    keyed by ``(param, device.int_ptr)``. Only a cache miss calls the native
    implementation.
    """
    if not hasattr(self, "_wg_info_cache"):
        self._wg_info_cache = {}
    cache = self._wg_info_cache

    key = (param, device.int_ptr)
    if key not in cache:
        cache[key] = kernel_old_get_work_group_info(self, param, device)

    return cache[key]
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def kernel_capture_call(
            self: _cl.Kernel,
            output_file: str | TextIO,
            queue: _cl.CommandQueue,
            global_size: tuple[int, ...],
            local_size: tuple[int, ...] | None,
            *args: KernelArg,
            wait_for: WaitList = None,
            g_times_l: bool = False,
            allow_empty_ndrange: bool = False,
            global_offset: tuple[int, ...] | None = None,
        ) -> None:
    """Forward all arguments unchanged to
    ``pyopencl.capture_call.capture_kernel_call``, with this kernel as the
    first argument.
    """
    from pyopencl.capture_call import capture_kernel_call

    launch_options = {
        "wait_for": wait_for,
        "g_times_l": g_times_l,
        "allow_empty_ndrange": allow_empty_ndrange,
        "global_offset": global_offset,
    }
    capture_kernel_call(
            self, output_file, queue, global_size, local_size,
            *args, **launch_options)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def kernel_get_info(self: _cl.Kernel, param_name: _cl.kernel_info) -> object:
    """Query kernel info through the native implementation.

    A raw ``_cl._Program`` result is wrapped in :class:`pyopencl.Program`;
    every other value is returned unchanged.
    """
    info = kernel_old_get_info(self, param_name)

    if not isinstance(info, _cl._Program):
        return info

    from pyopencl import Program
    return Program(info)
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def image_format_repr(self: _cl.ImageFormat) -> str:
    """Return ``ImageFormat(<order>, <data type>)`` using constant names.

    Values without a known name are rendered with a hexadecimal placeholder.
    """
    order_name = _cl.channel_order.to_string(
            self.channel_order, "<unknown channel order 0x%x>")
    data_type_name = _cl.channel_type.to_string(
            self.channel_data_type, "<unknown channel data type 0x%x>")
    return f"ImageFormat({order_name}, {data_type_name})"
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def image_format_eq(self: _cl.ImageFormat, other: object):
    """Two image formats are equal if channel order and data type agree.

    Anything that is not an ``ImageFormat`` compares unequal.
    """
    if not isinstance(other, _cl.ImageFormat):
        return False

    return (self.channel_order == other.channel_order
            and self.channel_data_type == other.channel_data_type)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def image_format_ne(self: _cl.ImageFormat, other: object):
    """Return the negation of :func:`image_format_eq`."""
    are_equal = image_format_eq(self, other)
    return not are_equal
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def image_format_hash(self: _cl.ImageFormat) -> int:
    """Hash on the same fields that equality compares, plus the type."""
    hash_key = (type(self), self.channel_order, self.channel_data_type)
    return hash(hash_key)
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def image_init(self: _cl.Image,
        context: _cl.Context,
        flags: _cl.mem_flags,
        format: _cl.ImageFormat,
        shape: tuple[int, ...] | None = None,
        pitches: tuple[int, ...] | None = None,

        hostbuf: HasBufferInterface | None = None,
        is_array: bool = False,
        buffer: _cl.Buffer | None = None,
        *,
        desc: _cl.ImageDescriptor | None = None,
        _through_create_image: bool = False,
        ) -> None:
    """Initialize an image, either from a descriptor or from shape arguments.

    There are three paths:

    * If *desc* is given, it is used as-is; *shape*, *pitches*, *is_array*
      and *buffer* may then not be passed.
    * Otherwise, if both the context and the CL headers are at version 1.2
      or newer, a descriptor is assembled from *shape*, *pitches*,
      *is_array* and *buffer*. This path emits a
      :class:`DeprecationWarning` unless *_through_create_image* is set.
    * Otherwise the legacy (CL 1.1 and older) initializer is used, which
      supports neither *is_array* nor *buffer*.

    :arg shape: defaults to ``hostbuf.shape`` if *hostbuf* is given.
    :arg pitches: may only be given together with *hostbuf*.
    :raises TypeError: for argument combinations that the selected path
        does not support.
    :raises ValueError: if *buffer* and *is_array* are both given, or if
        *shape* has more than three entries.
    """
    if hostbuf is not None and not \
            (flags & (_cl.mem_flags.USE_HOST_PTR | _cl.mem_flags.COPY_HOST_PTR)):
        warn("'hostbuf' was passed, but no memory flags to make use of it.",
             stacklevel=2)

    if desc is not None:
        if shape is not None:
            raise TypeError("shape may not be passed when using descriptor")
        if pitches is not None:
            raise TypeError("pitches may not be passed when using descriptor")
        if is_array:
            raise TypeError("is_array may not be passed when using descriptor")
        if buffer is not None:
            # Previously this reported 'is_array' (copy-paste slip).
            raise TypeError("buffer may not be passed when using descriptor")

        _cl.Image._custom_init(self, context, flags, format, desc, hostbuf)

        return

    if shape is None and hostbuf is None:
        raise _cl.Error("'shape' must be passed if 'hostbuf' is not given")

    if shape is None and hostbuf is not None:
        shape = hostbuf.shape

    if hostbuf is None and pitches is not None:
        raise _cl.Error("'pitches' may only be given if 'hostbuf' is given")

    if context._get_cl_version() >= (1, 2) and _cl.get_cl_header_version() >= (1, 2):
        if not _through_create_image:
            warn("Non-descriptor Image constructor called. "
                 "This will stop working in 2026. "
                 "Use create_image instead (with the same arguments).",
                 DeprecationWarning, stacklevel=2)

        if buffer is not None and is_array:
            raise ValueError(
                    "'buffer' and 'is_array' are mutually exclusive")

        # The image type follows from the number of dimensions and from
        # whether an array or a buffer-backed image was requested.
        if len(shape) == 3:
            if buffer is not None:
                raise TypeError(
                        "'buffer' argument is not supported for 3D arrays")
            elif is_array:
                image_type = _cl.mem_object_type.IMAGE2D_ARRAY
            else:
                image_type = _cl.mem_object_type.IMAGE3D

        elif len(shape) == 2:
            if buffer is not None:
                raise TypeError(
                        "'buffer' argument is not supported for 2D arrays")
            elif is_array:
                image_type = _cl.mem_object_type.IMAGE1D_ARRAY
            else:
                image_type = _cl.mem_object_type.IMAGE2D

        elif len(shape) == 1:
            if buffer is not None:
                image_type = _cl.mem_object_type.IMAGE1D_BUFFER
            elif is_array:
                raise TypeError("array of zero-dimensional images not supported")
            else:
                image_type = _cl.mem_object_type.IMAGE1D

        else:
            raise ValueError("images cannot have more than three dimensions")

        desc = _cl.ImageDescriptor()
        desc.image_type = image_type
        desc.shape = shape  # also sets desc.array_size

        if pitches is None:
            desc.pitches = (0, 0)
        else:
            desc.pitches = pitches

        desc.num_mip_levels = 0  # per CL 1.2 spec
        desc.num_samples = 0  # per CL 1.2 spec
        desc.buffer = buffer

        _cl.Image._custom_init(self, context, flags, format, desc, hostbuf)
    else:
        # legacy init for CL 1.1 and older
        if is_array:
            raise TypeError("'is_array=True' is not supported for CL < 1.2")
        # if num_mip_levels is not None:
            # raise TypeError(
            #       "'num_mip_levels' argument is not supported for CL < 1.2")
        # if num_samples is not None:
            # raise TypeError(
            #        "'num_samples' argument is not supported for CL < 1.2")
        if buffer is not None:
            raise TypeError("'buffer' argument is not supported for CL < 1.2")

        _cl.Image._custom_init(self, context, flags, format, shape,
                pitches, hostbuf)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def image_shape(self: _cl.Image) -> tuple[int, int] | tuple[int, int, int]:
    """Return ``(width, height)`` for 2D images and
    ``(width, height, depth)`` for 3D images.

    :raises _cl.LogicError: for every other memory object type.
    """
    object_type = self.type

    if object_type == _cl.mem_object_type.IMAGE2D:
        return (self.width, self.height)

    if object_type == _cl.mem_object_type.IMAGE3D:
        return (self.width, self.height, self.depth)

    raise _cl.LogicError("only images have shapes")
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def error_str(self: _cl.Error) -> str:
    """Format the error as ``<routine> failed: <status> - <message>``.

    If ``self.what`` has no ``routine`` attribute, it is simply converted
    to a string. Otherwise the status name (omitted for ``SUCCESS``), the
    failing routine and the message are combined, skipping empty parts.
    """
    record = self.what
    if not hasattr(record, "routine"):
        return str(record)

    description = ""
    if record.code() != _cl.status_code.SUCCESS:
        description = _cl.status_code.to_string(
                record.code(), "<unknown error %d>")

    routine = record.routine()
    if routine:
        description = f"{routine} failed: {description}"

    what = record.what()
    if what:
        if description:
            description += " - "
        description += what

    return description
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def error_code(self: _cl.Error) -> int:
    """Return the status code stored in the error record (``args[0]``)."""
    record = cast("_cl._ErrorRecord", self.args[0])
    return record.code()
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def error_routine(self: _cl.Error) -> str:
    """Return the routine name stored in the error record (``args[0]``)."""
    record = cast("_cl._ErrorRecord", self.args[0])
    return record.routine()
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def error_what(self: _cl.Error) -> _cl._ErrorRecord:
    """Return the first exception argument, typed as the error record."""
    first_arg = self.args[0]
    return cast("_cl._ErrorRecord", first_arg)
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def memory_map_enter(self: _cl.MemoryMap):
    """Context-manager entry: the memory map itself is the managed object."""
    memory_map = self
    return memory_map
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def memory_map_exit(self: _cl.MemoryMap, exc_type, exc_val, exc_tb):
    """Context-manager exit: release the map.

    The exception arguments are ignored and nothing is returned, so an
    exception raised inside the ``with`` block is not suppressed.
    """
    release = self.release
    release()
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def svmptr_map(
            self: _cl.SVMPointer,
            queue: _cl.CommandQueue,
            *,
            flags: int,
            is_blocking: bool = True,
            wait_for: WaitList = None,
            size: int | None = None
        ) -> SVMMap[NDArray[Any]]:
    """
    :arg flags: a combination of :class:`pyopencl.map_flags`.
    :arg is_blocking: If *False*, the caller must wait on
        :attr:`SVMMap.event` of the returned object before touching the
        mapped memory.
    :arg size: The number of bytes to map. Defaults to :attr:`size` if not
        provided.

    |std-enqueue-blurb|
    """
    from pyopencl import SVMMap

    host_array = np.asarray(self.buf)
    map_event = _cl._enqueue_svm_map(
            queue, is_blocking, flags, self, wait_for, size=size)
    return SVMMap(self, host_array, queue, map_event)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def svmptr_map_ro(
            self: _cl.SVMPointer,
            queue: _cl.CommandQueue,
            *,
            is_blocking: bool = True,
            wait_for: WaitList = None,
            size: int | None = None
        ) -> SVMMap[NDArray[Any]]:
    """Shorthand for :meth:`map` with *flags* requesting a read-only map.
    """
    read_only = _cl.map_flags.READ
    return self.map(
            queue,
            flags=read_only,
            is_blocking=is_blocking,
            wait_for=wait_for,
            size=size)
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def svmptr_map_rw(
            self: _cl.SVMPointer,
            queue: _cl.CommandQueue,
            *,
            is_blocking: bool = True,
            wait_for: WaitList = None,
            size: int | None = None
        ) -> SVMMap[NDArray[Any]]:
    """Like :meth:`map`, but with *flags* set for a read-write map.

    All other arguments are passed through to :meth:`map` unchanged.
    """

    return self.map(queue, flags=_cl.map_flags.READ | _cl.map_flags.WRITE,
            is_blocking=is_blocking, wait_for=wait_for, size=size)
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def svmptr__enqueue_unmap(
            self: _cl.SVMPointer,
            queue: _cl.CommandQueue,
            wait_for: WaitList = None
        ) -> _cl.Event:
    """Enqueue an SVM unmap of *self* on *queue* and return its event."""
    unmap_event = _cl._enqueue_svm_unmap(queue, self, wait_for)
    return unmap_event
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def svmptr_as_buffer(
            self: _cl.SVMPointer,
            ctx: _cl.Context,
            *,
            flags: int | None = None,
            size: int | None = None
        ) -> _cl.Buffer:
    """
    :arg ctx: a :class:`Context`
    :arg flags: a combination of :class:`pyopencl.mem_flags`, defaults to
        ``READ_WRITE | USE_HOST_PTR``.
    :arg size: The size of the buffer in bytes. If not provided, defaults to
        :attr:`size`.
    :returns: a :class:`Buffer` corresponding to *self*.

    The memory referred to by this object must not be freed before
    the returned :class:`Buffer` is released.
    """

    if flags is None:
        flags = _cl.mem_flags.READ_WRITE | _cl.mem_flags.USE_HOST_PTR

    if size is None:
        size = self.size

    # The buffer is created on top of this object's host-visible memory.
    assert self.buf is not None
    return _cl.Buffer(ctx, flags, size=size, hostbuf=self.buf)
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
def svm_map(
            self: _cl.SVM[SVMInnerT],
            queue: _cl.CommandQueue,
            flags: int,
            is_blocking: bool = True,
            wait_for: WaitList = None
        ) -> SVMMap[SVMInnerT]:
    """
    :arg flags: a combination of :class:`pyopencl.map_flags`.
    :arg is_blocking: If *False*, the caller must wait on
        :attr:`SVMMap.event` of the returned object before touching the
        mapped memory.
    :returns: an :class:`SVMMap` instance

    Unlike the inherited :class:`SVMPointer.map`, no size can be given, and
    :attr:`mem` is exactly the array that is produced when the
    :class:`SVMMap` is used as a context manager.

    |std-enqueue-blurb|
    """
    from pyopencl import SVMMap

    wrapped_array = self.mem
    map_event = _cl._enqueue_svm_map(queue, is_blocking, flags, self, wait_for)
    return SVMMap(self, wrapped_array, queue, map_event)
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def svm_map_ro(
            self: _cl.SVM[SVMInnerT],
            queue: _cl.CommandQueue,
            is_blocking: bool = True,
            wait_for: WaitList = None,
        ) -> SVMMap[SVMInnerT]:
    """Shorthand for :meth:`map` with *flags* requesting a read-only map."""
    read_only = _cl.map_flags.READ
    return self.map(
            queue, read_only,
            is_blocking=is_blocking,
            wait_for=wait_for)
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
def svm_map_rw(
            self: _cl.SVM[SVMInnerT],
            queue: _cl.CommandQueue,
            is_blocking: bool = True,
            wait_for: WaitList = None,
        ) -> SVMMap[SVMInnerT]:
    """Like :meth:`map`, but with *flags* set for a read-write map.

    All other arguments are passed through to :meth:`map` unchanged.
    """

    return self.map(queue, _cl.map_flags.READ | _cl.map_flags.WRITE,
            is_blocking=is_blocking, wait_for=wait_for)
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def svm__enqueue_unmap(
            self: _cl.SVM[SVMInnerT],
            queue: _cl.CommandQueue,
            wait_for: WaitList = None
        ) -> _cl.Event:
    """Enqueue an SVM unmap of *self* on *queue* and return its event."""
    unmap_event = _cl._enqueue_svm_unmap(queue, self, wait_for)
    return unmap_event
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def to_string(
            cls: type,
            value: int,
            default_format: str | None = None
        ) -> str:
    """Return the name(s) of the constant(s) in *cls* matching *value*.

    For bitfield classes (``cls._is_bitfield`` is true), the names of all
    public integer constants that equal *value* or share a bit with it are
    joined with ``" | "``. For other classes, the first public attribute
    equal to *value* is returned.

    :arg default_format: a ``%``-format string applied to *value* if no
        name is found.
    :raises ValueError: if no name is found and *default_format* is *None*.
    """
    if cls._is_bitfield:
        names: list[str] = []
        for name in dir(cls):
            # Skip private attributes, as the non-bitfield branch does.
            # Otherwise int-like bookkeeping attributes (a true
            # '_is_bitfield' counts as the integer 1) would show up in the
            # result for values with matching bits.
            if name.startswith("_"):
                continue
            attr = getattr(cls, name)
            if not isinstance(attr, int):
                continue
            if attr == value or attr & value:
                names.append(name)
        if names:
            return " | ".join(names)
    else:
        for name in dir(cls):
            if (not name.startswith("_")
                    and getattr(cls, name) == value):
                return name

    if default_format is None:
        raise ValueError("a name for value %d was not found in %s"
                % (value, cls.__name__))
    else:
        return default_format % value
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _add_functionality():
    """Attach the Python-level helpers defined in this module to the wrapper
    classes exposed by ``_cl``.

    This assigns methods, properties and docstrings onto the ``_cl`` classes
    and tags every class in ``CONSTANT_CLASSES`` with ``_is_bitfield`` and a
    ``to_string`` class method. It takes no arguments, returns nothing, and
    is meant to be called for its side effects only.
    """
    # {{{ Platform

    _cl.Platform.__repr__ = platform_repr
    _cl.Platform._get_cl_version = generic_get_cl_version

    # }}}

    # {{{ Device

    _cl.Device.__repr__ = device_repr

    # undocumented for now:
    _cl.Device._get_cl_version = generic_get_cl_version
    _cl.Device.hashable_model_and_version_identifier = property(
            device_hashable_model_and_version_identifier)
    _cl.Device.persistent_unique_id = property(device_persistent_unique_id)

    # }}}

    # {{{ Context

    _cl.Context.__repr__ = context_repr
    from pytools import memoize_method
    _cl.Context._get_cl_version = memoize_method(context_get_cl_version)

    # }}}

    # {{{ CommandQueue

    _cl.CommandQueue.__enter__ = command_queue_enter
    _cl.CommandQueue.__exit__ = command_queue_exit
    _cl.CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version)

    # }}}

    # {{{ _Program (the internal, non-caching version)

    _cl._Program._get_build_logs = program_get_build_logs
    _cl._Program.build = program_build

    # }}}

    # {{{ Event

    _cl.Event.profile = property(ProfilingInfoGetter)

    # }}}

    # {{{ Kernel

    _cl.Kernel.get_work_group_info = kernel_get_work_group_info

    # FIXME: Possibly deprecate this version
    _cl.Kernel.set_scalar_arg_dtypes = kernel_set_arg_types
    _cl.Kernel.set_arg_types = kernel_set_arg_types

    _cl.Kernel.capture_call = kernel_capture_call
    _cl.Kernel.get_info = kernel_get_info

    # }}}

    # {{{ ImageFormat

    _cl.ImageFormat.__repr__ = image_format_repr
    _cl.ImageFormat.__eq__ = image_format_eq
    _cl.ImageFormat.__ne__ = image_format_ne
    _cl.ImageFormat.__hash__ = image_format_hash

    # }}}

    # {{{ Image

    _cl.Image.__init__ = image_init
    _cl.Image.shape = property(image_shape)

    # }}}

    # {{{ Error

    _cl.Error.__str__ = error_str
    _cl.Error.code = property(error_code)
    _cl.Error.routine = property(error_routine)
    _cl.Error.what = property(error_what)

    # }}}

    # {{{ MemoryMap

    _cl.MemoryMap.__doc__ = """
        This class may also be used as a context manager in a ``with`` statement.
        The memory corresponding to this object will be unmapped when
        this object is deleted or :meth:`release` is called.

        .. automethod:: release
        """
    _cl.MemoryMap.__enter__ = memory_map_enter
    _cl.MemoryMap.__exit__ = memory_map_exit

    # }}}

    # The SVM sections below only apply when the OpenCL header version is at
    # least 2.0. The check is evaluated once and reused.
    have_svm = _cl.get_cl_header_version() >= (2, 0)

    # {{{ SVMPointer

    if have_svm:
        _cl.SVMPointer.__doc__ = """A base class for things that can be passed to
            functions that allow an SVM pointer, e.g. kernel enqueues and memory
            copies.

            Objects of this type cannot currently be directly created or
            implemented in Python. To obtain objects implementing this type,
            consider its subtypes :class:`SVMAllocation` and :class:`SVM`.


            .. property:: svm_ptr

                Gives the SVM pointer as an :class:`int`.

            .. property:: size

                An :class:`int` denoting the size in bytes, or *None*, if the size
                of the SVM pointed to is not known.

                *Most* objects of this type (e.g. instances of
                :class:`SVMAllocation` and :class:`SVM`) know their size, so that,
                for example, :func:`enqueue_copy` will automatically copy an entire
                :class:`SVMAllocation` when a size is not explicitly specified.

            .. automethod:: map
            .. automethod:: map_ro
            .. automethod:: map_rw
            .. automethod:: as_buffer
            .. property:: buf

                An opaque object implementing the :c:func:`Python buffer protocol
                <PyObject_GetBuffer>`. It exposes the pointed-to memory as
                a one-dimensional buffer of bytes, with the size matching
                :attr:`size`.

                No guarantee is provided that two references to this attribute
                result in the same object.
            """

        _cl.SVMPointer.map = svmptr_map
        _cl.SVMPointer.map_ro = svmptr_map_ro
        _cl.SVMPointer.map_rw = svmptr_map_rw
        _cl.SVMPointer._enqueue_unmap = svmptr__enqueue_unmap
        _cl.SVMPointer.as_buffer = svmptr_as_buffer

    # }}}

    # {{{ SVMAllocation

    if have_svm:
        _cl.SVMAllocation.__doc__ = """
            Is a :class:`SVMPointer`.

            .. versionadded:: 2016.2

            .. automethod:: __init__

                :arg flags: See :class:`svm_mem_flags`.
                :arg queue: If not specified, the allocation will be freed
                    eagerly, irrespective of whether pending/enqueued operations
                    are still using this memory.

                    If specified, deallocation of the memory will be enqueued
                    with the given queue, and will only be performed
                    after previously-enqueued operations in the queue have
                    completed.

                    It is an error to specify an out-of-order queue.

                    .. warning::

                        Not specifying a queue will typically lead to undesired
                        behavior, including crashes and memory corruption.
                        See the warning in :ref:`svm`.

            .. automethod:: enqueue_release

                Enqueue the release of this allocation into *queue*.
                If *queue* is not specified, enqueue the deallocation
                into the queue provided at allocation time or via
                :meth:`bind_to_queue`.

            .. automethod:: bind_to_queue

                Change the queue used for implicit enqueue of deallocation
                to *queue*. Sufficient synchronization is ensured by
                enqueuing a marker into the old queue and waiting on this
                marker in the new queue.

            .. automethod:: unbind_from_queue

                Configure the allocation to no longer implicitly enqueue
                memory deallocation. If such a queue was previously provided,
                :meth:`~CommandQueue.finish` is automatically called on it.
            """

    # }}}

    # {{{ SVM

    if have_svm:
        _cl.SVM.__doc__ = """Tags an object exhibiting the Python buffer interface
            (such as a :class:`numpy.ndarray`) as referring to shared virtual
            memory.

            Is a :class:`SVMPointer`, hence objects of this type may be passed
            to kernel calls and :func:`enqueue_copy`, and all methods declared
            there are also available there. Note that :meth:`map` differs
            slightly from :meth:`SVMPointer.map`.

            Depending on the features of the OpenCL implementation, the following
            types of objects may be passed to/wrapped in this type:

            *   fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`,
                if the implementation supports fine-grained shared virtual memory.
                This memory may directly be passed to a kernel::

                    ary = cl.fsvm_empty(ctx, 1000, np.float32)
                    assert isinstance(ary, np.ndarray)

                    prg.twice(queue, ary.shape, None, cl.SVM(ary))
                    queue.finish() # synchronize
                    print(ary) # access from host

                Observe how mapping (as needed in coarse-grain SVM) is no longer
                necessary.

            *   any :class:`numpy.ndarray` (or other Python object with a buffer
                interface) if the implementation supports fine-grained *system*
                shared virtual memory.

                This is how plain :mod:`numpy` arrays may directly be passed to a
                kernel::

                    ary = np.zeros(1000, np.float32)
                    prg.twice(queue, ary.shape, None, cl.SVM(ary))
                    queue.finish() # synchronize
                    print(ary) # access from host

            *   coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty`
                for any implementation of OpenCL 2.0.

                .. note::

                    Applications making use of coarse-grain SVM may be better
                    served by opaque-style SVM. See :ref:`opaque-svm`.

                This is how coarse-grain SVM may be used from both host and device::

                    svm_ary = cl.SVM(
                        cl.csvm_empty(ctx, 1000, np.float32, alignment=64))
                    assert isinstance(svm_ary.mem, np.ndarray)

                    with svm_ary.map_rw(queue) as ary:
                        ary.fill(17) # use from host

                    prg.twice(queue, svm_ary.mem.shape, None, svm_ary)

            Coarse-grain shared-memory *must* be mapped into host address space
            using :meth:`~SVMPointer.map` before being accessed through the
            :mod:`numpy` interface.

            .. note::

                This object merely serves as a 'tag' that changes the behavior
                of functions to which it is passed. It has no special management
                relationship to the memory it tags. For example, it is permissible
                to grab a :class:`numpy.ndarray` out of :attr:`SVM.mem` of one
                :class:`SVM` instance and use the array to construct another.
                Neither of the tags needs to be kept alive.

            .. versionadded:: 2016.2

            .. attribute:: mem

                The wrapped object.

            .. automethod:: __init__
            .. automethod:: map
            .. automethod:: map_ro
            .. automethod:: map_rw
            """

    # }}}

    if have_svm:
        _cl.SVM.map = svm_map
        _cl.SVM.map_ro = svm_map_ro
        _cl.SVM.map_rw = svm_map_rw
        _cl.SVM._enqueue_unmap = svm__enqueue_unmap

    # }}}

    # Tag each constant class so that to_string() knows whether values are
    # bit masks, and expose to_string() as a class method on it.
    for cls in CONSTANT_CLASSES:
        cls._is_bitfield = cls in BITFIELD_CONSTANT_CLASSES
        cls.to_string = classmethod(to_string)
|
|
1017
|
+
|
|
1018
|
+
|
|
1019
|
+
# Apply the patches above as soon as this module is imported.
_add_functionality()


# ORDER DEPENDENCY: Some of the above may override get_info, the effect needs
# to be visible through the attributes. So get_info attr creation needs to happen
# after the overriding is complete.

# Type of the object whose info is being queried (the ``self`` of a getter).
T = TypeVar("T")


# Type of the info constant handed to the underlying info method.
InfoT = TypeVar("InfoT")
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
def make_getinfo(
    info_method: Callable[[T, InfoT], object],
    info_constant: InfoT,
) -> property:
    """Build a read-only property that evaluates
    ``info_method(self, info_constant)`` on every access.

    :arg info_method: callable taking the instance and the info constant.
    :arg info_constant: the constant bound into the returned property.
    """

    # The getter is intentionally left without a docstring of its own, so the
    # resulting property carries no documentation string.
    def result(self: T) -> object:
        return info_method(self, info_constant)

    uncached_property = property(fget=result)
    return uncached_property
|
|
1040
|
+
|
|
1041
|
+
|
|
1042
|
+
def make_cacheable_getinfo(
    info_method: Callable[[T, InfoT], object],
    cache_attr: str,
    info_constant: InfoT,
) -> property:
    """Build a read-only property like :func:`make_getinfo`, but remember the
    answer on the instance.

    The first access calls ``info_method(self, info_constant)`` and stores the
    value on the instance under the attribute name *cache_attr*; later
    accesses return the stored value without calling *info_method* again.
    """
    # Unique marker meaning "nothing cached yet"; a cached value of None (or
    # anything else) can never be confused with it.
    not_cached = object()

    def result(self: T):
        cached = getattr(self, cache_attr, not_cached)
        if cached is not not_cached:
            return cached

        fresh = info_method(self, info_constant)
        setattr(self, cache_attr, fresh)
        return fresh

    return property(result)
|
|
1058
|
+
|
|
1059
|
+
|
|
1060
|
+
def add_get_info(
    cls: type[T],
    info_method: Callable[[T, InfoT], object],
    info_class: type[InfoT],
    cacheable_attrs: Collection[str] = (),
) -> None:
    """Install one lower-cased property on *cls* per constant of *info_class*.

    Every name found directly in ``info_class.__dict__`` is considered, except
    ``to_string`` and names starting with an underscore. The property for
    constant ``FOO`` is named ``foo`` and evaluates
    ``info_method(self, info_class.FOO)``.

    :arg cacheable_attrs: names (as spelled in *info_class*) whose value is
        stored on the instance after the first lookup, under
        ``_info_cache_<lower-cased name>``.
    """
    for info_name in info_class.__dict__:
        if info_name.startswith("_") or info_name == "to_string":
            continue

        attr_name = info_name.lower()
        info_constant = cast("InfoT", getattr(info_class, info_name))

        if info_name not in cacheable_attrs:
            getter = make_getinfo(info_method, info_constant)
        else:
            getter = make_cacheable_getinfo(
                info_method, intern("_info_cache_"+attr_name), info_constant)

        setattr(cls, attr_name, getter)
|
|
1078
|
+
|
|
1079
|
+
# }}}
|
|
1080
|
+
|
|
1081
|
+
# Only patch the GL interop classes when _cl.have_gl() reports GL support.
if _cl.have_gl():
    def gl_object_get_gl_object(self):
        # Expose the second entry of get_gl_object_info() as ``gl_object``.
        gl_object_info = self.get_gl_object_info()
        return gl_object_info[1]

    _cl.GLBuffer.gl_object = property(fget=gl_object_get_gl_object)
    _cl.GLTexture.gl_object = property(fget=gl_object_get_gl_object)
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def _add_all_get_info():
    """Install info properties on each wrapped class via :func:`add_get_info`.

    Each call pairs a ``_cl`` class with its info-query method and the class
    holding the matching info constants. For ``Device`` and ``CommandQueue``,
    the listed constants are registered as cacheable.
    """
    add_get_info(_cl.Platform, _cl.Platform.get_info, _cl.platform_info)
    add_get_info(
        _cl.Device,
        _cl.Device.get_info,
        _cl.device_info,
        cacheable_attrs=["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"],
    )
    add_get_info(_cl.Context, _cl.Context.get_info, _cl.context_info)
    add_get_info(
        _cl.CommandQueue,
        _cl.CommandQueue.get_info,
        _cl.command_queue_info,
        cacheable_attrs=["CONTEXT", "DEVICE"],
    )
    add_get_info(_cl.Event, _cl.Event.get_info, _cl.event_info)
    add_get_info(
        _cl.MemoryObjectHolder, _cl.MemoryObjectHolder.get_info, _cl.mem_info
    )
    add_get_info(_cl.Image, _cl.Image.get_image_info, _cl.image_info)
    add_get_info(_cl.Pipe, _cl.Pipe.get_pipe_info, _cl.pipe_info)
    add_get_info(_cl.Kernel, _cl.Kernel.get_info, _cl.kernel_info)
    add_get_info(_cl.Sampler, _cl.Sampler.get_info, _cl.sampler_info)
|
|
1102
|
+
|
|
1103
|
+
|
|
1104
|
+
# Install the info properties at import time.
_add_all_get_info()
|