pyopencl 2024.2.7__cp310-cp310-win_amd64.whl → 2025.1__cp310-cp310-win_amd64.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +127 -122
- pyopencl/_cl.cp310-win_amd64.pyd +0 -0
- pyopencl/_mymako.py +3 -3
- pyopencl/algorithm.py +10 -7
- pyopencl/array.py +58 -123
- pyopencl/bitonic_sort.py +3 -1
- pyopencl/bitonic_sort_templates.py +1 -1
- pyopencl/cache.py +23 -22
- pyopencl/capture_call.py +5 -4
- pyopencl/clrandom.py +1 -0
- pyopencl/cltypes.py +2 -2
- pyopencl/compyte/dtypes.py +4 -4
- pyopencl/compyte/pyproject.toml +54 -0
- pyopencl/elementwise.py +9 -2
- pyopencl/invoker.py +11 -9
- pyopencl/ipython_ext.py +1 -1
- pyopencl/reduction.py +16 -10
- pyopencl/scan.py +38 -22
- pyopencl/tools.py +23 -13
- pyopencl/version.py +1 -1
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/METADATA +11 -8
- pyopencl-2025.1.dist-info/RECORD +42 -0
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/WHEEL +1 -1
- pyopencl/compyte/.git +0 -1
- pyopencl/compyte/ndarray/Makefile +0 -31
- pyopencl/compyte/ndarray/__init__.py +0 -0
- pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
- pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
- pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
- pyopencl/compyte/ndarray/pygpu_language.h +0 -207
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
- pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
- pyopencl/compyte/ndarray/setup_opencl.py +0 -101
- pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
- pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
- pyopencl-2024.2.7.dist-info/RECORD +0 -56
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/licenses/LICENSE +0 -0
pyopencl/__init__.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
__copyright__ = "Copyright (C) 2009-15 Andreas Kloeckner"
|
|
2
5
|
|
|
3
6
|
__license__ = """
|
|
@@ -22,11 +25,11 @@ THE SOFTWARE.
|
|
|
22
25
|
|
|
23
26
|
import logging
|
|
24
27
|
from sys import intern
|
|
25
|
-
from typing import Any,
|
|
28
|
+
from typing import Any, Sequence
|
|
26
29
|
from warnings import warn
|
|
27
30
|
|
|
28
31
|
# must import, otherwise dtype registry will not be fully populated
|
|
29
|
-
import pyopencl.cltypes
|
|
32
|
+
import pyopencl.cltypes
|
|
30
33
|
from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa: F401
|
|
31
34
|
|
|
32
35
|
|
|
@@ -54,7 +57,7 @@ except ImportError:
|
|
|
54
57
|
stacklevel=2)
|
|
55
58
|
raise
|
|
56
59
|
|
|
57
|
-
import numpy as np
|
|
60
|
+
import numpy as np
|
|
58
61
|
|
|
59
62
|
import sys
|
|
60
63
|
|
|
@@ -187,14 +190,14 @@ if not _PYPY:
|
|
|
187
190
|
if get_cl_header_version() >= (1, 1):
|
|
188
191
|
from pyopencl._cl import UserEvent # noqa: F401
|
|
189
192
|
if get_cl_header_version() >= (1, 2):
|
|
190
|
-
from pyopencl._cl import ImageDescriptor
|
|
193
|
+
from pyopencl._cl import ImageDescriptor
|
|
191
194
|
from pyopencl._cl import ( # noqa: F401
|
|
192
195
|
_enqueue_barrier_with_wait_list, _enqueue_fill_buffer,
|
|
193
196
|
_enqueue_marker_with_wait_list, enqueue_fill_image,
|
|
194
197
|
enqueue_migrate_mem_objects, unload_platform_compiler)
|
|
195
198
|
|
|
196
199
|
if get_cl_header_version() >= (2, 0):
|
|
197
|
-
from pyopencl._cl import SVM, SVMAllocation, SVMPointer
|
|
200
|
+
from pyopencl._cl import SVM, SVMAllocation, SVMPointer
|
|
198
201
|
|
|
199
202
|
if _cl.have_gl():
|
|
200
203
|
from pyopencl._cl import ( # noqa: F401
|
|
@@ -272,7 +275,7 @@ def _find_pyopencl_include_path() -> str:
|
|
|
272
275
|
# NOTE: only available in Python >=3.9
|
|
273
276
|
from importlib.resources import files
|
|
274
277
|
except ImportError:
|
|
275
|
-
from importlib_resources import files
|
|
278
|
+
from importlib_resources import files # type: ignore[no-redef]
|
|
276
279
|
|
|
277
280
|
include_path = str(files("pyopencl") / "cl")
|
|
278
281
|
if not exists(include_path):
|
|
@@ -347,11 +350,11 @@ from pytools import strtobool
|
|
|
347
350
|
|
|
348
351
|
_PYOPENCL_NO_CACHE = strtobool(os.environ.get("PYOPENCL_NO_CACHE", "false"))
|
|
349
352
|
|
|
350
|
-
_DEFAULT_BUILD_OPTIONS:
|
|
351
|
-
_DEFAULT_INCLUDE_OPTIONS:
|
|
353
|
+
_DEFAULT_BUILD_OPTIONS: list[str] = []
|
|
354
|
+
_DEFAULT_INCLUDE_OPTIONS: list[str] = ["-I", _find_pyopencl_include_path()]
|
|
352
355
|
|
|
353
356
|
# map of platform.name to build options list
|
|
354
|
-
_PLAT_BUILD_OPTIONS:
|
|
357
|
+
_PLAT_BUILD_OPTIONS: dict[str, list[str]] = {
|
|
355
358
|
"Oclgrind": ["-D", "PYOPENCL_USING_OCLGRIND"],
|
|
356
359
|
}
|
|
357
360
|
|
|
@@ -423,10 +426,7 @@ class Program:
|
|
|
423
426
|
return self._get_prg().get_build_info(*args, **kwargs)
|
|
424
427
|
|
|
425
428
|
def all_kernels(self):
|
|
426
|
-
|
|
427
|
-
for knl in result:
|
|
428
|
-
knl._setup(self)
|
|
429
|
-
return result
|
|
429
|
+
return self._get_prg().all_kernels()
|
|
430
430
|
|
|
431
431
|
@property
|
|
432
432
|
def int_ptr(self):
|
|
@@ -443,22 +443,22 @@ class Program:
|
|
|
443
443
|
knl = Kernel(self, attr)
|
|
444
444
|
# Nvidia does not raise errors even for invalid names,
|
|
445
445
|
# but this will give an error if the kernel is invalid.
|
|
446
|
-
knl.num_args
|
|
447
|
-
knl._source = getattr(self, "_source", None)
|
|
446
|
+
knl.num_args # noqa: B018
|
|
448
447
|
|
|
449
448
|
if self._build_duration_info is not None:
|
|
450
|
-
build_descr,
|
|
449
|
+
build_descr, _was_cached, duration = self._build_duration_info
|
|
451
450
|
if duration > 0.2:
|
|
452
|
-
logger.info(
|
|
453
|
-
|
|
451
|
+
logger.info(
|
|
452
|
+
"build program: kernel '%s' was part of a "
|
|
453
|
+
"lengthy %s (%.2f s)", attr, build_descr, duration)
|
|
454
454
|
|
|
455
455
|
# don't whine about build times more than once.
|
|
456
456
|
self._build_duration_info = None
|
|
457
457
|
|
|
458
458
|
return knl
|
|
459
|
-
except LogicError:
|
|
459
|
+
except LogicError as err:
|
|
460
460
|
raise AttributeError("'%s' was not found as a program "
|
|
461
|
-
"info attribute or as a kernel name" % attr)
|
|
461
|
+
"info attribute or as a kernel name" % attr) from err
|
|
462
462
|
|
|
463
463
|
# {{{ build
|
|
464
464
|
|
|
@@ -666,18 +666,6 @@ def _add_functionality():
|
|
|
666
666
|
|
|
667
667
|
# {{{ Context
|
|
668
668
|
|
|
669
|
-
context_old_init = Context.__init__
|
|
670
|
-
|
|
671
|
-
def context_init(self, devices, properties, dev_type, cache_dir=None):
|
|
672
|
-
if cache_dir is not None:
|
|
673
|
-
warn("The 'cache_dir' argument to the Context constructor "
|
|
674
|
-
"is deprecated and no longer has an effect. "
|
|
675
|
-
"It was removed because it only applied to the wrapper "
|
|
676
|
-
"object and not the context itself, leading to inconsistencies.",
|
|
677
|
-
DeprecationWarning, stacklevel=2)
|
|
678
|
-
|
|
679
|
-
context_old_init(self, devices, properties, dev_type)
|
|
680
|
-
|
|
681
669
|
def context_repr(self):
|
|
682
670
|
return "<pyopencl.Context at 0x{:x} on {}>".format(self.int_ptr,
|
|
683
671
|
", ".join(repr(dev) for dev in self.devices))
|
|
@@ -780,9 +768,9 @@ def _add_functionality():
|
|
|
780
768
|
|
|
781
769
|
try:
|
|
782
770
|
inf_attr = getattr(info_cls, name.upper())
|
|
783
|
-
except AttributeError:
|
|
771
|
+
except AttributeError as err:
|
|
784
772
|
raise AttributeError("%s has no attribute '%s'"
|
|
785
|
-
% (type(self), name))
|
|
773
|
+
% (type(self), name)) from err
|
|
786
774
|
else:
|
|
787
775
|
return self.event.get_profiling_info(inf_attr)
|
|
788
776
|
|
|
@@ -792,31 +780,9 @@ def _add_functionality():
|
|
|
792
780
|
|
|
793
781
|
# {{{ Kernel
|
|
794
782
|
|
|
795
|
-
kernel_old_init = Kernel.__init__
|
|
796
783
|
kernel_old_get_info = Kernel.get_info
|
|
797
784
|
kernel_old_get_work_group_info = Kernel.get_work_group_info
|
|
798
785
|
|
|
799
|
-
def kernel_init(self, prg, name):
|
|
800
|
-
if not isinstance(prg, _cl._Program):
|
|
801
|
-
prg = prg._get_prg()
|
|
802
|
-
|
|
803
|
-
kernel_old_init(self, prg, name)
|
|
804
|
-
|
|
805
|
-
self._setup(prg)
|
|
806
|
-
|
|
807
|
-
def kernel__setup(self, prg):
|
|
808
|
-
self._source = getattr(prg, "_source", None)
|
|
809
|
-
|
|
810
|
-
from pyopencl.invoker import generate_enqueue_and_set_args
|
|
811
|
-
self._enqueue, self._set_args = generate_enqueue_and_set_args(
|
|
812
|
-
self.function_name, self.num_args, self.num_args,
|
|
813
|
-
None,
|
|
814
|
-
warn_about_arg_count_bug=None,
|
|
815
|
-
work_around_arg_count_bug=None, devs=self.context.devices)
|
|
816
|
-
|
|
817
|
-
self._wg_info_cache = {}
|
|
818
|
-
return self
|
|
819
|
-
|
|
820
786
|
def kernel_set_arg_types(self, arg_types):
|
|
821
787
|
arg_types = tuple(arg_types)
|
|
822
788
|
|
|
@@ -845,38 +811,31 @@ def _add_functionality():
|
|
|
845
811
|
# }}}
|
|
846
812
|
|
|
847
813
|
from pyopencl.invoker import generate_enqueue_and_set_args
|
|
848
|
-
self.
|
|
849
|
-
generate_enqueue_and_set_args(
|
|
814
|
+
self._set_enqueue_and_set_args(
|
|
815
|
+
*generate_enqueue_and_set_args(
|
|
850
816
|
self.function_name,
|
|
851
817
|
len(arg_types), self.num_args,
|
|
852
818
|
arg_types,
|
|
853
819
|
warn_about_arg_count_bug=warn_about_arg_count_bug,
|
|
854
820
|
work_around_arg_count_bug=work_around_arg_count_bug,
|
|
855
|
-
devs=self.context.devices)
|
|
821
|
+
devs=self.context.devices))
|
|
856
822
|
|
|
857
823
|
def kernel_get_work_group_info(self, param, device):
|
|
824
|
+
try:
|
|
825
|
+
wg_info_cache = self._wg_info_cache
|
|
826
|
+
except AttributeError:
|
|
827
|
+
wg_info_cache = self._wg_info_cache = {}
|
|
828
|
+
|
|
858
829
|
cache_key = (param, device.int_ptr)
|
|
859
830
|
try:
|
|
860
|
-
return
|
|
831
|
+
return wg_info_cache[cache_key]
|
|
861
832
|
except KeyError:
|
|
862
833
|
pass
|
|
863
834
|
|
|
864
835
|
result = kernel_old_get_work_group_info(self, param, device)
|
|
865
|
-
|
|
836
|
+
wg_info_cache[cache_key] = result
|
|
866
837
|
return result
|
|
867
838
|
|
|
868
|
-
def kernel_set_args(self, *args, **kwargs):
|
|
869
|
-
# Need to duplicate the 'self' argument for dynamically generated method
|
|
870
|
-
return self._set_args(self, *args, **kwargs)
|
|
871
|
-
|
|
872
|
-
def kernel_call(self, queue, global_size, local_size, *args, **kwargs):
|
|
873
|
-
# __call__ can't be overridden directly, so we need this
|
|
874
|
-
# trampoline hack.
|
|
875
|
-
|
|
876
|
-
# Note: This is only used for the generic __call__, before
|
|
877
|
-
# kernel_set_scalar_arg_dtypes is called.
|
|
878
|
-
return self._enqueue(self, queue, global_size, local_size, *args, **kwargs)
|
|
879
|
-
|
|
880
839
|
def kernel_capture_call(self, output_file, queue, global_size, local_size,
|
|
881
840
|
*args, **kwargs):
|
|
882
841
|
from pyopencl.capture_call import capture_kernel_call
|
|
@@ -891,16 +850,12 @@ def _add_functionality():
|
|
|
891
850
|
else:
|
|
892
851
|
return val
|
|
893
852
|
|
|
894
|
-
Kernel.__init__ = kernel_init
|
|
895
|
-
Kernel._setup = kernel__setup
|
|
896
853
|
Kernel.get_work_group_info = kernel_get_work_group_info
|
|
897
854
|
|
|
898
855
|
# FIXME: Possibly deprecate this version
|
|
899
856
|
Kernel.set_scalar_arg_dtypes = kernel_set_arg_types
|
|
900
857
|
Kernel.set_arg_types = kernel_set_arg_types
|
|
901
858
|
|
|
902
|
-
Kernel.set_args = kernel_set_args
|
|
903
|
-
Kernel.__call__ = kernel_call
|
|
904
859
|
Kernel.capture_call = kernel_capture_call
|
|
905
860
|
Kernel.get_info = kernel_get_info
|
|
906
861
|
|
|
@@ -934,10 +889,30 @@ def _add_functionality():
|
|
|
934
889
|
|
|
935
890
|
# {{{ Image
|
|
936
891
|
|
|
937
|
-
|
|
892
|
+
def image_init(
|
|
893
|
+
self, context, flags, format, shape=None, pitches=None,
|
|
894
|
+
hostbuf=None, is_array=False, buffer=None, *,
|
|
895
|
+
desc: ImageDescriptor | None = None,
|
|
896
|
+
_through_create_image: bool = False,
|
|
897
|
+
) -> None:
|
|
898
|
+
if hostbuf is not None and not \
|
|
899
|
+
(flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)):
|
|
900
|
+
warn("'hostbuf' was passed, but no memory flags to make use of it.",
|
|
901
|
+
stacklevel=2)
|
|
902
|
+
|
|
903
|
+
if desc is not None:
|
|
904
|
+
if shape is not None:
|
|
905
|
+
raise TypeError("shape may not be passed when using descriptor")
|
|
906
|
+
if pitches is not None:
|
|
907
|
+
raise TypeError("pitches may not be passed when using descriptor")
|
|
908
|
+
if is_array:
|
|
909
|
+
raise TypeError("is_array may not be passed when using descriptor")
|
|
910
|
+
if buffer is not None:
|
|
911
|
+
raise TypeError("is_array may not be passed when using descriptor")
|
|
938
912
|
|
|
939
|
-
|
|
940
|
-
|
|
913
|
+
Image._custom_init(self, context, flags, format, desc, hostbuf)
|
|
914
|
+
|
|
915
|
+
return
|
|
941
916
|
|
|
942
917
|
if shape is None and hostbuf is None:
|
|
943
918
|
raise Error("'shape' must be passed if 'hostbuf' is not given")
|
|
@@ -945,15 +920,16 @@ def _add_functionality():
|
|
|
945
920
|
if shape is None and hostbuf is not None:
|
|
946
921
|
shape = hostbuf.shape
|
|
947
922
|
|
|
948
|
-
if hostbuf is not None and not \
|
|
949
|
-
(flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)):
|
|
950
|
-
warn("'hostbuf' was passed, but no memory flags to make use of it.",
|
|
951
|
-
stacklevel=2)
|
|
952
|
-
|
|
953
923
|
if hostbuf is None and pitches is not None:
|
|
954
924
|
raise Error("'pitches' may only be given if 'hostbuf' is given")
|
|
955
925
|
|
|
956
926
|
if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2):
|
|
927
|
+
if not _through_create_image:
|
|
928
|
+
warn("Non-descriptor Image constructor called. "
|
|
929
|
+
"This will stop working in 2026. "
|
|
930
|
+
"Use create_image instead (with the same arguments).",
|
|
931
|
+
DeprecationWarning, stacklevel=2)
|
|
932
|
+
|
|
957
933
|
if buffer is not None and is_array:
|
|
958
934
|
raise ValueError(
|
|
959
935
|
"'buffer' and 'is_array' are mutually exclusive")
|
|
@@ -1002,7 +978,7 @@ def _add_functionality():
|
|
|
1002
978
|
desc.num_samples = 0 # per CL 1.2 spec
|
|
1003
979
|
desc.buffer = buffer
|
|
1004
980
|
|
|
1005
|
-
|
|
981
|
+
Image._custom_init(self, context, flags, format, desc, hostbuf)
|
|
1006
982
|
else:
|
|
1007
983
|
# legacy init for CL 1.1 and older
|
|
1008
984
|
if is_array:
|
|
@@ -1016,7 +992,7 @@ def _add_functionality():
|
|
|
1016
992
|
if buffer is not None:
|
|
1017
993
|
raise TypeError("'buffer' argument is not supported for CL < 1.2")
|
|
1018
994
|
|
|
1019
|
-
|
|
995
|
+
Image._custom_init(self, context, flags, format, shape,
|
|
1020
996
|
pitches, hostbuf)
|
|
1021
997
|
|
|
1022
998
|
class _ImageInfoGetter:
|
|
@@ -1030,9 +1006,9 @@ def _add_functionality():
|
|
|
1030
1006
|
def __getattr__(self, name):
|
|
1031
1007
|
try:
|
|
1032
1008
|
inf_attr = getattr(_cl.image_info, name.upper())
|
|
1033
|
-
except AttributeError:
|
|
1009
|
+
except AttributeError as err:
|
|
1034
1010
|
raise AttributeError("%s has no attribute '%s'"
|
|
1035
|
-
% (type(self), name))
|
|
1011
|
+
% (type(self), name)) from err
|
|
1036
1012
|
else:
|
|
1037
1013
|
return self.event.get_image_info(inf_attr)
|
|
1038
1014
|
|
|
@@ -1055,7 +1031,7 @@ def _add_functionality():
|
|
|
1055
1031
|
def error_str(self):
|
|
1056
1032
|
val = self.what
|
|
1057
1033
|
try:
|
|
1058
|
-
val.routine
|
|
1034
|
+
val.routine # noqa: B018
|
|
1059
1035
|
except AttributeError:
|
|
1060
1036
|
return str(val)
|
|
1061
1037
|
else:
|
|
@@ -1151,8 +1127,8 @@ def _add_functionality():
|
|
|
1151
1127
|
"""
|
|
1152
1128
|
|
|
1153
1129
|
def svmptr_map(self, queue: CommandQueue, *, flags: int, is_blocking: bool =
|
|
1154
|
-
True, wait_for:
|
|
1155
|
-
size:
|
|
1130
|
+
True, wait_for: Sequence[Event] | None = None,
|
|
1131
|
+
size: Event | None = None) -> SVMMap:
|
|
1156
1132
|
"""
|
|
1157
1133
|
:arg is_blocking: If *False*, subsequent code must wait on
|
|
1158
1134
|
:attr:`SVMMap.event` in the returned object before accessing the
|
|
@@ -1170,8 +1146,8 @@ def _add_functionality():
|
|
|
1170
1146
|
size=size))
|
|
1171
1147
|
|
|
1172
1148
|
def svmptr_map_ro(self, queue: CommandQueue, *, is_blocking: bool = True,
|
|
1173
|
-
wait_for:
|
|
1174
|
-
size:
|
|
1149
|
+
wait_for: Sequence[Event] | None = None,
|
|
1150
|
+
size: int | None = None) -> SVMMap:
|
|
1175
1151
|
"""Like :meth:`map`, but with *flags* set for a read-only map.
|
|
1176
1152
|
"""
|
|
1177
1153
|
|
|
@@ -1179,8 +1155,8 @@ def _add_functionality():
|
|
|
1179
1155
|
is_blocking=is_blocking, wait_for=wait_for, size=size)
|
|
1180
1156
|
|
|
1181
1157
|
def svmptr_map_rw(self, queue: CommandQueue, *, is_blocking: bool = True,
|
|
1182
|
-
wait_for:
|
|
1183
|
-
size:
|
|
1158
|
+
wait_for: Sequence[Event] | None = None,
|
|
1159
|
+
size: int | None = None) -> SVMMap:
|
|
1184
1160
|
"""Like :meth:`map`, but with *flags* set for a read-only map.
|
|
1185
1161
|
"""
|
|
1186
1162
|
|
|
@@ -1190,8 +1166,8 @@ def _add_functionality():
|
|
|
1190
1166
|
def svmptr__enqueue_unmap(self, queue, wait_for=None):
|
|
1191
1167
|
return _cl._enqueue_svm_unmap(queue, self, wait_for)
|
|
1192
1168
|
|
|
1193
|
-
def svmptr_as_buffer(self, ctx: Context, *, flags:
|
|
1194
|
-
size:
|
|
1169
|
+
def svmptr_as_buffer(self, ctx: Context, *, flags: int | None = None,
|
|
1170
|
+
size: int | None = None) -> Buffer:
|
|
1195
1171
|
"""
|
|
1196
1172
|
:arg ctx: a :class:`Context`
|
|
1197
1173
|
:arg flags: a combination of :class:`pyopencl.map_flags`, defaults to
|
|
@@ -1359,15 +1335,6 @@ def _add_functionality():
|
|
|
1359
1335
|
|
|
1360
1336
|
# }}}
|
|
1361
1337
|
|
|
1362
|
-
if get_cl_header_version() >= (2, 0):
|
|
1363
|
-
svm_old_init = SVM.__init__
|
|
1364
|
-
|
|
1365
|
-
def svm_init(self, mem):
|
|
1366
|
-
if get_cl_header_version() >= (2, 0):
|
|
1367
|
-
svm_old_init(self, mem)
|
|
1368
|
-
|
|
1369
|
-
self.mem = mem
|
|
1370
|
-
|
|
1371
1338
|
def svm_map(self, queue, flags, is_blocking=True, wait_for=None):
|
|
1372
1339
|
"""
|
|
1373
1340
|
:arg is_blocking: If *False*, subsequent code must wait on
|
|
@@ -1404,7 +1371,6 @@ def _add_functionality():
|
|
|
1404
1371
|
return _cl._enqueue_svm_unmap(queue, self, wait_for)
|
|
1405
1372
|
|
|
1406
1373
|
if get_cl_header_version() >= (2, 0):
|
|
1407
|
-
SVM.__init__ = svm_init
|
|
1408
1374
|
SVM.map = svm_map
|
|
1409
1375
|
SVM.map_ro = svm_map_ro
|
|
1410
1376
|
SVM.map_rw = svm_map_rw
|
|
@@ -1532,10 +1498,48 @@ if get_cl_header_version() >= (2, 0):
|
|
|
1532
1498
|
# }}}
|
|
1533
1499
|
|
|
1534
1500
|
|
|
1501
|
+
# {{{ create_image
|
|
1502
|
+
|
|
1503
|
+
def create_image(context, flags, format, shape=None, pitches=None,
|
|
1504
|
+
hostbuf=None, is_array=False, buffer=None) -> Image:
|
|
1505
|
+
"""
|
|
1506
|
+
See :class:`mem_flags` for values of *flags*.
|
|
1507
|
+
*shape* is a 2- or 3-tuple. *format* is an instance of :class:`ImageFormat`.
|
|
1508
|
+
*pitches* is a 1-tuple for 2D images and a 2-tuple for 3D images, indicating
|
|
1509
|
+
the distance in bytes from one scan line to the next, and from one 2D image
|
|
1510
|
+
slice to the next.
|
|
1511
|
+
|
|
1512
|
+
If *hostbuf* is given and *shape* is *None*, then *hostbuf.shape* is
|
|
1513
|
+
used as the *shape* parameter.
|
|
1514
|
+
|
|
1515
|
+
:class:`Image` inherits from :class:`MemoryObject`.
|
|
1516
|
+
|
|
1517
|
+
.. note::
|
|
1518
|
+
|
|
1519
|
+
If you want to load images from :class:`numpy.ndarray` instances or read images
|
|
1520
|
+
back into them, be aware that OpenCL images expect the *x* dimension to vary
|
|
1521
|
+
fastest, whereas in the default (C) order of :mod:`numpy` arrays, the last index
|
|
1522
|
+
varies fastest. If your array is arranged in the wrong order in memory,
|
|
1523
|
+
there are two possible fixes for this:
|
|
1524
|
+
|
|
1525
|
+
* Convert the array to Fortran (column-major) order using :func:`numpy.asarray`.
|
|
1526
|
+
|
|
1527
|
+
* Pass *ary.T.copy()* to the image creation function.
|
|
1528
|
+
|
|
1529
|
+
.. versionadded:: 2024.3
|
|
1530
|
+
"""
|
|
1531
|
+
|
|
1532
|
+
return Image(context, flags, format, shape=shape, pitches=pitches,
|
|
1533
|
+
hostbuf=hostbuf, is_array=is_array, buffer=buffer,
|
|
1534
|
+
_through_create_image=True)
|
|
1535
|
+
|
|
1536
|
+
# }}}
|
|
1537
|
+
|
|
1538
|
+
|
|
1535
1539
|
# {{{ create_some_context
|
|
1536
1540
|
|
|
1537
|
-
def choose_devices(interactive: Optional[bool] = None,
|
|
1538
|
-
answers:
|
|
1541
|
+
def choose_devices(interactive: bool | None = None,
|
|
1542
|
+
answers: list[str] | None = None) -> list[Device]:
|
|
1539
1543
|
"""
|
|
1540
1544
|
Choose :class:`Device` instances 'somehow'.
|
|
1541
1545
|
|
|
@@ -1647,7 +1651,8 @@ def choose_devices(interactive: Optional[bool] = None,
|
|
|
1647
1651
|
|
|
1648
1652
|
if not devices:
|
|
1649
1653
|
raise Error("no devices found")
|
|
1650
|
-
elif len(devices) == 1:
|
|
1654
|
+
elif len(devices) == 1 and not answers:
|
|
1655
|
+
cc_print(f"Choosing only available device: {devices[0]}")
|
|
1651
1656
|
pass
|
|
1652
1657
|
else:
|
|
1653
1658
|
if not answers:
|
|
@@ -1671,13 +1676,13 @@ def choose_devices(interactive: Optional[bool] = None,
|
|
|
1671
1676
|
|
|
1672
1677
|
if answers:
|
|
1673
1678
|
raise RuntimeError("not all provided choices were used by "
|
|
1674
|
-
"
|
|
1679
|
+
"choose_devices. (left over: '%s')" % ":".join(answers))
|
|
1675
1680
|
|
|
1676
1681
|
return devices
|
|
1677
1682
|
|
|
1678
1683
|
|
|
1679
|
-
def create_some_context(interactive:
|
|
1680
|
-
answers:
|
|
1684
|
+
def create_some_context(interactive: bool | None = None,
|
|
1685
|
+
answers: list[str] | None = None) -> Context:
|
|
1681
1686
|
"""
|
|
1682
1687
|
Create a :class:`Context` 'somehow'.
|
|
1683
1688
|
|
|
@@ -2066,9 +2071,9 @@ def enqueue_copy(queue, dest, src, **kwargs):
|
|
|
2066
2071
|
# {{{ enqueue_fill
|
|
2067
2072
|
|
|
2068
2073
|
def enqueue_fill(queue: CommandQueue,
|
|
2069
|
-
dest:
|
|
2074
|
+
dest: MemoryObject | SVMPointer,
|
|
2070
2075
|
pattern: Any, size: int, *, offset: int = 0,
|
|
2071
|
-
wait_for:
|
|
2076
|
+
wait_for: Sequence[Event] | None = None) -> Event:
|
|
2072
2077
|
"""
|
|
2073
2078
|
.. versionadded:: 2022.2
|
|
2074
2079
|
"""
|
|
@@ -2097,7 +2102,7 @@ DTYPE_TO_CHANNEL_TYPE = {
|
|
|
2097
2102
|
np.dtype(np.uint8): channel_type.UNSIGNED_INT8,
|
|
2098
2103
|
}
|
|
2099
2104
|
try:
|
|
2100
|
-
np.float16
|
|
2105
|
+
np.float16 # noqa: B018
|
|
2101
2106
|
except Exception:
|
|
2102
2107
|
pass
|
|
2103
2108
|
else:
|
|
@@ -2159,7 +2164,7 @@ def image_from_array(ctx, ary, num_channels=None, mode="r", norm_int=False):
|
|
|
2159
2164
|
else:
|
|
2160
2165
|
channel_type = DTYPE_TO_CHANNEL_TYPE[dtype]
|
|
2161
2166
|
|
|
2162
|
-
return
|
|
2167
|
+
return create_image(ctx, mode_flags | mem_flags.COPY_HOST_PTR,
|
|
2163
2168
|
ImageFormat(img_format, channel_type),
|
|
2164
2169
|
shape=shape[::-1], pitches=strides[::-1][1:],
|
|
2165
2170
|
hostbuf=ary)
|
|
@@ -2272,12 +2277,12 @@ def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None, queue=None):
|
|
|
2272
2277
|
s = 1
|
|
2273
2278
|
for dim in shape:
|
|
2274
2279
|
s *= dim
|
|
2275
|
-
except TypeError:
|
|
2280
|
+
except TypeError as err:
|
|
2276
2281
|
admissible_types = (int, np.integer)
|
|
2277
2282
|
|
|
2278
2283
|
if not isinstance(shape, admissible_types):
|
|
2279
2284
|
raise TypeError("shape must either be iterable or "
|
|
2280
|
-
"castable to an integer")
|
|
2285
|
+
"castable to an integer") from err
|
|
2281
2286
|
s = shape
|
|
2282
2287
|
shape = (shape,)
|
|
2283
2288
|
|
|
@@ -2392,14 +2397,14 @@ def fsvm_empty_like(ctx, ary, alignment=None):
|
|
|
2392
2397
|
# }}}
|
|
2393
2398
|
|
|
2394
2399
|
|
|
2395
|
-
_KERNEL_ARG_CLASSES:
|
|
2400
|
+
_KERNEL_ARG_CLASSES: tuple[type, ...] = (
|
|
2396
2401
|
MemoryObjectHolder,
|
|
2397
2402
|
Sampler,
|
|
2398
2403
|
CommandQueue,
|
|
2399
2404
|
LocalMemory,
|
|
2400
2405
|
)
|
|
2401
2406
|
if get_cl_header_version() >= (2, 0):
|
|
2402
|
-
_KERNEL_ARG_CLASSES = _KERNEL_ARG_CLASSES
|
|
2407
|
+
_KERNEL_ARG_CLASSES = (*_KERNEL_ARG_CLASSES, SVM)
|
|
2403
2408
|
|
|
2404
2409
|
|
|
2405
2410
|
# vim: foldmethod=marker
|
pyopencl/_cl.cp310-win_amd64.pyd
CHANGED
|
Binary file
|
pyopencl/_mymako.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
try:
|
|
2
2
|
import mako.template # noqa: F401
|
|
3
|
-
except ImportError:
|
|
3
|
+
except ImportError as err:
|
|
4
4
|
raise ImportError(
|
|
5
5
|
"Some of PyOpenCL's facilities require the Mako templating engine.\n"
|
|
6
6
|
"You or a piece of software you have used has tried to call such a\n"
|
|
@@ -9,6 +9,6 @@ except ImportError:
|
|
|
9
9
|
"- easy_install Mako\n"
|
|
10
10
|
"- pip install Mako\n"
|
|
11
11
|
"- aptitude install python-mako\n"
|
|
12
|
-
"\nor whatever else is appropriate for your system.")
|
|
12
|
+
"\nor whatever else is appropriate for your system.") from err
|
|
13
13
|
|
|
14
|
-
from mako import * # noqa:
|
|
14
|
+
from mako import * # noqa: F403
|
pyopencl/algorithm.py
CHANGED
|
@@ -34,6 +34,7 @@ from typing import Optional
|
|
|
34
34
|
|
|
35
35
|
import numpy as np
|
|
36
36
|
from mako.template import Template
|
|
37
|
+
|
|
37
38
|
from pytools import memoize, memoize_method
|
|
38
39
|
|
|
39
40
|
import pyopencl as cl
|
|
@@ -976,8 +977,10 @@ class ListOfListsBuilder:
|
|
|
976
977
|
knl = getattr(prg, kernel_name)
|
|
977
978
|
|
|
978
979
|
from pyopencl.tools import get_arg_list_scalar_arg_dtypes
|
|
979
|
-
knl.set_scalar_arg_dtypes(
|
|
980
|
-
kernel_list_args
|
|
980
|
+
knl.set_scalar_arg_dtypes([
|
|
981
|
+
*get_arg_list_scalar_arg_dtypes([*kernel_list_args, *self.arg_decls]),
|
|
982
|
+
index_dtype
|
|
983
|
+
])
|
|
981
984
|
|
|
982
985
|
return knl
|
|
983
986
|
|
|
@@ -1049,8 +1052,9 @@ class ListOfListsBuilder:
|
|
|
1049
1052
|
knl = getattr(prg, kernel_name)
|
|
1050
1053
|
|
|
1051
1054
|
from pyopencl.tools import get_arg_list_scalar_arg_dtypes
|
|
1052
|
-
knl.set_scalar_arg_dtypes(
|
|
1053
|
-
kernel_list_args+self.arg_decls)
|
|
1055
|
+
knl.set_scalar_arg_dtypes([
|
|
1056
|
+
*get_arg_list_scalar_arg_dtypes(kernel_list_args + self.arg_decls),
|
|
1057
|
+
index_dtype])
|
|
1054
1058
|
|
|
1055
1059
|
return knl
|
|
1056
1060
|
|
|
@@ -1226,14 +1230,13 @@ class ListOfListsBuilder:
|
|
|
1226
1230
|
queue, (n_objects + 1,), index_dtype, allocator=allocator)
|
|
1227
1231
|
info_record.compressed_indices[0] = 0
|
|
1228
1232
|
|
|
1229
|
-
compress_events[name] = compress_kernel(
|
|
1230
|
-
# pylint: disable=possibly-used-before-assignment
|
|
1233
|
+
compress_events[name] = compress_kernel( # pylint: disable=possibly-used-before-assignment
|
|
1231
1234
|
info_record.starts,
|
|
1232
1235
|
compressed_counts,
|
|
1233
1236
|
info_record.nonempty_indices,
|
|
1234
1237
|
info_record.compressed_indices,
|
|
1235
1238
|
info_record.num_nonempty_lists,
|
|
1236
|
-
wait_for=[count_event
|
|
1239
|
+
wait_for=[count_event, *info_record.compressed_indices.events])
|
|
1237
1240
|
|
|
1238
1241
|
info_record.starts = compressed_counts
|
|
1239
1242
|
|