pyopencl 2024.2.2__cp39-cp39-macosx_11_0_arm64.whl → 2024.2.5__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (99)
  1. pyopencl/__init__.py +16 -4
  2. pyopencl/_cl.cpython-39-darwin.so +0 -0
  3. pyopencl/algorithm.py +3 -1
  4. pyopencl/bitonic_sort.py +2 -0
  5. pyopencl/characterize/__init__.py +23 -0
  6. pyopencl/compyte/.git +1 -0
  7. pyopencl/compyte/.gitignore +21 -0
  8. pyopencl/compyte/ndarray/Makefile +31 -0
  9. pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
  10. pyopencl/compyte/ndarray/pygpu_language.h +207 -0
  11. pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
  12. pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
  13. pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
  14. pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
  15. pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
  16. pyopencl/tools.py +60 -56
  17. pyopencl/version.py +9 -3
  18. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/METADATA +14 -14
  19. pyopencl-2024.2.5.dist-info/RECORD +56 -0
  20. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/WHEEL +1 -1
  21. pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
  22. pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
  23. pyopencl-2024.2.2.data/data/Makefile.in +0 -21
  24. pyopencl-2024.2.2.data/data/README.rst +0 -70
  25. pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
  26. pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
  27. pyopencl-2024.2.2.data/data/configure.py +0 -6
  28. pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
  29. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
  30. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
  31. pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
  32. pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
  33. pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
  34. pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
  35. pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
  36. pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
  37. pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
  38. pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
  39. pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
  40. pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
  41. pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
  42. pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
  43. pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
  44. pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
  45. pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
  46. pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
  47. pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
  48. pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
  49. pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
  50. pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
  51. pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
  52. pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
  53. pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
  54. pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
  55. pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
  56. pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
  57. pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
  58. pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
  59. pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
  60. pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
  61. pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
  62. pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
  63. pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
  64. pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
  65. pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
  66. pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
  67. pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
  68. pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
  69. pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
  70. pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
  71. pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
  72. pyopencl-2024.2.2.data/data/pytest.ini +0 -3
  73. pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
  74. pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
  75. pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
  76. pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
  77. pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
  78. pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
  79. pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
  80. pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
  81. pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
  82. pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
  83. pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
  84. pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
  85. pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
  86. pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
  87. pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
  88. pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
  89. pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
  90. pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
  91. pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
  92. pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
  93. pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
  94. pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
  95. pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
  96. pyopencl-2024.2.2.dist-info/LICENSE +0 -282
  97. pyopencl-2024.2.2.dist-info/RECORD +0 -123
  98. pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
  99. {pyopencl-2024.2.2.data/data → pyopencl-2024.2.5.dist-info/licenses}/LICENSE +0 -0
@@ -1,1565 +0,0 @@
1
- __copyright__ = "Copyright (C) 2009 Andreas Kloeckner"
2
-
3
- __license__ = """
4
- Permission is hereby granted, free of charge, to any person obtaining a copy
5
- of this software and associated documentation files (the "Software"), to deal
6
- in the Software without restriction, including without limitation the rights
7
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
- copies of the Software, and to permit persons to whom the Software is
9
- furnished to do so, subject to the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be included in
12
- all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
- THE SOFTWARE.
21
- """
22
-
23
- import numpy as np
24
- import numpy.linalg as la
25
- import pytest
26
-
27
- import pyopencl as cl
28
- import pyopencl.array as cl_array
29
- import pyopencl.clrandom
30
- import pyopencl.cltypes as cltypes
31
- from pyopencl.characterize import get_pocl_version
32
- from pyopencl.tools import \
33
- pytest_generate_tests_for_pyopencl as pytest_generate_tests # noqa: F401
34
- from pyopencl.tools import DeferredAllocator, ImmediateAllocator
35
-
36
-
37
def _xfail_if_pocl(plat, up_to_version, msg="unsupported by PoCL"):
    """Mark the running test as an expected failure on affected PoCL versions.

    :arg plat: platform object; only its ``vendor`` attribute is inspected
        here (it is also handed to :func:`get_pocl_version`).
    :arg up_to_version: newest PoCL version, in the form returned by
        :func:`get_pocl_version`, that is still affected. *None* means every
        PoCL version is affected and no version lookup is performed.
    :arg msg: reason handed to :func:`pytest.xfail`.
    """
    if plat.vendor != "The pocl project":
        return

    # Short-circuit keeps get_pocl_version() from running when no bound is given.
    affected = up_to_version is None or get_pocl_version(plat) <= up_to_version
    if affected:
        pytest.xfail(msg)
41
-
42
-
43
def _xfail_if_pocl_gpu(device, what):
    """Mark the running test as an expected failure for PoCL on a GPU device.

    :arg device: device whose platform vendor and type bits are checked.
    :arg what: name of the feature under test, interpolated into the reason.
    """
    on_pocl = device.platform.vendor == "The pocl project"

    # device.type is a bit field, hence the mask test rather than equality.
    if on_pocl and device.type & cl.device_type.GPU:
        reason = (
            f"PoCL's {what} support don't work right on Nvidia GPUs, "
            "at least the Titan V, as of PoCL 1.6, 2021-01-20")
        pytest.xfail(reason)
48
-
49
-
50
- # {{{ test_get_info
51
-
52
def test_get_info(ctx_factory):
    """Query every public ``*_info`` constant on the core CL object kinds.

    The platform, device, context, command queue, program (including its
    build info), buffer, kernel (including work-group info), event (including
    profiling info when the queue was created with profiling) and, where the
    vendor is not excluded below, sampler and image are covered. Queries
    listed in ``CRASH_QUIRKS`` for the current platform are not executed at
    all. Queries that raise are printed and counted in ``failure_count``.

    NOTE(review): ``failure_count`` is tallied but never asserted on in this
    function, so a failing query does not fail the test by itself.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    # Merely evaluating these properties must not raise.
    device.persistent_unique_id
    device.hashable_model_and_version_identifier

    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])

    # Each entry: ((vendor, platform name, version prefix), [(class, info), ...])
    CRASH_QUIRKS = [  # noqa: N806
            (("NVIDIA Corporation", "NVIDIA CUDA",
                "OpenCL 1.0 CUDA 3.0.1"),
                [
                    (cl.Event, cl.event_info.COMMAND_QUEUE),
                    ]),
            (("NVIDIA Corporation", "NVIDIA CUDA",
                "OpenCL 1.2 CUDA 7.5"),
                [
                    (cl.Buffer, getattr(cl.mem_info, "USES_SVM_POINTER", None)),
                    ]),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.8-pre"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.8"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.9-pre"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.9"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.10-pre"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.10"),
                pocl_quirks),
            (("Apple", "Apple",
                "OpenCL 1.2"),
                [
                    (cl.Program, cl.program_info.SOURCE),
                    ]),
            ]
    QUIRKS = []  # noqa: N806

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if *info* on *cl_obj* is listed for this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if (
                    vendor == platform.vendor
                    and name == platform.name
                    and platform.version.startswith(version)):
                for quirk_cls, quirk_info in quirks:
                    if (isinstance(cl_obj, quirk_cls)
                            and quirk_info == info):
                        return True

        return False

    def note_failure(cl_obj, info):
        """Print a known-quirk notice, or else count the failure."""
        if find_quirk(QUIRKS, cl_obj, info):
            print("(known quirk for %s)" % platform.name)
        else:
            failure_count[0] += 1

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Call *func* (default ``cl_obj.get_info``) for each constant of
        *info_cls*; optionally also read the lower-cased attribute form."""
        if func is None:
            func = cl_obj.get_info

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            print(info_cls, info_name)
            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info", type(cl_obj), info_name)
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                # Previously the message was assembled into a tuple, extended
                # with ``+= str`` (a TypeError) and never printed. Report it
                # the same way the attribute-form branch below does.
                print("failed get_info", type(cl_obj), info_name)
                note_failure(cl_obj, info)

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    print("failed attr-based get_info", type(cl_obj), info_name)
                    note_failure(cl_obj, info)

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Always bind ``profiling``: it is read further down even when the device
    # does not offer profiling (this used to raise NameError in that case).
    profiling = bool(
            device.queue_properties
            & cl.command_queue_properties.PROFILING_ENABLE)
    props = cl.command_queue_properties.PROFILING_ENABLE if profiling else 0
    queue = cl.CommandQueue(ctx,
            properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n*4)

    do_test(a_buf, cl.mem_info)

    kernel = prg.all_kernels()[0]
    do_test(kernel, cl.kernel_info)

    for _i in range(2):  # exercise cache
        for info_name in dir(cl.kernel_work_group_info):
            if not info_name.startswith("_") and info_name != "to_string":
                try:
                    print("kernel_wg_info: %s" % info_name)
                    kernel.get_work_group_info(
                            getattr(cl.kernel_work_group_info, info_name),
                            device)
                except cl.LogicError as err:
                    print("<error: %s>" % err)

    evt = kernel(queue, (n,), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
            ]:
        smp = cl.Sampler(ctx, False,
                cl.addressing_mode.CLAMP,
                cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Attribute access alone is the check here.
        img.depth
        img.image.depth
        do_test(img, cl.image_info,
                lambda info: img.get_image_info(info))
228
-
229
- # }}}
230
-
231
-
232
- # {{{ test_int_ptr
233
-
234
def test_int_ptr(ctx_factory):
    """Check that CL objects can be rebuilt from their ``int_ptr`` value.

    For each object kind, ``type(obj).from_int_ptr(obj.int_ptr)`` must compare
    equal to the original object and have the same Python type.
    """
    def check_round_trip(obj):
        cls = type(obj)
        rebuilt = cls.from_int_ptr(obj.int_ptr)
        assert obj == rebuilt
        assert cls is type(rebuilt)

    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform
    for handle in (device, platform, ctx):
        check_round_trip(handle)

    queue = cl.CommandQueue(ctx)
    check_round_trip(queue)

    check_round_trip(cl.enqueue_marker(queue))

    prg = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()

    check_round_trip(prg)
    check_round_trip(prg.sum)

    n = 2000
    check_round_trip(cl.Buffer(ctx, 0, n*4))

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    excluded_vendors = [
            "Intel(R) Corporation",
            "The pocl project",
            ]
    if device.image_support and platform.vendor not in excluded_vendors:
        sampler = cl.Sampler(ctx, False,
                cl.addressing_mode.CLAMP,
                cl.filter_mode.NEAREST)
        check_round_trip(sampler)

        formats = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)

        image = cl.Image(ctx, cl.mem_flags.READ_ONLY, formats[0], (128, 256))
        check_round_trip(image)
281
-
282
- # }}}
283
-
284
-
285
- # {{{ test_invalid_kernel_names_cause_failures
286
-
287
def test_invalid_kernel_names_cause_failures(ctx_factory):
    """Accessing a kernel that the program does not define must fail.

    ``AttributeError`` is the accepted outcome. A ``RuntimeError`` (including
    the one raised here when the lookup unexpectedly succeeds) is turned into
    an xfail on Intel platforms and re-raised everywhere else.
    """
    ctx = ctx_factory()
    device = ctx.devices[0]
    program = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()

    try:
        program.sam  # deliberately misspelled: only "sum" exists
        raise RuntimeError("invalid kernel name did not cause error")
    except AttributeError:
        pass
    except RuntimeError:
        if "Intel" not in device.platform.vendor:
            raise

        pytest.xfail("weird exception from OpenCL implementation "
                "on invalid kernel name--are you using "
                "Intel's implementation? (if so, known bug in Intel CL)")
308
-
309
- # }}}
310
-
311
-
312
- # {{{ test_image_format_constructor
313
-
314
def test_image_format_constructor():
    """``ImageFormat`` stores its constructor arguments and, outside PyPy,
    has no instance ``__dict__``."""
    # doesn't need image support to succeed
    order = cl.channel_order.RGBA
    data_type = cl.channel_type.FLOAT
    fmt = cl.ImageFormat(order, data_type)

    assert fmt.channel_order == order
    assert fmt.channel_data_type == data_type

    if cl._PYPY:
        return

    assert not hasattr(fmt, "__dict__")
323
-
324
- # }}}
325
-
326
-
327
- # {{{ test_device_topology_amd_constructor
328
-
329
def test_device_topology_amd_constructor():
    """``DeviceTopologyAmd`` exposes bus/device/function as given and, outside
    PyPy, has no instance ``__dict__``."""
    # doesn't need cl_amd_device_attribute_query support to succeed
    bus, dev, func = 3, 4, 5
    topology = cl.DeviceTopologyAmd(bus, dev, func)

    assert topology.bus == bus
    assert topology.device == dev
    assert topology.function == func

    if cl._PYPY:
        return

    assert not hasattr(topology, "__dict__")
339
-
340
- # }}}
341
-
342
-
343
- # {{{ test_nonempty_supported_image_formats
344
-
345
def test_nonempty_supported_image_formats(ctx_factory):
    """A device with image support must report at least one read-only
    2D image format; devices without image support are skipped."""
    context = ctx_factory()
    device = context.devices[0]

    if not device.image_support:
        pytest.skip("images not supported on %s" % device.name)

    formats = cl.get_supported_image_formats(
            context, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    assert len(formats) > 0
356
-
357
- # }}}
358
-
359
-
360
- # {{{ test_that_python_args_fail
361
-
362
def test_that_python_args_fail(ctx_factory):
    """Kernel calls must reject bare Python scalars and accept numpy scalars.

    Passing ``2``/``3`` or ``float(2)``/``3`` for the ``float``/``int`` kernel
    arguments must raise :class:`cl.LogicError`; passing ``np.float32`` and
    ``np.int32`` values must run. The device buffer is read back afterwards.
    """
    context = ctx_factory()

    prg = cl.Program(context, """
        __kernel void mult(__global float *a, float b, int c)
        { a[get_global_id(0)] *= (b+c); }
        """).build()

    rng = np.random.default_rng(seed=42)
    a = rng.random(50000)
    queue = cl.CommandQueue(context)
    mf = cl.mem_flags
    a_buf = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a)

    knl = cl.Kernel(prg, "mult")
    try:
        knl(queue, a.shape, None, a_buf, 2, 3)
    except cl.LogicError:
        pass
    else:
        raise AssertionError(
                "PyOpenCL should not accept bare Python types as arguments")

    try:
        prg.mult(queue, a.shape, None, a_buf, float(2), 3)
    except cl.LogicError:
        pass
    else:
        raise AssertionError(
                "PyOpenCL should not accept bare Python types as arguments")

    prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3))

    # enqueue_copy takes (queue, dest, src). The arguments used to be swapped,
    # which uploaded the uninitialised host array instead of reading back the
    # kernel result.
    a_result = np.empty_like(a)
    cl.enqueue_copy(queue, a_result, a_buf).wait()
395
-
396
- # }}}
397
-
398
-
399
- # {{{ test_image_2d
400
-
401
def test_image_2d(ctx_factory):
    """Read a single-channel 2D float image back through a sampler kernel.

    A random ``float32`` array is turned into an image, a kernel copies every
    texel into a plain buffer via ``read_imagef``, and the buffer contents
    must equal the source array exactly. The test is skipped without image
    support, on one specific Intel driver version, and when the required
    image format is unavailable; it is xfailed on PoCL.
    """
    context = ctx_factory()
    device, = context.devices

    if not device.image_support:
        pytest.skip("images not supported on %s" % device)

    if "Intel" in device.vendor and "31360.31426" in device.version:
        pytest.skip("images crashy on %s" % device)
    _xfail_if_pocl(device.platform, None, "PoCL does not support CL_ADDRESS_CLAMP")

    prg = cl.Program(context, """
        __kernel void copy_image(
          __global float *dest,
          __read_only image2d_t src,
          sampler_t samp,
          int stride0)
        {
          int d0 = get_global_id(0);
          int d1 = get_global_id(1);
          /*
          const sampler_t samp =
            CLK_NORMALIZED_COORDS_FALSE
            | CLK_ADDRESS_CLAMP
            | CLK_FILTER_NEAREST;
            */
          dest[d0*stride0 + d1] = read_imagef(src, samp, (float2)(d1, d0)).x;
        }
        """).build()

    num_channels = 1

    rng = np.random.default_rng(seed=42)
    a = rng.random((1024, 512, num_channels), dtype=np.float32)
    if num_channels == 1:
        # Drop the trailing channel axis for the single-channel case.
        a = a[:, :, 0]

    queue = cl.CommandQueue(context)
    try:
        a_img = cl.image_from_array(context, a, num_channels)
    except cl.RuntimeError as exc:
        if exc.code != cl.status_code.IMAGE_FORMAT_NOT_SUPPORTED:
            raise
        pytest.skip("required image format not supported on %s" % device.name)

    a_dest = cl.Buffer(context, cl.mem_flags.READ_WRITE, a.nbytes)

    samp = cl.Sampler(context, False,
            cl.addressing_mode.CLAMP,
            cl.filter_mode.NEAREST)

    # Row stride expressed in elements rather than bytes.
    row_stride = np.int32(a.strides[0] // a.dtype.itemsize)
    prg.copy_image(queue, a.shape, None, a_dest, a_img, samp, row_stride)

    a_result = np.empty_like(a)
    cl.enqueue_copy(queue, a_result, a_dest)

    good = la.norm(a_result - a) == 0
    if good:
        return

    # Only reached on mismatch; CPU devices get a more explanatory message.
    if queue.device.type & cl.device_type.CPU:
        assert good, ("The image implementation on your CPU CL platform '%s' "
                "returned bad values. This is bad, but common."
                % queue.device.platform)
    else:
        assert good
472
-
473
- # }}}
474
-
475
-
476
- # {{{ test_image_3d
477
-
478
def test_image_3d(ctx_factory):
    """Read a two-channel 3D float image back through a sampler kernel.

    Exercises ``image_from_array`` for a 3D image of ``float2`` texels: the
    kernel copies the ``.xy`` components of every texel into a buffer, which
    must then equal the source array exactly. The test is skipped without
    image support, on Intel platforms, and when the required image format is
    unavailable; it is xfailed on PoCL.
    """
    context = ctx_factory()
    device, = context.devices

    if not device.image_support:
        pytest.skip("images not supported on %s" % device)

    if device.platform.vendor == "Intel(R) Corporation":
        pytest.skip("images crashy on %s" % device)
    _xfail_if_pocl(device.platform, None, "PoCL does not support CL_ADDRESS_CLAMP")

    prg = cl.Program(context, """
        __kernel void copy_image_plane(
          __global float2 *dest,
          __read_only image3d_t src,
          sampler_t samp,
          int stride0,
          int stride1)
        {
          int d0 = get_global_id(0);
          int d1 = get_global_id(1);
          int d2 = get_global_id(2);
          /*
          const sampler_t samp =
            CLK_NORMALIZED_COORDS_FALSE
            | CLK_ADDRESS_CLAMP
            | CLK_FILTER_NEAREST;
            */
          dest[d0*stride0 + d1*stride1 + d2] = read_imagef(
                src, samp, (float4)(d2, d1, d0, 0)).xy;
        }
        """).build()

    num_channels = 2
    shape = (3, 4, 2)

    rng = np.random.default_rng(seed=42)
    a = rng.random(size=shape + (num_channels,), dtype=np.float32)

    queue = cl.CommandQueue(context)
    try:
        a_img = cl.image_from_array(context, a, num_channels)
    except cl.RuntimeError as exc:
        if exc.code != cl.status_code.IMAGE_FORMAT_NOT_SUPPORTED:
            raise
        pytest.skip("required image format not supported on %s" % device.name)

    a_dest = cl.Buffer(context, cl.mem_flags.READ_WRITE, a.nbytes)

    samp = cl.Sampler(context, False,
            cl.addressing_mode.CLAMP,
            cl.filter_mode.NEAREST)

    # Strides in units of float2 elements: bytes -> scalars -> channel pairs.
    stride0 = np.int32(a.strides[0] // a.itemsize // num_channels)
    stride1 = np.int32(a.strides[1] // a.itemsize // num_channels)
    prg.copy_image_plane(queue, shape, None, a_dest, a_img, samp,
            stride0, stride1)

    a_result = np.empty_like(a)
    cl.enqueue_copy(queue, a_result, a_dest)

    good = la.norm(a_result - a) == 0
    if good:
        return

    # Only reached on mismatch; CPU devices get a more explanatory message.
    if queue.device.type & cl.device_type.CPU:
        assert good, ("The image implementation on your CPU CL platform '%s' "
                "returned bad values. This is bad, but common."
                % queue.device.platform)
    else:
        assert good
554
-
555
- # }}}
556
-
557
-
558
- # {{{ test_copy_buffer
559
-
560
def test_copy_buffer(ctx_factory):
    """A buffer-to-buffer copy followed by a read-back returns the input."""
    context = ctx_factory()
    queue = cl.CommandQueue(context)
    flags = cl.mem_flags

    rng = np.random.default_rng(seed=42)
    source = rng.random(50000, dtype=np.float32)
    readback = np.empty_like(source)

    src_buf = cl.Buffer(
            context, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=source)
    dst_buf = cl.Buffer(context, flags.WRITE_ONLY, readback.nbytes)

    # device -> device, then device -> host
    cl.enqueue_copy(queue, dst_buf, src_buf).wait()
    cl.enqueue_copy(queue, readback, dst_buf).wait()

    assert la.norm(source - readback) == 0
577
-
578
- # }}}
579
-
580
-
581
- # {{{ test_mempool_*
582
-
583
def test_mempool(ctx_factory):
    """Allocate from a ``MemoryPool`` while keeping a bounded number of
    allocations referenced, then ask the pool to stop holding memory."""
    from pyopencl.tools import ImmediateAllocator, MemoryPool

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    pool = MemoryPool(ImmediateAllocator(queue))
    live_allocs = []
    max_live = 10

    e0 = 12

    # 100 allocations for each of the two sizes 2**(e0-6) and 2**(e0-5).
    for exponent in range(e0-6, e0-4):
        nbytes = 1 << exponent
        for _ in range(100):
            live_allocs.append(pool.allocate(nbytes))
            if len(live_allocs) > max_live:
                # Drop the reference to the oldest allocation.
                live_allocs.pop(0)
    del live_allocs
    pool.stop_holding()
601
-
602
-
603
def test_mempool_2(ctx_factory):
    """Check ``MemoryPool`` bin arithmetic for many random sizes plus 2**30.

    For every size, the allocation size of its bin must be at least the
    requested size and must itself map back to the same bin.
    """
    from random import randrange

    from pyopencl.tools import ImmediateAllocator, MemoryPool

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    pool = MemoryPool(ImmediateAllocator(queue))

    sizes = [randrange(1 << 31) >> randrange(32) for _ in range(2000)]
    sizes.append(2**30)

    for s in sizes:
        bin_nr = pool.bin_number(s)
        asize = pool.alloc_size(bin_nr)

        assert asize >= s, s
        assert pool.bin_number(asize) == bin_nr, s
        # NOTE(review): this compares asize with itself, so it only checks
        # asize > 0. Presumably a bound relative to ``s`` was intended --
        # confirm against the pool's rounding guarantees before changing.
        assert asize < asize*(1+1/8)
620
-
621
-
622
def test_mempool_32bit_issues():
    """Allocate sizes around 2**30 .. 2**34 from the test memory pool.

    Skipped when the native pointer size is below 64 bits.
    """
    import struct

    pointer_bits = struct.calcsize("@P") * 8
    if pointer_bits < 64:
        pytest.skip("only relevant on 64-bit systems")

    # https://github.com/inducer/pycuda/issues/282
    from pyopencl._cl import _TestMemoryPool
    pool = _TestMemoryPool()

    for exponent in range(30, 35):
        base = 2**exponent
        for offs in range(-5, 5):
            pool.allocate(base + offs)
634
-
635
- # }}}
636
-
637
-
638
- # {{{ test_allocator
639
-
640
@pytest.mark.parametrize("allocator_cls", [ImmediateAllocator, DeferredAllocator])
def test_allocator(ctx_factory, allocator_cls):
    """An allocator returns memory for a positive size and *None* for zero."""
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    # DeferredAllocator is constructed from a context, the other from a queue.
    target = context if allocator_cls is DeferredAllocator else queue
    allocator = allocator_cls(target)

    nonempty = allocator(15)
    empty = allocator(0)

    assert nonempty is not None
    assert empty is None
655
-
656
- # }}}
657
-
658
-
659
- # {{{ test_vector_args
660
-
661
def test_vector_args(ctx_factory):
    """A ``float4`` passed by value to a kernel arrives unchanged.

    The kernel writes its vector argument to every element of the output
    buffer, which is then read back and compared to the argument.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    program = cl.Program(context, """
        __kernel void set_vec(float4 x, __global float4 *dest)
        { dest[get_global_id(0)] = x; }
        """).build()

    vec = cltypes.make_float4(1, 2, 3, 4)
    host_out = np.empty(50000, cltypes.float4)
    flags = cl.mem_flags
    dev_out = cl.Buffer(
            context, flags.READ_WRITE | flags.COPY_HOST_PTR, hostbuf=host_out)

    program.set_vec(queue, host_out.shape, None, vec, dev_out)

    cl.enqueue_copy(queue, host_out, dev_out).wait()

    assert (host_out == vec).all()
680
-
681
- # }}}
682
-
683
-
684
- # {{{ test_header_dep_handling
685
def test_header_dep_handling(ctx_factory):
    """Build, twice, a program that includes a header located next to this
    file, passing that directory via ``-I``."""
    context = ctx_factory()

    from os.path import dirname, exists, join
    here = dirname(__file__)
    assert exists(join(here, "empty-header.h"))

    kernel_src = """
    #include <empty-header.h>
    kernel void zonk(global int *a)
    {
      *a = 5;
    }
    """

    # Two identical builds; presumably the second one is meant to hit the
    # program cache -- confirm against the caching implementation.
    for _ in range(2):
        cl.Program(context, kernel_src).build(["-I", here])
701
-
702
- # }}}
703
-
704
-
705
- # {{{ test_context_dep_memoize
706
-
707
def test_context_dep_memoize(ctx_factory):
    """A ``context_dependent_memoize``-wrapped function runs only once when
    called twice with the same context."""
    context = ctx_factory()

    from pyopencl.tools import context_dependent_memoize

    calls = 0

    @context_dependent_memoize
    def do_something(ctx):
        nonlocal calls
        calls += 1

    for _ in range(2):
        do_something(context)

    assert calls == 1
722
-
723
- # }}}
724
-
725
-
726
- # {{{ test_can_build_and_run_binary
727
-
728
def test_can_build_and_run_binary(ctx_factory):
    """A program rebuilt from its own device binary can be built and run."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    device = queue.device

    from_source = cl.Program(ctx, """
    __kernel void simple(__global float *in, __global float *out)
    {
        out[get_global_id(0)] = in[get_global_id(0)];
    }""")
    from_source.build()
    binaries = from_source.get_info(cl.program_info.BINARIES)

    # Re-create the program from the first binary for the queue's device.
    from_binary = cl.Program(ctx, [device], [binaries[0]])
    from_binary.build()

    n = 256
    a_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    from_binary.simple(queue, (n,), (16,), a_dev.data, dest_dev.data)
750
-
751
- # }}}
752
-
753
-
754
- # {{{ test_enqueue_barrier_marker
755
-
756
def test_enqueue_barrier_marker(ctx_factory):
    """Barriers and markers can be enqueued with and without wait lists."""
    ctx = ctx_factory()
    # Still relevant on PoCL 1.0RC1.
    _xfail_if_pocl(
            ctx.devices[0].platform, (1, 0), "PoCL crashes on enqueue_barrier")

    queue = cl.CommandQueue(ctx)

    impl_is_new = queue._get_cl_version() >= (1, 2)
    if impl_is_new and cl.get_cl_header_version() <= (1, 1):
        pytest.skip("CL impl version >= 1.2, header version <= 1.1--cannot be sure "
                "that clEnqueueWaitForEvents is implemented")

    cl.enqueue_barrier(queue)
    first = cl.enqueue_marker(queue)
    second = cl.enqueue_marker(queue, wait_for=[first])
    cl.enqueue_barrier(queue, wait_for=[first, second])
772
-
773
- # }}}
774
-
775
-
776
- # {{{ test_wait_for_events
777
-
778
def test_wait_for_events(ctx_factory):
    """``wait_for_events`` accepts a list of marker events."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    markers = [cl.enqueue_marker(queue), cl.enqueue_marker(queue)]
    cl.wait_for_events(markers)
784
-
785
- # }}}
786
-
787
-
788
- # {{{ test_unload_compiler
789
-
790
def test_unload_compiler(platform):
    """``unload_platform_compiler`` can be called on a platform.

    Skipped when either the implementation or the headers predate
    OpenCL 1.2 and on Intel platforms; xfailed on PoCL up to 0.13.
    """
    too_old = (
            platform._get_cl_version() < (1, 2)
            or cl.get_cl_header_version() < (1, 2))
    if too_old:
        pytest.skip("clUnloadPlatformCompiler is only available in OpenCL 1.2")

    _xfail_if_pocl(platform, (0, 13), "PoCL does not support unloading compiler")

    if platform.vendor == "Intel(R) Corporation":
        pytest.skip("Intel proprietary driver does not support unloading compiler")

    cl.unload_platform_compiler(platform)
800
-
801
- # }}}
802
-
803
-
804
- # {{{ test_platform_get_devices
805
-
806
def test_platform_get_devices(ctx_factory):
    """``Platform.get_devices`` only returns devices of the requested type.

    DEFAULT, ALL and CUSTOM are queried but their results are not checked.
    CUSTOM is only queried when both implementation and headers are at least
    OpenCL 1.2 and the platform name does not start with "nvidia".
    """
    ctx = ctx_factory()
    platform = ctx.devices[0].platform

    if platform.name == "Apple":
        pytest.xfail("Apple doesn't understand all the values we pass "
            "for dev_type")

    dev_types = [cl.device_type.ACCELERATOR, cl.device_type.ALL,
                 cl.device_type.CPU, cl.device_type.DEFAULT, cl.device_type.GPU]

    custom_ok = (
            platform._get_cl_version() >= (1, 2)
            and cl.get_cl_header_version() >= (1, 2)
            and not platform.name.lower().startswith("nvidia"))
    if custom_ok:
        dev_types.append(cl.device_type.CUSTOM)

    # Types for which the returned devices' type bits are not verified.
    unchecked = (cl.device_type.DEFAULT,
                 cl.device_type.ALL,
                 getattr(cl.device_type, "CUSTOM", None))

    for dev_type in dev_types:
        print(dev_type)
        devs = platform.get_devices(dev_type)
        if dev_type not in unchecked:
            for dev in devs:
                assert dev.type & dev_type == dev_type
830
-
831
- # }}}
832
-
833
-
834
- # {{{ test_user_event
835
-
836
def test_user_event(ctx_factory):
    """A ``UserEvent`` blocks waiters until its status is set to COMPLETE.

    Two ways of waiting are exercised, each from a background thread:
    ``Event.wait`` and ``cl.wait_for_events``. In both cases the waiter must
    not return before ``set_status`` is called and must have returned shortly
    after it.

    :raises RuntimeError: if a waiter returns too early or not at all within
        the sleep interval used below.
    """
    ctx = ctx_factory()
    # UserEvent needs OpenCL 1.1 in both the implementation and the headers,
    # so skip as soon as either is older. The guard used to combine the two
    # checks with ``and``, unlike the matching guard in test_unload_compiler.
    if (ctx._get_cl_version() < (1, 1)
            or cl.get_cl_header_version() < (1, 1)):
        pytest.skip("UserEvent is only available in OpenCL 1.1")

    # https://github.com/pocl/pocl/issues/201
    _xfail_if_pocl(ctx.devices[0].platform, (0, 13),
            "PoCL's user events don't work right")

    # Maps a waiter's key to True once that waiter has returned.
    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep

    scenarios = [
            (1, event_waiter1, "UserEvent.wait timeout"),
            (2, event_waiter2, "cl.wait_for_events timeout on UserEvent"),
            ]

    for key, waiter, timeout_msg in scenarios:
        evt = cl.UserEvent(ctx)
        Thread(target=waiter, args=(evt, key)).start()

        # Give the waiter thread time to start blocking on the event.
        sleep(.05)
        if status.get(key, False):
            raise RuntimeError("UserEvent triggered before set_status")

        evt.set_status(cl.command_execution_status.COMPLETE)

        # Give the waiter thread time to wake up and record completion.
        sleep(.05)
        if not status.get(key, False):
            raise RuntimeError(timeout_msg)
        assert evt.command_execution_status == cl.command_execution_status.COMPLETE
880
-
881
- # }}}
882
-
883
-
884
- # {{{ test_buffer_get_host_array
885
-
886
def test_buffer_get_host_array(ctx_factory):
    """Check ``Buffer.get_host_array`` for the different host-pointer flags.

    For a USE_HOST_PTR buffer the returned array must equal the original
    host array, point at the same memory, and keep the buffer as its base.
    For ALLOC_HOST_PTR and COPY_HOST_PTR buffers the call must raise
    ``cl.LogicError``.
    """
    if cl._PYPY:
        # FIXME
        pytest.xfail("Buffer.get_host_array not yet working on pypy")

    ctx = ctx_factory()
    mf = cl.mem_flags

    rng = np.random.default_rng(seed=42)
    host_buf = rng.random(25, dtype=np.float32)
    buf = cl.Buffer(ctx, mf.READ_WRITE | mf.USE_HOST_PTR, hostbuf=host_buf)
    host_buf2 = buf.get_host_array(25, np.float32)
    assert (host_buf == host_buf2).all()
    # Compare the *returned* array's data pointer with the original one;
    # comparing host_buf with itself would always pass.
    assert (host_buf2.__array_interface__["data"][0]
            == host_buf.__array_interface__["data"][0])
    assert host_buf2.base is buf

    # MemoryObject.get_host_array should not accept a buffer created
    # without USE_HOST_PTR.
    buf = cl.Buffer(ctx, mf.READ_WRITE | mf.ALLOC_HOST_PTR, size=100)
    with pytest.raises(cl.LogicError):
        buf.get_host_array(25, np.float32)

    host_buf = rng.random(25, dtype=np.float32)
    buf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=host_buf)
    with pytest.raises(cl.LogicError):
        buf.get_host_array(25, np.float32)
919
-
920
- # }}}
921
-
922
-
923
- # {{{ test_program_valued_get_info
924
-
925
def test_program_valued_get_info(ctx_factory):
    """Check that a kernel's ``program`` attribute equals its source program."""
    ctx = ctx_factory()

    kernel_src = """
    __kernel void
    reverse(__global float *out)
    {
        out[get_global_id(0)] *= 2;
    }
    """
    prg = cl.Program(ctx, kernel_src).build()
    knl = prg.reverse

    assert knl.program == prg
    # Only the access itself is tested; the binary is not inspected.
    knl.program.binaries[0]
940
-
941
- # }}}
942
-
943
-
944
- # {{{ test_event_set_callback
945
-
946
def test_event_set_callback(ctx_factory):
    """Check that ``Event.set_callback`` invokes the callback on completion.

    A kernel is held back by a user event, a COMPLETE callback is attached
    to the kernel's event, and the user event is then released. The callback
    must have been called within five seconds of the queue finishing.
    """
    import sys
    if sys.platform.startswith("win"):
        pytest.xfail("Event.set_callback not present on Windows")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    _xfail_if_pocl_gpu(queue.device, "event callbacks")

    if ctx._get_cl_version() < (1, 1):
        pytest.skip("OpenCL 1.1 or newer required for set_callback")

    from threading import Event

    rng = np.random.default_rng(seed=42)
    a_np = rng.random(50000, dtype=np.float32)
    b_np = rng.random(50000, dtype=np.float32)

    got_called = []
    callback_ran = Event()

    def cb(status):
        got_called.append(status)
        callback_ran.set()

    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)

    prg = cl.Program(ctx, """
    __kernel void sum(__global const float *a_g, __global const float *b_g,
        __global float *res_g) {
      int gid = get_global_id(0);
      res_g[gid] = a_g[gid] + b_g[gid];
    }
    """).build()

    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)

    # The kernel cannot start until the user event is completed, so the
    # callback is registered before the kernel's event can complete.
    uevt = cl.UserEvent(ctx)
    evt = prg.sum(queue, a_np.shape, None, a_g, b_g, res_g, wait_for=[uevt])
    evt.set_callback(cl.command_execution_status.COMPLETE, cb)

    uevt.set_status(cl.command_execution_status.COMPLETE)
    queue.finish()

    # The callback is not assumed to have run by the time finish() returns;
    # wait up to five seconds for it instead of polling with sleeps.
    callback_ran.wait(timeout=5)

    assert got_called
1005
-
1006
- # }}}
1007
-
1008
-
1009
- # {{{ test_global_offset
1010
-
1011
def test_global_offset(ctx_factory):
    """Check that kernel launches honor ``global_offset``.

    A buffer of ``n`` floats is doubled in chunks of ``step`` work items,
    each launch shifted by a different offset; the result must equal ``2*a``.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    _xfail_if_pocl_gpu(queue.device, "global offset")

    prg = cl.Program(context, """
        __kernel void mult(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()

    n = 50

    rng = np.random.default_rng(seed=42)
    a = rng.random(n, dtype=np.float32)

    # The queue created above is reused for all launches and the read-back;
    # creating a second queue on the same context was redundant.
    mf = cl.mem_flags
    a_buf = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a)

    step = 10
    for ofs in range(0, n, step):
        prg.mult(queue, (step,), None, a_buf, global_offset=(ofs,))

    a_2 = np.empty_like(a)
    cl.enqueue_copy(queue, a_2, a_buf)

    assert (a_2 == 2*a).all()
1039
-
1040
- # }}}
1041
-
1042
-
1043
- # {{{ test_sub_buffers
1044
-
1045
def test_sub_buffers(ctx_factory):
    """Check that slicing a ``Buffer`` copies back the matching host bytes.

    The slice bounds are multiples of the device's ``mem_base_addr_align``.
    """
    ctx = ctx_factory()
    lacks_cl_1_1 = (ctx._get_cl_version() < (1, 1)
                    or cl.get_cl_header_version() < (1, 1))
    if lacks_cl_1_1:
        pytest.skip("sub-buffers are only available in OpenCL 1.1")

    alignment = ctx.devices[0].mem_base_addr_align
    queue = cl.CommandQueue(ctx)

    n = 30000
    rng = np.random.default_rng(seed=42)
    a = (rng.random(n) * 100).astype(np.uint8)

    flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    a_buf = cl.Buffer(ctx, flags, hostbuf=a)

    # Largest multiple of the alignment not exceeding 5000, then 20 more
    # alignment units past it.
    start = (5000 // alignment) * alignment
    stop = start + 20 * alignment

    expected = a[start:stop]
    received = np.empty_like(expected)
    cl.enqueue_copy(queue, received, a_buf[start:stop])

    assert np.array_equal(received, expected)
1073
-
1074
- # }}}
1075
-
1076
-
1077
- # {{{ test_spirv
1078
-
1079
def test_spirv(ctx_factory):
    """Build a program from a SPIR-V file and check its vector-add kernel.

    The binary is read from ``add-vectors-<address_bits>.spv`` next to this
    file; the kernel result is compared with ``a_dev + b_dev``.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    device = queue.device

    if (ctx._get_cl_version() < (2, 1)
            or cl.get_cl_header_version() < (2, 1)):
        pytest.skip("SPIR-V program creation only available "
                    "in OpenCL 2.1 and higher")

    if not device.il_version:
        pytest.skip("SPIR-V program creation not supported by device")

    n = 50000

    a_dev = cl.clrandom.rand(queue, n, np.float32)
    b_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    from os.path import dirname, join
    spv_basename = "add-vectors-%d.spv" % queue.device.address_bits
    spv_filename = join(dirname(__file__), spv_basename)

    with open(spv_filename, "rb") as spv_file:
        spv = spv_file.read()

    prg = cl.Program(ctx, spv).build()

    on_amd = queue.device.platform.name.startswith("AMD Accelerated")
    if not prg.all_kernels() and on_amd:
        pytest.skip("SPIR-V program creation on AMD did not result in any kernels")

    prg.sum(queue, a_dev.shape, None, a_dev.data, b_dev.data, dest_dev.data)

    residual = (dest_dev - (a_dev+b_dev)).get()
    assert la.norm(residual) < 1e-7
1112
-
1113
- # }}}
1114
-
1115
-
1116
- # {{{ test_coarse_grain_svm
1117
-
1118
@pytest.mark.parametrize("use_opaque_style", [False, True])
def test_coarse_grain_svm(ctx_factory, use_opaque_style):
    """Exercise coarse-grain SVM via map/unmap, kernel launches and copies.

    With *use_opaque_style* the memory is a raw ``cl.SVMAllocation``;
    otherwise it is the array returned by ``cl.csvm_empty`` wrapped in
    ``cl.SVM``. The memory is filled with 17, doubled twice by a kernel and
    checked against four times the original contents.
    """
    import sys
    is_pypy = "__pypy__" in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]

    from pyopencl.characterize import has_coarse_grain_buffer_svm
    if not has_coarse_grain_buffer_svm(queue.device):
        pytest.skip("device does not support coarse-grain SVM")

    on_amd = "AMD" in dev.platform.name
    if on_amd and dev.type & cl.device_type.CPU:
        pytest.xfail("AMD CPU doesn't do coarse-grain SVM")
    if on_amd and dev.type & cl.device_type.GPU:
        pytest.xfail("AMD GPU crashes on SVM unmap")

    on_pocl = dev.platform.vendor == "The pocl project"
    if (on_pocl
            and dev.type & cl.device_type.GPU
            and "k40" in dev.name.lower()):
        pytest.xfail("Crashes on K40s via PoCL-CUDA")

    dtype = np.dtype(np.float32)
    n = 3000
    if use_opaque_style:
        svm_ary = cl.SVMAllocation(ctx, n*dtype.itemsize, alignment=64,
                                   flags=cl.svm_mem_flags.READ_WRITE)
    else:
        svm_ary = cl.SVM(cl.csvm_empty(ctx, (n,), dtype, alignment=64))
        if not is_pypy:
            # https://bitbucket.org/pypy/numpy/issues/52
            assert isinstance(svm_ary.mem.base, cl.SVMAllocation)

    cl.enqueue_svm_memfill(queue, svm_ary, np.zeros((), dtype))

    with svm_ary.map_rw(queue) as mapped:
        if use_opaque_style:
            mapped = mapped.view(dtype)
        else:
            assert mapped is svm_ary.mem

        assert mapped.nbytes == n * dtype.itemsize

        mapped.fill(17)
        orig_ary = mapped.copy()

    prg = cl.Program(ctx, """
        __kernel void twice(__global float *a_g)
        {
          a_g[get_global_id(0)] *= 2;
        }
        """).build()

    # First doubling: pass the SVM object directly.
    prg.twice(queue, (n,), None, svm_ary)

    # Second doubling: through a buffer view where that is supported.
    if on_pocl and dev.type & cl.device_type.GPU:
        # clCreateBuffer from SVM doesn't work yet on GPU pocl
        prg.twice(queue, (n,), None, svm_ary)
    else:
        prg.twice(queue, (n,), None, svm_ary.as_buffer(ctx))

    with svm_ary.map_ro(queue) as mapped:
        if use_opaque_style:
            mapped = mapped.view(dtype)
        else:
            assert mapped is svm_ary.mem

        assert np.array_equal(orig_ary*4, mapped)

    new_ary = np.empty_like(orig_ary)
    new_ary.fill(-1)

    cl.enqueue_copy(queue, new_ary, svm_ary)
    assert np.array_equal(orig_ary*4, new_ary)

    # {{{ https://github.com/inducer/pyopencl/issues/372

    buf_arr = cl.svm_empty(ctx, cl.svm_mem_flags.READ_ONLY, 10, np.int32)
    out_arr = cl.svm_empty(ctx, cl.svm_mem_flags.READ_WRITE, 10, np.int32)

    svm_buf_arr = cl.SVM(buf_arr)
    svm_out_arr = cl.SVM(out_arr)
    with svm_buf_arr.map_rw(queue) as mapped:
        mapped.fill(17)

    prg_ro = cl.Program(ctx, r"""
        __kernel void twice_ro(__global int *out_g, __global int *in_g)
        {
          out_g[get_global_id(0)] = 2*in_g[get_global_id(0)];
        }
        """).build()

    prg_ro.twice_ro(queue, buf_arr.shape, None, svm_out_arr, svm_buf_arr)

    with svm_out_arr.map_ro(queue) as mapped:
        print(mapped)

    # }}}
1222
-
1223
- # }}}
1224
-
1225
-
1226
- # {{{ test_fine_grain_svm
1227
-
1228
def test_fine_grain_svm(ctx_factory):
    """Check that a kernel can update a fine-grain SVM array in place.

    The array from ``cl.fsvm_empty`` is filled with 17 on the host, doubled
    by a kernel, and read on the host again after ``queue.finish()``.
    """
    import sys
    is_pypy = "__pypy__" in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    _xfail_if_pocl_gpu(queue.device, "GPU SVM")

    from pyopencl.characterize import has_fine_grain_buffer_svm
    if not has_fine_grain_buffer_svm(queue.device):
        pytest.skip("device does not support fine-grain SVM")

    n = 3000
    ary = cl.fsvm_empty(ctx, n, np.float32, alignment=64)

    if not is_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(ary.base, cl.SVMAllocation)

    ary.fill(17)
    orig_ary = ary.copy()

    kernel_src = """
        __kernel void twice(__global float *a_g)
        {
          a_g[get_global_id(0)] *= 2;
        }
        """
    prg = cl.Program(ctx, kernel_src).build()

    prg.twice(queue, ary.shape, None, cl.SVM(ary))
    queue.finish()

    print(ary)
    assert np.array_equal(orig_ary*2, ary)
1265
-
1266
- # }}}
1267
-
1268
-
1269
- # {{{ test_map_dtype
1270
-
1271
@pytest.mark.parametrize("dtype", [
    np.uint,
    cltypes.uint2,  # type: ignore[attr-defined]
    ])
def test_map_dtype(ctx_factory, dtype):
    """Check that ``enqueue_map_buffer`` returns an array of the given dtype.

    A one-element buffer of *dtype* is mapped for writing; the mapped array's
    dtype must equal ``np.dtype(dtype)``.
    """
    if cl._PYPY:
        # FIXME
        pytest.xfail("enqueue_map_buffer not yet working on pypy")

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dt = np.dtype(dtype)

    # Use the ``cl`` alias throughout, as the rest of this function does.
    b = cl.Buffer(ctx,
                  cl.mem_flags.READ_ONLY,
                  dt.itemsize)
    array, ev = cl.enqueue_map_buffer(queue, b, cl.map_flags.WRITE, 0,
                                      (1,), dt)
    # ``array.base`` is used as a context manager around the checks.
    with array.base:
        print(array.dtype)
        assert array.dtype == dt
1293
-
1294
- # }}}
1295
-
1296
-
1297
- # {{{ test_compile_link
1298
-
1299
def test_compile_link(ctx_factory):
    """Compile three sources separately, link two of them, run the kernel.

    ``pi.h`` is supplied as a compiled header program to the main source;
    the linked program's ``experiment`` kernel is launched once.
    """
    ctx = ctx_factory()

    if ctx._get_cl_version() < (1, 2) or cl.get_cl_header_version() < (1, 2):
        pytest.skip("Context and ICD loader must understand CL1.2 for compile/link")

    if ctx.devices[0].platform.name == "Apple":
        pytest.skip("Apple doesn't like our compile/link test")

    # as of pocl 5.0
    _xfail_if_pocl_gpu(ctx.devices[0], "compile/link")

    queue = cl.CommandQueue(ctx)

    vsink_src = """//CL//
        void value_sink(float x)
        {
        }
        """
    pi_h_src = """//CL//
        inline float get_pi()
        {
            return 3.1415f;
        }
        """
    main_src = """//CL//
        #include "pi.h"

        void value_sink(float x);

        __kernel void experiment()
        {
            value_sink(get_pi() + get_global_id(0));
        }
        """

    vsink_prg = cl.Program(ctx, vsink_src).compile()
    pi_h__prg = cl.Program(ctx, pi_h_src).compile()
    main_prg = cl.Program(ctx, main_src).compile(
        headers=[("pi.h", pi_h__prg)])

    linked = cl.link_program(ctx, [vsink_prg, main_prg], devices=ctx.devices)
    linked.experiment(queue, (128**2,), (128,))
    queue.finish()
1337
-
1338
- # }}}
1339
-
1340
-
1341
- # {{{ test_copy_buffer_rect
1342
-
1343
def test_copy_buffer_rect(ctx_factory):
    """Enqueue a rectangular buffer-to-buffer copy between two arrays.

    Only the enqueue call is exercised; the destination is not inspected.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    _xfail_if_pocl_gpu(queue.device, "rectangular copies")

    src_ary = cl_array.zeros(queue, (2, 3), "f")
    dst_ary = cl_array.zeros(queue, (4, 5), "f")
    src_ary.fill(1)

    region = src_ary.shape[::-1]
    cl.enqueue_copy(
        queue, dst_ary.data, src_ary.data,
        src_origin=(0, 0), dst_origin=(1, 1),
        region=region)
1356
-
1357
- # }}}
1358
-
1359
-
1360
- # {{{ test_threaded_nanny_events
1361
-
1362
def test_threaded_nanny_events(ctx_factory):
    """Create and fetch device arrays from two threads while running the GC.

    Regression test for https://github.com/inducer/pyopencl/issues/296.
    Any exception raised inside a worker thread is re-raised in the test
    thread so that it fails the test.
    """
    # https://github.com/inducer/pyopencl/issues/296

    import gc
    import threading

    # Exceptions raised in worker threads do not propagate to the thread
    # that joins them, so they are collected here and re-raised below.
    worker_errors = []

    def create_arrays_thread(n1=10, n2=20):
        try:
            ctx = ctx_factory()
            queue = cl.CommandQueue(ctx)
            for _i1 in range(n2):
                for _i in range(n1):
                    acl = cl.array.zeros(queue, 10, dtype=np.float32)
                    acl.get()
                # Garbage collection triggers the error
                print("collected ", str(gc.collect()))
                print("stats ", gc.get_stats())
        except Exception as exc:
            worker_errors.append(exc)

    workers = [threading.Thread(target=create_arrays_thread) for _ in range(2)]

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    if worker_errors:
        raise worker_errors[0]
1387
-
1388
- # }}}
1389
-
1390
-
1391
- # {{{ test_empty_ndrange
1392
-
1393
@pytest.mark.parametrize("empty_shape", [(0,), (3, 0, 2)])
def test_empty_ndrange(ctx_factory, empty_shape):
    """Launch a kernel over an empty global size with ``allow_empty_ndrange``.

    *empty_shape* contains at least one zero-length axis; the launch must
    not raise.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if ctx._get_cl_version() < (1, 2) or cl.get_cl_header_version() < (1, 2):
        pytest.skip("OpenCL 1.2 required for empty NDRange support")

    a = cl_array.zeros(queue, empty_shape, dtype=np.float32)

    prg = cl.Program(ctx, """
        __kernel void add_two(__global float *a_g)
        {
          a_g[get_global_id(0)] += 2;
        }
        """).build()

    prg.add_two(queue, a.shape, None, a.data, allow_empty_ndrange=True)
1411
-
1412
- # }}}
1413
-
1414
-
1415
- # {{{ test_command_queue_context_manager
1416
-
1417
def test_command_queue_context_manager(ctx_factory):
    """Check that using a queue after its ``with`` block emits a warning."""
    ctx = ctx_factory()

    with cl.CommandQueue(ctx) as queue:
        exited_queue = queue

    # The queue's context manager has exited, so flush() must warn.
    with pytest.warns(cl.CommandQueueUsedAfterExit):
        exited_queue.flush()
1424
-
1425
- # }}}
1426
-
1427
-
1428
- # {{{ test_capture_call
1429
-
1430
def test_capture_call(ctx_factory):
    """Check that ``Kernel.capture_call`` emits a runnable Python script.

    The captured text is compiled and executed, and the ``main`` function it
    defines is called.
    """
    # Use the context supplied by the fixture. Replacing it with
    # cl.create_some_context() would ignore the device this test was
    # parametrized with.
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # Seeded like the other tests in this file, for reproducible inputs.
    rng = np.random.default_rng(seed=42)
    a_np = rng.random(500, dtype=np.float32)
    b_np = rng.random(500, dtype=np.float32)

    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)

    prg = cl.Program(ctx, """
    __kernel void sum(
        __global const float *a_g, __global const float *b_g, __global float *res_g)
    {
        int gid = get_global_id(0);
        res_g[gid] = a_g[gid] + b_g[gid];
    }
    """).build()

    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    from io import StringIO
    sio = StringIO()
    prg.sum.capture_call(sio, queue, a_np.shape, None, a_g, b_g, res_g)

    # The executed text is produced by capture_call above, not external input.
    compile_dict = {}
    exec(compile(sio.getvalue(), "captured.py", "exec"), compile_dict)
    compile_dict["main"]()
1462
-
1463
- # }}}
1464
-
1465
-
1466
- # {{{ test_enqueue_copy_array
1467
-
1468
def test_enqueue_copy_array(ctx_factory):
    """Copy a single-layer image array into a host array.

    Regression test for https://github.com/inducer/pyopencl/issues/618.
    """
    # https://github.com/inducer/pyopencl/issues/618
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if ctx._get_cl_version() < (1, 2) or cl.get_cl_header_version() < (1, 2):
        pytest.skip("requires CL 1.2")

    if not queue.device.image_support:
        pytest.skip("device has no image support")

    image_format = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT)
    flags = cl.mem_flags.READ_ONLY
    image = np.ascontiguousarray(np.zeros((128, 128, 4), np.float32))

    # (width, height, one layer)
    extent = (image.shape[1], image.shape[0], 1)
    image_cl = cl.Image(ctx, flags, image_format,
                        shape=extent, is_array=True)
    cl.enqueue_copy(queue, dest=image, src=image_cl,
                    origin=(0, 0, 0), region=extent)
1486
-
1487
-
1488
def test_enqueue_copy_array_2(ctx_factory):
    """Copy an image array to another image array and to a buffer.

    Regression test for https://github.com/inducer/pyopencl/issues/618.
    Calls the low-level ``cl._cl`` copy functions directly.
    """
    # https://github.com/inducer/pyopencl/issues/618
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if ctx._get_cl_version() < (1, 2) or cl.get_cl_header_version() < (1, 2):
        pytest.skip("requires CL 1.2")

    if not queue.device.image_support:
        pytest.skip("device has no image support")

    mf = cl.mem_flags
    image_format = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT)
    image = np.ascontiguousarray(np.zeros((128, 128, 4), np.float32))
    image_shape = (image.shape[1], image.shape[0])
    array_shape = (*image_shape, 1)

    # A plain (non-array) image is created as well; its handle is not kept.
    cl.Image(ctx, mf.READ_ONLY,
             image_format, shape=image_shape)
    image_array_cl = cl.Image(ctx, mf.READ_ONLY,
                              image_format, shape=array_shape, is_array=True)
    image2_array_cl = cl.Image(ctx, mf.WRITE_ONLY,
                               image_format, shape=array_shape, is_array=True)
    buffer_cl = cl.Buffer(ctx, mf.WRITE_ONLY, size=image.nbytes)

    zero_origin = (0, 0, 0)
    cl._cl._enqueue_copy_image(
        queue, src=image_array_cl, dest=image2_array_cl,
        src_origin=zero_origin, dest_origin=zero_origin, region=array_shape)
    cl._cl._enqueue_copy_image_to_buffer(
        queue, src=image_array_cl, dest=buffer_cl, offset=0,
        origin=zero_origin, region=array_shape)
1517
-
1518
- # }}}
1519
-
1520
-
1521
def test_zero_size_svm_allocations(ctx_factory):
    """Allocate and release zero-byte SVM memory in three different ways.

    Covers a direct ``cl.SVMAllocation``, an ``SVMAllocator`` and an
    ``SVMPool`` built on that allocator.
    """
    ctx = ctx_factory()

    from pyopencl.characterize import has_coarse_grain_buffer_svm
    if not has_coarse_grain_buffer_svm(ctx.devices[0]):
        pytest.skip("device does not support coarse-grain SVM")

    # Go back to svm_empty once
    # https://github.com/numpy/numpy/issues/26366 is solved.
    # zero_sized_svm = cl.svm_empty(ctx, cl.svm_mem_flags.READ_WRITE, 0, np.float64)
    direct_alloc = cl.SVMAllocation(ctx, 0, 0, cl.svm_mem_flags.READ_WRITE)
    direct_alloc.release()

    from pyopencl.tools import SVMAllocator, SVMPool

    svm_alloc = SVMAllocator(ctx)
    from_allocator = svm_alloc(0)
    from_allocator.release()

    svm_pool = SVMPool(svm_alloc)
    from_pool = svm_pool(0)
    from_pool.release()
1544
-
1545
-
1546
def test_buffer_release(ctx_factory):
    """Allocate from a ``MemoryPool`` and release the allocation explicitly."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    allocator = cl.tools.ImmediateAllocator(queue)
    mem_pool = cl.tools.MemoryPool(allocator)

    pooled = mem_pool.allocate(1000)
    print(type(pooled))
    pooled.release()
1555
-
1556
-
1557
if __name__ == "__main__":
    import sys

    # With a command-line argument, run it as Python code in this module's
    # namespace; with none, run this file under pytest.
    if len(sys.argv) <= 1:
        from pytest import main
        main([__file__])
    else:
        exec(sys.argv[1])
1564
-
1565
- # vim: foldmethod=marker