pyopencl 2024.2__cp312-cp312-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (122) hide show
  1. pyopencl/__init__.py +2393 -0
  2. pyopencl/_cl.cpython-312-darwin.so +0 -0
  3. pyopencl/_cluda.py +54 -0
  4. pyopencl/_mymako.py +14 -0
  5. pyopencl/algorithm.py +1444 -0
  6. pyopencl/array.py +3427 -0
  7. pyopencl/bitonic_sort.py +238 -0
  8. pyopencl/bitonic_sort_templates.py +594 -0
  9. pyopencl/cache.py +534 -0
  10. pyopencl/capture_call.py +176 -0
  11. pyopencl/characterize/__init__.py +433 -0
  12. pyopencl/characterize/performance.py +237 -0
  13. pyopencl/cl/pyopencl-airy.cl +324 -0
  14. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  15. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  16. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  17. pyopencl/cl/pyopencl-complex.h +303 -0
  18. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  19. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  20. pyopencl/cl/pyopencl-random123/array.h +325 -0
  21. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  22. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  23. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  24. pyopencl/clmath.py +280 -0
  25. pyopencl/clrandom.py +408 -0
  26. pyopencl/cltypes.py +137 -0
  27. pyopencl/compyte/__init__.py +0 -0
  28. pyopencl/compyte/array.py +214 -0
  29. pyopencl/compyte/dtypes.py +290 -0
  30. pyopencl/compyte/ndarray/__init__.py +0 -0
  31. pyopencl/compyte/ndarray/gen_elemwise.py +1907 -0
  32. pyopencl/compyte/ndarray/gen_reduction.py +1511 -0
  33. pyopencl/compyte/ndarray/setup_opencl.py +101 -0
  34. pyopencl/compyte/ndarray/test_gpu_elemwise.py +411 -0
  35. pyopencl/compyte/ndarray/test_gpu_ndarray.py +487 -0
  36. pyopencl/elementwise.py +1164 -0
  37. pyopencl/invoker.py +418 -0
  38. pyopencl/ipython_ext.py +68 -0
  39. pyopencl/reduction.py +780 -0
  40. pyopencl/scan.py +1898 -0
  41. pyopencl/tools.py +1513 -0
  42. pyopencl/version.py +3 -0
  43. pyopencl-2024.2.data/data/CITATION.cff +74 -0
  44. pyopencl-2024.2.data/data/LICENSE +282 -0
  45. pyopencl-2024.2.data/data/Makefile.in +21 -0
  46. pyopencl-2024.2.data/data/README.rst +70 -0
  47. pyopencl-2024.2.data/data/README_SETUP.txt +34 -0
  48. pyopencl-2024.2.data/data/aksetup_helper.py +1013 -0
  49. pyopencl-2024.2.data/data/configure.py +6 -0
  50. pyopencl-2024.2.data/data/contrib/cldis.py +91 -0
  51. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/README +29 -0
  52. pyopencl-2024.2.data/data/contrib/fortran-to-opencl/translate.py +1441 -0
  53. pyopencl-2024.2.data/data/contrib/pyopencl.vim +84 -0
  54. pyopencl-2024.2.data/data/doc/Makefile +23 -0
  55. pyopencl-2024.2.data/data/doc/algorithm.rst +214 -0
  56. pyopencl-2024.2.data/data/doc/array.rst +305 -0
  57. pyopencl-2024.2.data/data/doc/conf.py +26 -0
  58. pyopencl-2024.2.data/data/doc/howto.rst +105 -0
  59. pyopencl-2024.2.data/data/doc/index.rst +137 -0
  60. pyopencl-2024.2.data/data/doc/make_constants.py +561 -0
  61. pyopencl-2024.2.data/data/doc/misc.rst +885 -0
  62. pyopencl-2024.2.data/data/doc/runtime.rst +51 -0
  63. pyopencl-2024.2.data/data/doc/runtime_const.rst +30 -0
  64. pyopencl-2024.2.data/data/doc/runtime_gl.rst +78 -0
  65. pyopencl-2024.2.data/data/doc/runtime_memory.rst +527 -0
  66. pyopencl-2024.2.data/data/doc/runtime_platform.rst +184 -0
  67. pyopencl-2024.2.data/data/doc/runtime_program.rst +364 -0
  68. pyopencl-2024.2.data/data/doc/runtime_queue.rst +182 -0
  69. pyopencl-2024.2.data/data/doc/subst.rst +36 -0
  70. pyopencl-2024.2.data/data/doc/tools.rst +4 -0
  71. pyopencl-2024.2.data/data/doc/types.rst +42 -0
  72. pyopencl-2024.2.data/data/examples/black-hole-accretion.py +2227 -0
  73. pyopencl-2024.2.data/data/examples/demo-struct-reduce.py +75 -0
  74. pyopencl-2024.2.data/data/examples/demo.py +39 -0
  75. pyopencl-2024.2.data/data/examples/demo_array.py +32 -0
  76. pyopencl-2024.2.data/data/examples/demo_array_svm.py +37 -0
  77. pyopencl-2024.2.data/data/examples/demo_elementwise.py +34 -0
  78. pyopencl-2024.2.data/data/examples/demo_elementwise_complex.py +53 -0
  79. pyopencl-2024.2.data/data/examples/demo_mandelbrot.py +183 -0
  80. pyopencl-2024.2.data/data/examples/demo_meta_codepy.py +56 -0
  81. pyopencl-2024.2.data/data/examples/demo_meta_template.py +55 -0
  82. pyopencl-2024.2.data/data/examples/dump-performance.py +38 -0
  83. pyopencl-2024.2.data/data/examples/dump-properties.py +86 -0
  84. pyopencl-2024.2.data/data/examples/gl_interop_demo.py +84 -0
  85. pyopencl-2024.2.data/data/examples/gl_particle_animation.py +218 -0
  86. pyopencl-2024.2.data/data/examples/ipython-demo.ipynb +203 -0
  87. pyopencl-2024.2.data/data/examples/median-filter.py +99 -0
  88. pyopencl-2024.2.data/data/examples/n-body.py +1070 -0
  89. pyopencl-2024.2.data/data/examples/narray.py +37 -0
  90. pyopencl-2024.2.data/data/examples/noisyImage.jpg +0 -0
  91. pyopencl-2024.2.data/data/examples/pi-monte-carlo.py +1166 -0
  92. pyopencl-2024.2.data/data/examples/svm.py +82 -0
  93. pyopencl-2024.2.data/data/examples/transpose.py +229 -0
  94. pyopencl-2024.2.data/data/pytest.ini +3 -0
  95. pyopencl-2024.2.data/data/src/bitlog.cpp +51 -0
  96. pyopencl-2024.2.data/data/src/bitlog.hpp +83 -0
  97. pyopencl-2024.2.data/data/src/clinfo_ext.h +134 -0
  98. pyopencl-2024.2.data/data/src/mempool.hpp +444 -0
  99. pyopencl-2024.2.data/data/src/pyopencl_ext.h +77 -0
  100. pyopencl-2024.2.data/data/src/tools.hpp +90 -0
  101. pyopencl-2024.2.data/data/src/wrap_cl.cpp +61 -0
  102. pyopencl-2024.2.data/data/src/wrap_cl.hpp +5853 -0
  103. pyopencl-2024.2.data/data/src/wrap_cl_part_1.cpp +369 -0
  104. pyopencl-2024.2.data/data/src/wrap_cl_part_2.cpp +702 -0
  105. pyopencl-2024.2.data/data/src/wrap_constants.cpp +1274 -0
  106. pyopencl-2024.2.data/data/src/wrap_helpers.hpp +213 -0
  107. pyopencl-2024.2.data/data/src/wrap_mempool.cpp +731 -0
  108. pyopencl-2024.2.data/data/test/add-vectors-32.spv +0 -0
  109. pyopencl-2024.2.data/data/test/add-vectors-64.spv +0 -0
  110. pyopencl-2024.2.data/data/test/empty-header.h +1 -0
  111. pyopencl-2024.2.data/data/test/test_algorithm.py +1180 -0
  112. pyopencl-2024.2.data/data/test/test_array.py +2392 -0
  113. pyopencl-2024.2.data/data/test/test_arrays_in_structs.py +100 -0
  114. pyopencl-2024.2.data/data/test/test_clmath.py +529 -0
  115. pyopencl-2024.2.data/data/test/test_clrandom.py +75 -0
  116. pyopencl-2024.2.data/data/test/test_enqueue_copy.py +271 -0
  117. pyopencl-2024.2.data/data/test/test_wrapper.py +1554 -0
  118. pyopencl-2024.2.dist-info/LICENSE +282 -0
  119. pyopencl-2024.2.dist-info/METADATA +105 -0
  120. pyopencl-2024.2.dist-info/RECORD +122 -0
  121. pyopencl-2024.2.dist-info/WHEEL +5 -0
  122. pyopencl-2024.2.dist-info/top_level.txt +1 -0
pyopencl/tools.py ADDED
@@ -0,0 +1,1513 @@
1
+ r"""
2
+ .. _memory-pools:
3
+
4
+ Memory Pools
5
+ ------------
6
+
7
+ Memory allocation (e.g. in the form of the :func:`pyopencl.Buffer` constructor)
8
+ can be expensive if used frequently. For example, code based on
9
+ :class:`pyopencl.array.Array` can easily run into this issue because a fresh
10
+ memory area is allocated for each intermediate result. Memory pools are a
11
+ remedy for this problem based on the observation that often many of the block
12
+ allocations are of the same sizes as previously used ones.
13
+
14
+ Then, instead of fully returning the memory to the system and incurring the
15
+ associated reallocation overhead, the pool holds on to the memory and uses it
16
+ to satisfy future allocations of similarly-sized blocks. The pool reacts
17
+ appropriately to out-of-memory conditions as long as all memory allocations
18
+ are made through it. Allocations performed from outside of the pool may run
19
+ into spurious out-of-memory conditions due to the pool owning much or all of
20
+ the available memory.
21
+
22
+ There are two flavors of allocators and memory pools:
23
+
24
+ - :ref:`buf-mempool`
25
+ - :ref:`svm-mempool`
26
+
27
+ :class:`pyopencl.array.Array`\ s can be used with memory pools in a
28
+ straightforward manner::
29
+
30
+ mem_pool = pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(queue))
31
+ a_dev = cl_array.arange(queue, 2000, dtype=np.float32, allocator=mem_pool)
32
+
33
+ Likewise, SVM-based allocators are directly usable with
34
+ :class:`pyopencl.array.Array`.
35
+
36
+ .. _buf-mempool:
37
+
38
+ :class:`~pyopencl.Buffer`-based Allocators and Memory Pools
39
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
40
+
41
+ .. autoclass:: PooledBuffer
42
+
43
+ .. autoclass:: AllocatorBase
44
+
45
+ .. autoclass:: DeferredAllocator
46
+
47
+ .. autoclass:: ImmediateAllocator
48
+
49
+ .. autoclass:: MemoryPool
50
+
51
+ .. _svm-mempool:
52
+
53
+ :ref:`SVM <svm>`-Based Allocators and Memory Pools
54
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
55
+
56
+ SVM functionality requires OpenCL 2.0.
57
+
58
+ .. autoclass:: PooledSVM
59
+
60
+ .. autoclass:: SVMAllocator
61
+
62
+ .. autoclass:: SVMPool
63
+
64
+ CL-Object-dependent Caching
65
+ ---------------------------
66
+
67
+ .. autofunction:: first_arg_dependent_memoize
68
+ .. autofunction:: clear_first_arg_caches
69
+
70
+ Testing
71
+ -------
72
+
73
+ .. autofunction:: pytest_generate_tests_for_pyopencl
74
+
75
+ Argument Types
76
+ --------------
77
+
78
+ .. autoclass:: Argument
79
+ .. autoclass:: DtypedArgument
80
+
81
+ .. autoclass:: VectorArg
82
+ .. autoclass:: ScalarArg
83
+ .. autoclass:: OtherArg
84
+
85
+ .. autofunction:: parse_arg_list
86
+
87
+ Device Characterization
88
+ -----------------------
89
+
90
+ .. automodule:: pyopencl.characterize
91
+ :members:
92
+
93
+ Type aliases
94
+ ------------
95
+
96
+ .. currentmodule:: pyopencl._cl
97
+
98
+ .. class:: AllocatorBase
99
+
100
+ See :class:`pyopencl.tools.AllocatorBase`.
101
+ """
102
+
103
+
104
+ __copyright__ = "Copyright (C) 2010 Andreas Kloeckner"
105
+
106
+ __license__ = """
107
+ Permission is hereby granted, free of charge, to any person
108
+ obtaining a copy of this software and associated documentation
109
+ files (the "Software"), to deal in the Software without
110
+ restriction, including without limitation the rights to use,
111
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
112
+ copies of the Software, and to permit persons to whom the
113
+ Software is furnished to do so, subject to the following
114
+ conditions:
115
+
116
+ The above copyright notice and this permission notice shall be
117
+ included in all copies or substantial portions of the Software.
118
+
119
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
120
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
121
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
122
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
123
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
124
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
125
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
126
+ OTHER DEALINGS IN THE SOFTWARE.
127
+ """
128
+
129
+ import re
130
+ from abc import ABC, abstractmethod
131
+ from sys import intern
132
+ from typing import Any, List, Optional, Union
133
+
134
+ import numpy as np
135
+ from pytools import memoize, memoize_method
136
+ from pytools.persistent_dict import KeyBuilder as KeyBuilderBase
137
+
138
+ from pyopencl._cl import bitlog2, get_cl_header_version # noqa: F401
139
+ from pyopencl.compyte.dtypes import TypeNameNotKnown # noqa: F401
140
+ from pyopencl.compyte.dtypes import ( # noqa: F401
141
+ dtype_to_ctype, get_or_register_dtype, register_dtype)
142
+
143
+
144
+ # Do not add a pyopencl import here: This will add an import cycle.
145
+
146
+
147
def _register_types():
    """Populate the dtype registry with the C type names used by PyOpenCL.

    First fills the shared registry with the OpenCL C types, then makes sure
    the complex type names ``cfloat_t`` and ``cdouble_t`` are registered for
    the single- and double-precision complex dtypes.
    """
    from pyopencl.compyte.dtypes import (
        TYPE_REGISTRY, fill_registry_with_opencl_c_types)

    fill_registry_with_opencl_c_types(TYPE_REGISTRY)

    # Registration order is kept: single precision first, then double.
    complex_type_names = (
        ("cfloat_t", np.complex64),
        ("cdouble_t", np.complex128),
    )
    for c_name, numpy_type in complex_type_names:
        get_or_register_dtype(c_name, numpy_type)
155
+
156
+
157
+ _register_types()
158
+
159
+
160
+ # {{{ imported names
161
+
162
+ from pyopencl._cl import ( # noqa: F401
163
+ AllocatorBase, DeferredAllocator, ImmediateAllocator, MemoryPool, PooledBuffer)
164
+
165
+
166
+ if get_cl_header_version() >= (2, 0):
167
+ from pyopencl._cl import PooledSVM, SVMAllocator, SVMPool # noqa: F401
168
+
169
+ # }}}
170
+
171
+
172
+ # {{{ monkeypatch docstrings into imported interfaces
173
+
174
+ _MEMPOOL_IFACE_DOCS = """
175
+ .. note::
176
+
177
+ The current implementation of the memory pool will retain allocated
178
+ memory after it is returned by the application and keep it in a bin
179
+ identified by the leading *leading_bits_in_bin_id* bits of the
180
+ allocation size. To ensure that allocations within each bin are
181
+ interchangeable, allocation sizes are rounded up to the largest size
182
+ that shares the leading bits of the requested allocation size.
183
+
184
+ The current default value of *leading_bits_in_bin_id* is
185
+ four, but this may change in future versions and is not
186
+ guaranteed.
187
+
188
+ *leading_bits_in_bin_id* must be passed by keyword,
189
+ and its role is purely advisory. It is not guaranteed
190
+ that future versions of the pool will use the
191
+ same allocation scheme and/or honor *leading_bits_in_bin_id*.
192
+
193
+ .. attribute:: held_blocks
194
+
195
+ The number of unused blocks being held by this pool.
196
+
197
+ .. attribute:: active_blocks
198
+
199
+ The number of blocks in active use that have been allocated
200
+ through this pool.
201
+
202
+ .. attribute:: managed_bytes
203
+
204
+ "Managed" memory is "active" and "held" memory.
205
+
206
+ .. versionadded:: 2021.1.2
207
+
208
+ .. attribute:: active_bytes
209
+
210
+ "Active" bytes are bytes under the control of the application.
211
+ This may be smaller than the actual allocated size reflected
212
+ in :attr:`managed_bytes`.
213
+
214
+ .. versionadded:: 2021.1.2
215
+
216
+
217
+ .. method:: free_held
218
+
219
+ Free all unused memory that the pool is currently holding.
220
+
221
+ .. method:: stop_holding
222
+
223
+ Instruct the memory pool to start immediately freeing memory returned
224
+ to it, instead of holding it for future allocations.
225
+ Implicitly calls :meth:`free_held`.
226
+ This is useful as a cleanup action when a memory pool falls out
227
+ of use.
228
+ """
229
+
230
+
231
+ def _monkeypatch_docstrings():
232
+ from pytools.codegen import remove_common_indentation
233
+
234
+ PooledBuffer.__doc__ = """
235
+ An object representing a :class:`MemoryPool`-based allocation of
236
+ :class:`~pyopencl.Buffer`-style device memory. Analogous to
237
+ :class:`~pyopencl.Buffer`, however once this object is deleted, its
238
+ associated device memory is returned to the pool.
239
+
240
+ Is a :class:`pyopencl.MemoryObject`.
241
+ """
242
+
243
+ AllocatorBase.__doc__ = """
244
+ An interface implemented by various memory allocation functions
245
+ in :mod:`pyopencl`.
246
+
247
+ .. automethod:: __call__
248
+
249
+ Allocate and return a :class:`pyopencl.Buffer` of the given *size*.
250
+ """
251
+
252
+ # {{{ DeferredAllocator
253
+
254
+ DeferredAllocator.__doc__ = """
255
+ *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds
256
+ to the *flags* argument of :class:`pyopencl.Buffer`. DeferredAllocator
257
+ has the same semantics as regular OpenCL buffer allocation, i.e. it may
258
+ promise memory to be available that may (in any call to a buffer-using
259
+ CL function) turn out to not exist later on. (Allocations in CL are
260
+ bound to contexts, not devices, and memory availability depends on which
261
+ device the buffer is used with.)
262
+
263
+ Implements :class:`AllocatorBase`.
264
+
265
+ .. versionchanged :: 2013.1
266
+
267
+ ``CLAllocator`` was deprecated and replaced
268
+ by :class:`DeferredAllocator`.
269
+
270
+ .. method:: __init__(context, mem_flags=pyopencl.mem_flags.READ_WRITE)
271
+
272
+ .. automethod:: __call__
273
+
274
+ Allocate a :class:`pyopencl.Buffer` of the given *size*.
275
+
276
+ .. versionchanged :: 2020.2
277
+
278
+ The allocator will succeed even for allocations of size zero,
279
+ returning *None*.
280
+ """
281
+
282
+ # }}}
283
+
284
+ # {{{ ImmediateAllocator
285
+
286
+ ImmediateAllocator.__doc__ = """
287
+ *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds
288
+ to the *flags* argument of :class:`pyopencl.Buffer`.
289
+ :class:`ImmediateAllocator` will attempt to ensure at allocation time that
290
+ allocated memory is actually available. If no memory is available, an
291
+ out-of-memory error is reported at allocation time.
292
+
293
+ Implements :class:`AllocatorBase`.
294
+
295
+ .. versionadded:: 2013.1
296
+
297
+ .. method:: __init__(queue, mem_flags=pyopencl.mem_flags.READ_WRITE)
298
+
299
+ .. automethod:: __call__
300
+
301
+ Allocate a :class:`pyopencl.Buffer` of the given *size*.
302
+
303
+ .. versionchanged :: 2020.2
304
+
305
+ The allocator will succeed even for allocations of size zero,
306
+ returning *None*.
307
+ """
308
+
309
+ # }}}
310
+
311
+ # {{{ MemoryPool
312
+
313
+ MemoryPool.__doc__ = remove_common_indentation("""
314
+ A memory pool for OpenCL device memory in :class:`pyopencl.Buffer` form.
315
+ *allocator* must be an instance of one of the above classes, and should be
316
+ an :class:`ImmediateAllocator`. The memory pool assumes that allocation
317
+ failures are reported by the allocator immediately, and not in the
318
+ OpenCL-typical deferred manner.
319
+
320
+ Implements :class:`AllocatorBase`.
321
+
322
+ .. versionchanged:: 2019.1
323
+
324
+ Current bin allocation behavior documented, *leading_bits_in_bin_id*
325
+ added.
326
+
327
+ .. automethod:: __init__
328
+
329
+ .. automethod:: allocate
330
+
331
+ Return a :class:`PooledBuffer` of the given *size*.
332
+
333
+ .. automethod:: __call__
334
+
335
+ Synonym for :meth:`allocate` to match :class:`AllocatorBase`.
336
+
337
+ .. versionadded:: 2011.2
338
+ """) + _MEMPOOL_IFACE_DOCS
339
+
340
+ # }}}
341
+
342
+
343
+ _monkeypatch_docstrings()
344
+
345
+
346
+ def _monkeypatch_svm_docstrings():
347
+ from pytools.codegen import remove_common_indentation
348
+
349
+ # {{{ PooledSVM
350
+
351
+ PooledSVM.__doc__ = """
352
+ An object representing a :class:`SVMPool`-based allocation of
353
+ :ref:`svm`. Analogous to :class:`~pyopencl.SVMAllocation`, however once
354
+ this object is deleted, its associated device memory is returned to the
355
+ pool from which it came.
356
+
357
+ .. versionadded:: 2022.2
358
+
359
+ .. note::
360
+
361
+ If the :class:`SVMAllocator` for the :class:`SVMPool` that allocated an
362
+ object of this type is associated with an (in-order)
363
+ :class:`~pyopencl.CommandQueue`, sufficient synchronization is provided
364
+ to ensure operations enqueued before deallocation complete before
365
+ operations from a different use (possibly in a different queue) are
366
+ permitted to start. This applies when :meth:`release` is called and
367
+ also when the object is freed automatically by the garbage collector.
368
+
369
+ Is a :class:`pyopencl.SVMPointer`.
370
+
371
+ Supports structural equality and hashing.
372
+
373
+ .. automethod:: release
374
+
375
+ Return the held memory to the pool. See the note about synchronization
376
+ behavior during deallocation above.
377
+
378
+ .. automethod:: enqueue_release
379
+
380
+ Synonymous to :meth:`release`, for consistency with
381
+ :class:`~pyopencl.SVMAllocation`. Note that, unlike
382
+ :meth:`pyopencl.SVMAllocation.enqueue_release`, specifying a queue
383
+ or events to be waited for is not supported.
384
+
385
+ .. automethod:: bind_to_queue
386
+
387
+ Analogous to :meth:`pyopencl.SVMAllocation.bind_to_queue`.
388
+
389
+ .. automethod:: unbind_from_queue
390
+
391
+ Analogous to :meth:`pyopencl.SVMAllocation.unbind_from_queue`.
392
+ """
393
+
394
+ # }}}
395
+
396
+ # {{{ SVMAllocator
397
+
398
+ SVMAllocator.__doc__ = """
399
+ .. versionadded:: 2022.2
400
+
401
+ .. automethod:: __init__
402
+
403
+ :arg flags: See :class:`~pyopencl.svm_mem_flags`.
404
+ :arg queue: If not specified, allocations will be freed
405
+ eagerly, irrespective of whether pending/enqueued operations
406
+ are still using the memory.
407
+
408
+ If specified, deallocation of memory will be enqueued
409
+ with the given queue, and will only be performed
410
+ after previously-enqueued operations in the queue have
411
+ completed.
412
+
413
+ It is an error to specify an out-of-order queue.
414
+
415
+ .. warning::
416
+
417
+ Not specifying a queue will typically lead to undesired
418
+ behavior, including crashes and memory corruption.
419
+ See the warning in :ref:`svm`.
420
+
421
+ .. automethod:: __call__
422
+
423
+ Return a :class:`~pyopencl.SVMAllocation` of the given *size*.
424
+ """
425
+
426
+ # }}}
427
+
428
+ # {{{ SVMPool
429
+
430
+ SVMPool.__doc__ = remove_common_indentation("""
431
+ A memory pool for OpenCL device memory in :ref:`SVM <svm>` form.
432
+ *allocator* must be an instance of :class:`SVMAllocator`.
433
+
434
+ .. versionadded:: 2022.2
435
+
436
+ .. automethod:: __init__
437
+ .. automethod:: __call__
438
+
439
+ Return a :class:`PooledSVM` of the given *size*.
440
+ """) + _MEMPOOL_IFACE_DOCS
441
+
442
+ # }}}
443
+
444
+
445
+ if get_cl_header_version() >= (2, 0):
446
+ _monkeypatch_svm_docstrings()
447
+
448
+ # }}}
449
+
450
+
451
+ # {{{ first-arg caches
452
+
453
# Every cache dictionary created by the first-argument-dependent memoizers is
# recorded here so that clear_first_arg_caches() can empty all of them.
_first_arg_dependent_caches = []


def first_arg_dependent_memoize(func):
    """Provides memoization for a function. Typically used to cache
    things that get created inside a :class:`pyopencl.Context`, e.g. programs
    and kernels. Assumes that the first argument of the decorated function is
    an OpenCL object that might go away, such as a :class:`pyopencl.Context` or
    a :class:`pyopencl.CommandQueue`, and based on which we might want to clear
    the cache.

    Results are cached per first argument, keyed on the remaining positional
    and keyword arguments, all of which must therefore be hashable.

    :arg func: the function to memoize.
    :returns: a wrapper carrying the name and docstring of *func*.

    .. versionadded:: 2011.2
    """
    from functools import wraps

    # NOTE: The docstring lives on the decorator rather than on 'wrapper':
    # functools.wraps replaces the wrapper's __doc__ with that of *func*,
    # so a docstring placed on the wrapper is never visible.
    @wraps(func)
    def wrapper(cl_object, *args, **kwargs):
        if kwargs:
            cache_key = (args, frozenset(kwargs.items()))
        else:
            cache_key = (args,)

        # The per-function cache is created lazily on first call and stored
        # as an attribute of the wrapped function.
        try:
            ctx_dict = func._pyopencl_first_arg_dep_memoize_dic
        except AttributeError:
            # FIXME: This may keep contexts alive longer than desired.
            # But I guess since the memory in them is freed, who cares.
            ctx_dict = func._pyopencl_first_arg_dep_memoize_dic = {}
            _first_arg_dependent_caches.append(ctx_dict)

        try:
            return ctx_dict[cl_object][cache_key]
        except KeyError:
            arg_dict = ctx_dict.setdefault(cl_object, {})
            result = func(cl_object, *args, **kwargs)
            arg_dict[cache_key] = result
            return result

    return wrapper
491
+
492
+
493
+ context_dependent_memoize = first_arg_dependent_memoize
494
+
495
+
496
def first_arg_dependent_memoize_nested(nested_func):
    """Provides memoization for nested functions. Typically used to cache
    things that get created inside a :class:`pyopencl.Context`, e.g. programs
    and kernels. Assumes that the first argument of the decorated function is
    an OpenCL object that might go away, such as a :class:`pyopencl.Context` or
    a :class:`pyopencl.CommandQueue`, and will therefore respond to
    :func:`clear_first_arg_caches`.

    The cache is stored as an attribute on the object obtained by looking up
    the calling function's name in the caller's globals. The returned wrapper
    accepts positional arguments only; they form the cache key and must be
    hashable.

    .. versionadded:: 2013.1
    """

    from functools import wraps
    # Attribute name under which the cache is stored; it is built from the
    # nested function's name, defining file and first line number.
    cache_dict_name = intern("_memoize_inner_dic_%s_%s_%d"
            % (nested_func.__name__, nested_func.__code__.co_filename,
                nested_func.__code__.co_firstlineno))

    from inspect import currentframe

    # prevent ref cycle
    try:
        caller_frame = currentframe().f_back
        # NOTE(review): this lookup raises KeyError unless the caller's name
        # is bound in its own globals -- presumably the caller is always a
        # module-level function; confirm against call sites.
        cache_context = caller_frame.f_globals[
                caller_frame.f_code.co_name]
    finally:
        # NOTE: the 'del caller_frame' that would drop the frame reference
        # is disabled here, so this 'finally' currently does nothing.
        #del caller_frame
        pass

    # Reuse the cache from an earlier invocation of the caller if one was
    # already attached; otherwise create, register and attach a new one.
    try:
        cache_dict = getattr(cache_context, cache_dict_name)
    except AttributeError:
        cache_dict = {}
        _first_arg_dependent_caches.append(cache_dict)
        setattr(cache_context, cache_dict_name, cache_dict)

    @wraps(nested_func)
    def new_nested_func(cl_object, *args):
        try:
            return cache_dict[cl_object][args]
        except KeyError:
            arg_dict = cache_dict.setdefault(cl_object, {})
            result = nested_func(cl_object, *args)
            arg_dict[args] = result
            return result

    return new_nested_func
543
+
544
+
545
def clear_first_arg_caches():
    """Drop every entry from all first-argument-dependent memoization caches.

    Emptying the caches also lets go of the references they hold to the
    OpenCL objects used as first arguments. Call this if your program needs
    to detach from a context that cached results would otherwise keep
    referenced.

    .. versionadded:: 2011.2
    """
    for registered_cache in _first_arg_dependent_caches:
        # The dictionaries are emptied in place and stay registered, so the
        # memoizers that own them keep working afterwards.
        registered_cache.clear()
555
+
556
+
557
+ import atexit
558
+
559
+
560
+ atexit.register(clear_first_arg_caches)
561
+
562
+ # }}}
563
+
564
+
565
+ # {{{ pytest fixtures
566
+
567
+ class _ContextFactory:
568
+ def __init__(self, device):
569
+ self.device = device
570
+
571
+ def __call__(self):
572
+ # Get rid of leftovers from past tests.
573
+ # CL implementations are surprisingly limited in how many
574
+ # simultaneous contexts they allow...
575
+ clear_first_arg_caches()
576
+
577
+ from gc import collect
578
+ collect()
579
+
580
+ import pyopencl as cl
581
+ return cl.Context([self.device])
582
+
583
+ def __str__(self):
584
+ # Don't show address, so that parallel test collection works
585
+ return ("<context factory for <pyopencl.Device '%s' on '%s'>>" %
586
+ (self.device.name.strip(),
587
+ self.device.platform.name.strip()))
588
+
589
+
590
def get_test_platforms_and_devices(plat_dev_string=None):
    """Select the OpenCL platforms and devices that tests should run on.

    :arg plat_dev_string: a selection string of the form
        ``0:0,1;intel:i5``. Entries are separated by ``;``. An entry is
        either ``PLATFORM`` (all devices of that platform) or
        ``PLATFORM:DEVICE,DEVICE,...``. Every identifier is either an integer
        index or a case-insensitive substring of the object's name followed
        by a space and its vendor. If *None*, the value of the
        ``PYOPENCL_TEST`` environment variable is used. If the resulting
        string is unset or empty, all devices of all platforms are selected.

    :return: list of tuples (platform, [device, device, ...])

    :raises RuntimeError: if an entry contains more than one ``:`` or if a
        textual identifier matches no object.
    """

    import pyopencl as cl

    if plat_dev_string is None:
        import os
        plat_dev_string = os.environ.get("PYOPENCL_TEST", None)

    def find_cl_obj(objs, identifier):
        # Integer identifiers index directly into *objs*.
        try:
            num = int(identifier)
        except ValueError:
            pass
        else:
            return objs[num]

        # Anything else is matched as a substring of "<name> <vendor>".
        needle = identifier.lower()
        for obj in objs:
            if needle in (obj.name + " " + obj.vendor).lower():
                return obj

        raise RuntimeError("object '%s' not found" % identifier)

    if not plat_dev_string:
        return [
                (platform, platform.get_devices())
                for platform in cl.get_platforms()]

    platforms = cl.get_platforms()
    result = []

    for entry in plat_dev_string.split(";"):
        lhsrhs = entry.split(":")

        if len(lhsrhs) > 2:
            raise RuntimeError("invalid syntax of PYOPENCL_TEST")

        platform = find_cl_obj(platforms, lhsrhs[0])
        devs = platform.get_devices()

        if len(lhsrhs) == 2:
            # Restrict to the explicitly listed devices.
            devs = [find_cl_obj(devs, dev_id)
                    for dev_id in lhsrhs[1].split(",")]

        result.append((platform, devs))

    return result
645
+
646
+
647
def get_pyopencl_fixture_arg_names(metafunc, extra_arg_names=None):
    """Return the PyOpenCL-provided argument names that *metafunc* requests.

    :arg metafunc: an object with a ``fixturenames`` attribute listing the
        argument names requested by the test function.
    :arg extra_arg_names: optional list of additional supported names,
        considered after the built-in ones.
    :returns: the supported names found in ``metafunc.fixturenames``, in the
        order ``platform``, ``device``, ``ctx_factory``, ``ctx_getter``,
        followed by any extra names.

    Issues a :class:`DeprecationWarning` if ``ctx_getter`` is requested.
    """
    extras = [] if extra_arg_names is None else extra_arg_names
    candidates = ["platform", "device", "ctx_factory", "ctx_getter"] + extras

    requested = metafunc.fixturenames
    selected = []
    for candidate in candidates:
        if candidate in requested:
            if candidate == "ctx_getter":
                from warnings import warn
                warn(
                    "The 'ctx_getter' arg is deprecated in favor of 'ctx_factory'.",
                    DeprecationWarning, stacklevel=2)

            selected.append(candidate)

    return selected
670
+
671
+
672
def get_pyopencl_fixture_arg_values():
    """Build the per-device argument values used for test parametrization.

    :returns: a tuple ``(arg_values, idfn)``. *arg_values* is a list with one
        dictionary per selected device, holding the keys ``platform``,
        ``device``, ``ctx_factory`` and ``ctx_getter``. *idfn* maps an
        argument value to the string used as its test ID.
    """
    import pyopencl as cl

    # ctx_factory and ctx_getter each get their own factory instance.
    arg_values = [
        {
            "platform": platform,
            "device": device,
            "ctx_factory": _ContextFactory(device),
            "ctx_getter": _ContextFactory(device)
        }
        for platform, devices in get_test_platforms_and_devices()
        for device in devices]

    def idfn(val):
        if not isinstance(val, cl.Platform):
            return str(val)

        # Don't show address, so that parallel test collection works
        return f"<pyopencl.Platform '{val.name}'>"

    return arg_values, idfn
694
+
695
+
696
def pytest_generate_tests_for_pyopencl(metafunc):
    """Using the line::

        from pyopencl.tools import pytest_generate_tests_for_pyopencl
                as pytest_generate_tests

    in your `pytest <https://docs.pytest.org/en/latest/>`__ test scripts allows
    you to use the arguments *ctx_factory*, *device*, or *platform* in your test
    functions, and they will automatically be run for each OpenCL device/platform
    in the system, as appropriate.

    The following environment variable is also supported to control
    device/platform choice::

        PYOPENCL_TEST=0:0,1;intel:i5,i7
    """

    requested_names = get_pyopencl_fixture_arg_names(metafunc)
    if requested_names:
        value_dicts, id_func = get_pyopencl_fixture_arg_values()

        # One tuple per device, ordered like *requested_names*.
        value_tuples = [
            tuple(value_dict[name] for name in requested_names)
            for value_dict in value_dicts
        ]

        metafunc.parametrize(requested_names, value_tuples, ids=id_func)
724
+
725
+ # }}}
726
+
727
+
728
+ # {{{ C argument lists
729
+
730
class Argument(ABC):
    """Abstract base class for a kernel argument.

    .. automethod:: declarator
    """

    @abstractmethod
    def declarator(self) -> str:
        """Return the C declarator text for this argument."""
        ...
738
+
739
+
740
class DtypedArgument(Argument):
    """An argument that has a name and a :class:`numpy.dtype`.

    Instances compare equal when they are of exactly the same class and
    agree in both attributes; hashing is consistent with that.

    .. attribute:: name
    .. attribute:: dtype
    """

    def __init__(self, dtype: Any, name: str) -> None:
        self.name = name
        # Whatever was passed is normalized to a numpy dtype object.
        self.dtype = np.dtype(dtype)

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return "{}({!r}, {})".format(cls_name, self.name, self.dtype)

    def __eq__(self, other: Any) -> bool:
        if type(self) is not type(other):
            return False

        return (self.dtype == other.dtype
                and self.name == other.name)

    def __hash__(self) -> int:
        combined = hash(type(self))
        combined ^= hash(self.dtype)
        combined ^= hash(self.name)
        return combined
766
+
767
+
768
class VectorArg(DtypedArgument):
    """Inherits from :class:`DtypedArgument`.

    Declares a ``__global`` pointer argument. If *with_offset* is true, the
    declarator consists of a ``<name>__base`` pointer and a ``long``
    ``<name>__offset`` instead of a single pointer.

    .. automethod:: __init__
    """

    def __init__(self, dtype: Any, name: str, with_offset: bool = False) -> None:
        super().__init__(dtype, name)
        self.with_offset = with_offset

    def declarator(self) -> str:
        ctype = dtype_to_ctype(self.dtype)

        if not self.with_offset:
            return "__global {} *{}".format(ctype, self.name)

        # Two underscores -> less likelihood of a name clash.
        return "__global {} *{}__base, long {}__offset".format(
                ctype, self.name, self.name)

    def __eq__(self, other) -> bool:
        if not super().__eq__(other):
            return False

        return self.with_offset == other.with_offset

    def __hash__(self) -> int:
        base_hash = super().__hash__()
        return base_hash ^ hash(self.with_offset)
794
+
795
+
796
class ScalarArg(DtypedArgument):
    """Inherits from :class:`DtypedArgument`.

    Declared as the C type name of its dtype followed by its name.
    """

    def declarator(self):
        ctype = dtype_to_ctype(self.dtype)
        return "{} {}".format(ctype, self.name)
801
+
802
+
803
class OtherArg(Argument):
    """An argument whose C declarator is supplied verbatim by the caller.

    The declarator text is stored in ``decl``, the argument name in ``name``.
    """

    def __init__(self, declarator: str, name: str) -> None:
        self.decl = declarator
        self.name = name

    def declarator(self) -> str:
        """Return the declarator text passed to the constructor."""
        return self.decl

    def __eq__(self, other) -> bool:
        # Instances of different (sub)classes never compare equal.
        if type(self) is not type(other):
            return False
        return self.decl == other.decl and self.name == other.name

    def __hash__(self) -> int:
        digest = hash(type(self))
        for component in (self.decl, self.name):
            digest ^= hash(component)
        return digest
821
+
822
+
823
def parse_c_arg(c_arg: str, with_offset: bool = False) -> DtypedArgument:
    """Parse a single C declarator string into an argument object.

    Any ``__global`` qualifier is removed from *c_arg* before the string is
    handed to :func:`pyopencl.compyte.dtypes.parse_c_arg_backend`, with
    :class:`ScalarArg` as the scalar factory and :class:`VectorArg` as the
    vector factory. If *with_offset* is true, vector arguments are created
    with ``with_offset=True``.

    :raises RuntimeError: if *c_arg* contains ``__local`` or ``__constant``.
    """
    if any(aspace in c_arg for aspace in ("__local", "__constant")):
        raise RuntimeError("cannot deal with local or constant "
                "OpenCL address spaces in C argument lists ")

    from pyopencl.compyte.dtypes import parse_c_arg_backend

    def offset_vec_arg_factory(dtype, name):
        return VectorArg(dtype, name, with_offset=True)

    vec_arg_factory = offset_vec_arg_factory if with_offset else VectorArg
    unqualified = c_arg.replace("__global", "")

    return parse_c_arg_backend(unqualified, ScalarArg, vec_arg_factory)
839
+
840
+
841
def parse_arg_list(
        arguments: Union[str, List[str], List[DtypedArgument]],
        with_offset: bool = False) -> List[DtypedArgument]:
    """Parse a list of kernel arguments. *arguments* may be a comma-separated
    list of C declarators in a string, a list of strings representing C
    declarators, or :class:`Argument` objects.

    String entries are parsed with :func:`parse_c_arg` (forwarding
    *with_offset*); entries that are already argument objects are returned
    unchanged.
    """
    entries = arguments.split(",") if isinstance(arguments, str) else arguments

    parsed: List[DtypedArgument] = []
    for entry in entries:
        if isinstance(entry, str):
            from pyopencl.tools import parse_c_arg
            parsed.append(parse_c_arg(entry, with_offset=with_offset))
        else:
            assert isinstance(entry, DtypedArgument)
            parsed.append(entry)

    return parsed
861
+
862
+
863
def get_arg_list_arg_types(arg_types):
    """Return a tuple with one entry per argument in *arg_types*.

    For a :class:`ScalarArg` the entry is its dtype; for a
    :class:`VectorArg` the entry is the argument object itself.

    :raises RuntimeError: for any other kind of entry.
    """
    collected = []

    for entry in arg_types:
        if isinstance(entry, ScalarArg):
            collected.append(entry.dtype)
            continue

        if isinstance(entry, VectorArg):
            collected.append(entry)
            continue

        raise RuntimeError("arg type not understood: %s" % type(entry))

    return tuple(collected)
875
+
876
+
877
def get_arg_list_scalar_arg_dtypes(
        arg_types: List[DtypedArgument]
        ) -> List[Optional[np.dtype]]:
    """Return the list of scalar dtypes matching the C-level argument list.

    Each :class:`ScalarArg` contributes its dtype and each
    :class:`VectorArg` contributes *None*. A vector argument with
    ``with_offset`` set additionally contributes a 64-bit integer dtype for
    its offset parameter.

    :raises RuntimeError: for any other kind of entry.
    """
    dtypes: List[Optional[np.dtype]] = []

    for arg_type in arg_types:
        if isinstance(arg_type, ScalarArg):
            dtypes.append(arg_type.dtype)
            continue

        if not isinstance(arg_type, VectorArg):
            raise RuntimeError(f"arg type not understood: {type(arg_type)}")

        dtypes.append(None)
        if arg_type.with_offset:
            # Slot for the "<name>__offset" parameter.
            dtypes.append(np.dtype(np.int64))

    return dtypes
893
+
894
+
895
def get_arg_offset_adjuster_code(arg_types):
    """Return C code that defines ``<name>`` pointers from base and offset.

    One statement is generated for every :class:`VectorArg` in *arg_types*
    that has ``with_offset`` set: it adds ``<name>__offset`` (in bytes, via a
    ``char`` pointer cast) to ``<name>__base``. Statements are joined with
    newlines; the result is an empty string if there are none.
    """
    statement = (
        "__global %(type)s *%(name)s = "
        "(__global %(type)s *) "
        "((__global char *) %(name)s__base + %(name)s__offset);")

    return "\n".join(
        statement % {
            "type": dtype_to_ctype(arg.dtype),
            "name": arg.name}
        for arg in arg_types
        if isinstance(arg, VectorArg) and arg.with_offset)
908
+
909
+ # }}}
910
+
911
+
912
def get_gl_sharing_context_properties():
    """Return a list of ``(property, value)`` pairs for CL/GL sharing.

    The pairs are built from the current OpenGL context as reported by
    PyOpenGL (GLX on Linux, WGL on Windows) or from the Apple CGL share
    group on macOS.

    :raises NotImplementedError: on any other ``sys.platform``.
    """
    import pyopencl as cl
    from OpenGL import platform as gl_platform
    import sys

    ctx_props = cl.context_properties

    if sys.platform in ("linux", "linux2"):
        from OpenGL import GLX
        return [
            (ctx_props.GL_CONTEXT_KHR, GLX.glXGetCurrentContext()),
            (ctx_props.GLX_DISPLAY_KHR, GLX.glXGetCurrentDisplay()),
        ]

    if sys.platform == "win32":
        from OpenGL import WGL
        return [
            (ctx_props.GL_CONTEXT_KHR, gl_platform.GetCurrentContext()),
            (ctx_props.WGL_HDC_KHR, WGL.wglGetCurrentDC()),
        ]

    if sys.platform == "darwin":
        return [
            (ctx_props.CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
                cl.get_apple_cgl_share_group()),
        ]

    raise NotImplementedError("platform '%s' not yet supported"
            % sys.platform)
945
+
946
+
947
+ class _CDeclList:
948
+ def __init__(self, device):
949
+ self.device = device
950
+ self.declared_dtypes = set()
951
+ self.declarations = []
952
+ self.saw_double = False
953
+ self.saw_complex = False
954
+
955
+ def add_dtype(self, dtype):
956
+ dtype = np.dtype(dtype)
957
+
958
+ if dtype in (np.float64, np.complex128):
959
+ self.saw_double = True
960
+
961
+ if dtype.kind == "c":
962
+ self.saw_complex = True
963
+
964
+ if dtype.kind != "V":
965
+ return
966
+
967
+ if dtype in self.declared_dtypes:
968
+ return
969
+
970
+ import pyopencl.cltypes
971
+ if dtype in pyopencl.cltypes.vec_type_to_scalar_and_count:
972
+ return
973
+
974
+ if hasattr(dtype, "subdtype") and dtype.subdtype is not None:
975
+ self.add_dtype(dtype.subdtype[0])
976
+ return
977
+
978
+ for _name, field_data in sorted(dtype.fields.items()):
979
+ field_dtype, offset = field_data[:2]
980
+ self.add_dtype(field_dtype)
981
+
982
+ _, cdecl = match_dtype_to_c_struct(
983
+ self.device, dtype_to_ctype(dtype), dtype)
984
+
985
+ self.declarations.append(cdecl)
986
+ self.declared_dtypes.add(dtype)
987
+
988
+ def visit_arguments(self, arguments):
989
+ for arg in arguments:
990
+ dtype = arg.dtype
991
+ if dtype in (np.float64, np.complex128):
992
+ self.saw_double = True
993
+
994
+ if dtype.kind == "c":
995
+ self.saw_complex = True
996
+
997
+ def get_declarations(self):
998
+ result = "\n\n".join(self.declarations)
999
+
1000
+ if self.saw_complex:
1001
+ result = (
1002
+ "#include <pyopencl-complex.h>\n\n"
1003
+ + result)
1004
+
1005
+ if self.saw_double:
1006
+ result = (
1007
+ """
1008
+ #if __OPENCL_C_VERSION__ < 120
1009
+ #pragma OPENCL EXTENSION cl_khr_fp64: enable
1010
+ #endif
1011
+ #define PYOPENCL_DEFINE_CDOUBLE
1012
+ """
1013
+ + result)
1014
+
1015
+ return result
1016
+
1017
+
1018
@memoize
def match_dtype_to_c_struct(device, name, dtype, context=None):
    """Return a tuple ``(dtype, c_decl)`` such that the C struct declaration
    in ``c_decl`` and the structure :class:`numpy.dtype` instance ``dtype``
    have the same memory layout.

    Note that *dtype* may be modified from the value that was passed in,
    for example to insert padding.

    (As a remark on implementation, this routine runs a small kernel on
    the given *device* to ensure that :mod:`numpy` and C offsets and
    sizes match.)

    :arg device: the device on which the probing kernel is built and run.
    :arg name: the C type name used for the generated ``typedef``.
    :arg dtype: a structured :class:`numpy.dtype`.
    :arg context: the context to use. If *None*, a new context is created
        for *device*.
    :raises NotImplementedError: if a field is an array of arrays.
    :raises RuntimeError: if the compiler reports a field offset that is not
        smaller than the struct size and the struct size differs from
        numpy's item size.

    .. versionadded:: 2013.1

    This example explains the use of this function::

        >>> import numpy as np
        >>> import pyopencl as cl
        >>> import pyopencl.tools
        >>> ctx = cl.create_some_context()
        >>> dtype = np.dtype([("id", np.uint32), ("value", np.float32)])
        >>> dtype, c_decl = pyopencl.tools.match_dtype_to_c_struct(
        ...     ctx.devices[0], 'id_val', dtype)
        >>> print(c_decl)
        typedef struct {
          unsigned id;
          float value;
        } id_val;
        >>> print(dtype)
        [('id', '<u4'), ('value', '<f4')]
        >>> cl.tools.get_or_register_dtype('id_val', dtype)

    As this example shows, it is important to call
    :func:`get_or_register_dtype` on the modified ``dtype`` returned by this
    function, not the original one.
    """

    import pyopencl as cl

    # Fields in order of increasing numpy byte offset.
    fields = sorted(dtype.fields.items(),
            key=lambda name_dtype_offset: name_dtype_offset[1][1])

    # {{{ build the C struct declaration

    c_fields = []
    for field_name, dtype_and_offset in fields:
        field_dtype, _offset = dtype_and_offset[:2]
        if hasattr(field_dtype, "subdtype") and field_dtype.subdtype is not None:
            array_dtype = field_dtype.subdtype[0]
            if hasattr(array_dtype, "subdtype") and array_dtype.subdtype is not None:
                raise NotImplementedError("nested array dtypes are not supported")
            array_dims = field_dtype.subdtype[1]
            dims_str = ""
            try:
                for dim in array_dims:
                    dims_str += "[%d]" % dim
            except TypeError:
                # array_dims was a single number rather than an iterable
                dims_str = "[%d]" % array_dims
            c_fields.append(" {} {}{};".format(
                dtype_to_ctype(array_dtype), field_name, dims_str)
            )
        else:
            c_fields.append(
                    " {} {};".format(dtype_to_ctype(field_dtype), field_name))

    c_decl = "typedef struct {{\n{}\n}} {};\n\n".format(
            "\n".join(c_fields),
            name)

    # }}}

    # Declarations required by the field types themselves.
    cdl = _CDeclList(device)
    for _field_name, dtype_and_offset in fields:
        field_dtype, _offset = dtype_and_offset[:2]
        cdl.add_dtype(field_dtype)

    pre_decls = cdl.get_declarations()

    # result[0] holds sizeof(), result[1:] the per-field offsets.
    offset_code = "\n".join(
            "result[%d] = pycl_offsetof(%s, %s);" % (i+1, name, field_name)
            for i, (field_name, _) in enumerate(fields))

    src = rf"""
        #define pycl_offsetof(st, m) \
                 ((uint) ((__local char *) &(dummy.m) \
                 - (__local char *)&dummy ))

        {pre_decls}

        {c_decl}

        __kernel void get_size_and_offsets(__global uint *result)
        {{
            result[0] = sizeof({name});
            __local {name} dummy;
            {offset_code}
        }}
    """

    if context is None:
        context = cl.Context([device])

    queue = cl.CommandQueue(context)

    prg = cl.Program(context, src)
    knl = prg.build(devices=[device]).get_size_and_offsets

    import pyopencl.array  # noqa: F401
    result_buf = cl.array.empty(queue, 1+len(fields), np.uint32)
    knl(queue, (1,), (1,), result_buf.data)
    queue.finish()
    size_and_offsets = result_buf.get()

    size = int(size_and_offsets[0])

    offsets = size_and_offsets[1:]
    if any(ofs >= size for ofs in offsets):
        # offsets not plausible

        if dtype.itemsize == size:
            # If sizes match, use numpy's idea of the offsets.
            offsets = [dtype_and_offset[1]
                    for field_name, dtype_and_offset in fields]
        else:
            raise RuntimeError(
                    "OpenCL compiler reported offsetof() past sizeof() "
                    "for struct layout on '%s'. "
                    "This makes no sense, and it's usually indicates a "
                    "compiler bug. "
                    "Refusing to discover struct layout." % device)

    result_buf.data.release()
    del knl
    del prg
    del queue
    del context

    try:
        dtype_arg_dict = {
            "names": [field_name
                for field_name, (field_dtype, offset) in fields],
            "formats": [field_dtype
                for field_name, (field_dtype, offset) in fields],
            "offsets": [int(x) for x in offsets],
            "itemsize": int(size_and_offsets[0]),
            }
        dtype = np.dtype(dtype_arg_dict)
        if dtype.itemsize != size_and_offsets[0]:
            # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo.
            dtype_arg_dict["names"].append("_pycl_size_fixer")
            dtype_arg_dict["formats"].append(np.uint8)
            dtype_arg_dict["offsets"].append(int(size_and_offsets[0])-1)
            dtype = np.dtype(dtype_arg_dict)
    except NotImplementedError:
        # Fallback: spell out the padding between fields explicitly.
        def calc_field_type():
            total_size = 0
            padding_count = 0
            for offset, (field_name, (field_dtype, _)) in zip(offsets, fields):
                if offset > total_size:
                    padding_count += 1
                    # The parentheses matter: '%' binds tighter than '-', so
                    # without them this would attempt 'str - int'.
                    yield ("__pycl_padding%d" % padding_count,
                            "V%d" % (offset - total_size))
                yield field_name, field_dtype
                total_size = field_dtype.itemsize + offset
        dtype = np.dtype(list(calc_field_type()))

    assert dtype.itemsize == size_and_offsets[0]

    return dtype, c_decl
1184
+
1185
+
1186
@memoize
def dtype_to_c_struct(device, dtype):
    """Return a C struct declaration matching *dtype* on *device*.

    An empty string is returned for dtypes without fields and for dtypes
    registered as built-in vector types. The declaration is obtained from
    :func:`match_dtype_to_c_struct`; it is asserted that the layout that
    function reports has the same fields as *dtype*.
    """
    if dtype.fields is None:
        return ""

    import pyopencl.cltypes
    if dtype in pyopencl.cltypes.vec_type_to_scalar_and_count:
        # Vector types are built-in. Don't try to redeclare those.
        return ""

    matched_dtype, c_decl = match_dtype_to_c_struct(
            device, dtype_to_ctype(dtype), dtype)

    def dtypes_match():
        # Same number of fields, and every field has the same (dtype, offset)
        # entry in both dtypes.
        return (
            len(dtype.fields) == len(matched_dtype.fields)
            and all(
                matched_dtype.fields[field_name] == field_data
                for field_name, field_data in dtype.fields.items()))

    assert dtypes_match()

    return c_decl
1210
+
1211
+
1212
+ # {{{ code generation/templating helper
1213
+
1214
+ def _process_code_for_macro(code):
1215
+ code = code.replace("//CL//", "\n")
1216
+
1217
+ if "//" in code:
1218
+ raise RuntimeError("end-of-line comments ('//') may not be used in "
1219
+ "code snippets")
1220
+
1221
+ return code.replace("\n", " \\\n")
1222
+
1223
+
1224
+ class _SimpleTextTemplate:
1225
+ def __init__(self, txt):
1226
+ self.txt = txt
1227
+
1228
+ def render(self, context):
1229
+ return self.txt
1230
+
1231
+
1232
+ class _PrintfTextTemplate:
1233
+ def __init__(self, txt):
1234
+ self.txt = txt
1235
+
1236
+ def render(self, context):
1237
+ return self.txt % context
1238
+
1239
+
1240
class _MakoTextTemplate:
    """A template rendered by Mako, with undefined names treated strictly."""

    def __init__(self, txt):
        # Imported lazily so that Mako is only needed when actually used.
        import mako.template
        self.template = mako.template.Template(txt, strict_undefined=True)

    def render(self, context):
        """Render the template with the entries of *context* as variables."""
        return self.template.render(**context)
1247
+
1248
+
1249
+ class _ArgumentPlaceholder:
1250
+ """A placeholder for subclasses of :class:`DtypedArgument`. This is needed
1251
+ because the concrete dtype of the argument is not known at template
1252
+ creation time--it may be a type alias that will only be filled in
1253
+ at run time. These types take the place of these proto-arguments until
1254
+ all types are known.
1255
+
1256
+ See also :class:`_TemplateRenderer.render_arg`.
1257
+ """
1258
+
1259
+ def __init__(self, typename, name, **extra_kwargs):
1260
+ self.typename = typename
1261
+ self.name = name
1262
+ self.extra_kwargs = extra_kwargs
1263
+
1264
+
1265
class _VectorArgPlaceholder(_ArgumentPlaceholder):
    """Placeholder whose ``target_class`` is :class:`VectorArg`."""
    target_class = VectorArg
1267
+
1268
+
1269
class _ScalarArgPlaceholder(_ArgumentPlaceholder):
    """Placeholder whose ``target_class`` is :class:`ScalarArg`."""
    target_class = ScalarArg
1271
+
1272
+
1273
+ class _TemplateRenderer:
1274
+ def __init__(self, template, type_aliases, var_values, context=None,
1275
+ options=None):
1276
+ self.template = template
1277
+ self.type_aliases = dict(type_aliases)
1278
+ self.var_dict = dict(var_values)
1279
+
1280
+ for name in self.var_dict:
1281
+ if name.startswith("macro_"):
1282
+ self.var_dict[name] = _process_code_for_macro(
1283
+ self.var_dict[name])
1284
+
1285
+ self.context = context
1286
+ self.options = options
1287
+
1288
+ def __call__(self, txt):
1289
+ if txt is None:
1290
+ return txt
1291
+
1292
+ result = self.template.get_text_template(txt).render(self.var_dict)
1293
+
1294
+ return str(result)
1295
+
1296
+ def get_rendered_kernel(self, txt, kernel_name):
1297
+ import pyopencl as cl
1298
+ prg = cl.Program(self.context, self(txt)).build(self.options)
1299
+
1300
+ kernel_name_prefix = self.var_dict.get("kernel_name_prefix")
1301
+ if kernel_name_prefix is not None:
1302
+ kernel_name = kernel_name_prefix+kernel_name
1303
+
1304
+ return getattr(prg, kernel_name)
1305
+
1306
+ def parse_type(self, typename):
1307
+ if isinstance(typename, str):
1308
+ try:
1309
+ return self.type_aliases[typename]
1310
+ except KeyError:
1311
+ from pyopencl.compyte.dtypes import NAME_TO_DTYPE
1312
+ return NAME_TO_DTYPE[typename]
1313
+ else:
1314
+ return np.dtype(typename)
1315
+
1316
+ def render_arg(self, arg_placeholder):
1317
+ return arg_placeholder.target_class(
1318
+ self.parse_type(arg_placeholder.typename),
1319
+ arg_placeholder.name,
1320
+ **arg_placeholder.extra_kwargs)
1321
+
1322
+ _C_COMMENT_FINDER = re.compile(r"/\*.*?\*/")
1323
+
1324
+ def render_argument_list(self, *arg_lists, **kwargs):
1325
+ with_offset = kwargs.pop("with_offset", False)
1326
+ if kwargs:
1327
+ raise TypeError("unrecognized kwargs: " + ", ".join(kwargs))
1328
+
1329
+ all_args = []
1330
+
1331
+ for arg_list in arg_lists:
1332
+ if isinstance(arg_list, str):
1333
+ arg_list = str(
1334
+ self.template
1335
+ .get_text_template(arg_list).render(self.var_dict))
1336
+ arg_list = self._C_COMMENT_FINDER.sub("", arg_list)
1337
+ arg_list = arg_list.replace("\n", " ")
1338
+
1339
+ all_args.extend(arg_list.split(","))
1340
+ else:
1341
+ all_args.extend(arg_list)
1342
+
1343
+ if with_offset:
1344
+ def vec_arg_factory(typename, name):
1345
+ return _VectorArgPlaceholder(typename, name, with_offset=True)
1346
+ else:
1347
+ vec_arg_factory = _VectorArgPlaceholder
1348
+
1349
+ from pyopencl.compyte.dtypes import parse_c_arg_backend
1350
+ parsed_args = []
1351
+ for arg in all_args:
1352
+ if isinstance(arg, str):
1353
+ arg = arg.strip()
1354
+ if not arg:
1355
+ continue
1356
+
1357
+ ph = parse_c_arg_backend(arg,
1358
+ _ScalarArgPlaceholder, vec_arg_factory,
1359
+ name_to_dtype=lambda x: x)
1360
+ parsed_arg = self.render_arg(ph)
1361
+
1362
+ elif isinstance(arg, Argument):
1363
+ parsed_arg = arg
1364
+ elif isinstance(arg, tuple):
1365
+ parsed_arg = ScalarArg(self.parse_type(arg[0]), arg[1])
1366
+
1367
+ parsed_args.append(parsed_arg)
1368
+
1369
+ return parsed_args
1370
+
1371
+ def get_type_decl_preamble(self, device, decl_type_names, arguments=None):
1372
+ cdl = _CDeclList(device)
1373
+
1374
+ for typename in decl_type_names:
1375
+ cdl.add_dtype(self.parse_type(typename))
1376
+
1377
+ if arguments is not None:
1378
+ cdl.visit_arguments(arguments)
1379
+
1380
+ for _, tv in sorted(self.type_aliases.items()):
1381
+ cdl.add_dtype(tv)
1382
+
1383
+ type_alias_decls = [
1384
+ "typedef {} {};".format(dtype_to_ctype(val), name)
1385
+ for name, val in sorted(self.type_aliases.items())
1386
+ ]
1387
+
1388
+ return cdl.get_declarations() + "\n" + "\n".join(type_alias_decls)
1389
+
1390
+
1391
class KernelTemplateBase:
    """Base class for kernel templates with per-instance build caching.

    :arg template_processor: name of the default template processor
        (``None``/``"none"``, ``"printf"`` or ``"mako"``), used for texts
        that do not select one themselves via a ``//CL:<name>//`` prefix.
    """

    def __init__(self, template_processor=None):
        self.template_processor = template_processor

        self.build_cache = {}
        _first_arg_dependent_caches.append(self.build_cache)

    def get_preamble(self):
        pass

    _TEMPLATE_PROCESSOR_PATTERN = re.compile(r"^//CL(?::([a-zA-Z0-9_]+))?//")

    @memoize_method
    def get_text_template(self, txt):
        """Return a template object (with a ``render`` method) for *txt*.

        A leading ``//CL//`` or ``//CL:<processor>//`` mark is stripped from
        *txt*; in the latter form it selects the template processor.

        :raises RuntimeError: if the selected processor is unknown.
        """
        proc_match = self._TEMPLATE_PROCESSOR_PATTERN.match(txt)
        tpl_processor = None

        if proc_match is not None:
            tpl_processor = proc_match.group(1)
            # chop off //CL// mark
            txt = txt[len(proc_match.group(0)):]
        if tpl_processor is None:
            tpl_processor = self.template_processor

        if tpl_processor is None or tpl_processor == "none":
            return _SimpleTextTemplate(txt)
        elif tpl_processor == "printf":
            return _PrintfTextTemplate(txt)
        elif tpl_processor == "mako":
            return _MakoTextTemplate(txt)
        else:
            # Report the processor actually selected. 'proc_match' is None
            # when the name came from the constructor, so it must not be
            # dereferenced here.
            raise RuntimeError(
                    "unknown template processor '%s'" % tpl_processor)

    def get_renderer(self, type_aliases, var_values, context=None, options=None):
        """Return a :class:`_TemplateRenderer` for this template.

        *context* and *options* are forwarded to the renderer, which uses
        them when building kernels.
        """
        return _TemplateRenderer(self, type_aliases, var_values,
                context=context, options=options)

    def build_inner(self, context, *args, **kwargs):
        """Perform the actual build. To be implemented by subclasses."""
        raise NotImplementedError

    def build(self, context, *args, **kwargs):
        """Provide caching for an :meth:`build_inner`."""

        # All arguments must be hashable for them to serve as a cache key.
        cache_key = (context, args, tuple(sorted(kwargs.items())))
        try:
            return self.build_cache[cache_key]
        except KeyError:
            result = self.build_inner(context, *args, **kwargs)
            self.build_cache[cache_key] = result
            return result
1441
+
1442
+ # }}}
1443
+
1444
+
1445
+ # {{{ array_module
1446
+
1447
+ class _CLFakeArrayModule:
1448
+ def __init__(self, queue):
1449
+ self.queue = queue
1450
+
1451
+ @property
1452
+ def ndarray(self):
1453
+ from pyopencl.array import Array
1454
+ return Array
1455
+
1456
+ def dot(self, x, y):
1457
+ from pyopencl.array import dot
1458
+ return dot(x, y, queue=self.queue).get()
1459
+
1460
+ def vdot(self, x, y):
1461
+ from pyopencl.array import vdot
1462
+ return vdot(x, y, queue=self.queue).get()
1463
+
1464
+ def empty(self, shape, dtype, order="C"):
1465
+ from pyopencl.array import empty
1466
+ return empty(self.queue, shape, dtype, order=order)
1467
+
1468
+ def hstack(self, arrays):
1469
+ from pyopencl.array import hstack
1470
+ return hstack(arrays, self.queue)
1471
+
1472
+
1473
def array_module(a):
    """Return a module-like object matching the array type of *a*.

    For a :class:`numpy.ndarray` this is :mod:`numpy` itself; for a
    :class:`pyopencl.array.Array` it is a :class:`_CLFakeArrayModule` bound
    to the array's queue.

    :raises TypeError: for any other type.
    """
    if isinstance(a, np.ndarray):
        return np

    from pyopencl.array import Array
    if not isinstance(a, Array):
        raise TypeError("array type not understood: %s" % type(a))

    return _CLFakeArrayModule(a.queue)
1482
+
1483
+ # }}}
1484
+
1485
+
1486
def is_spirv(s):
    """Return *True* if *s* is a :class:`bytes` object that starts with the
    four SPIR-V magic bytes, in either byte order.
    """
    if not isinstance(s, bytes):
        return False

    spirv_magic = b"\x07\x23\x02\x03"
    header = s[:4]
    return header == spirv_magic or header == spirv_magic[::-1]
1493
+
1494
+
1495
+ # {{{ numpy key types builder
1496
+
1497
class _NumpyTypesKeyBuilder(KeyBuilderBase):
    """Key builder with support for :class:`VectorArg` instances and numpy
    scalar types as (parts of) hash keys.
    """

    def update_for_VectorArg(self, key_hash, key):  # noqa: N802
        # A VectorArg is identified by its dtype, name and offset flag.
        self.rec(key_hash, key.dtype)
        self.update_for_str(key_hash, key.name)
        self.rec(key_hash, key.with_offset)

    def update_for_type(self, key_hash, key):
        # Numpy scalar types (subclasses of np.generic) are keyed by name.
        if issubclass(key, np.generic):
            self.update_for_str(key_hash, key.__name__)
            return

        # Report the offending type itself. 'type(key)' would only name its
        # metaclass (typically 'type'), which is of no help to the caller.
        raise TypeError("unsupported type for persistent hash keying: %s"
                % (key,))
1510
+
1511
+ # }}}
1512
+
1513
+ # vim: foldmethod=marker