pyopencl 2024.3__cp38-cp38-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (43) hide show
  1. pyopencl/.libs/libOpenCL-1ef0e16e.so.1.0.0 +0 -0
  2. pyopencl/__init__.py +2410 -0
  3. pyopencl/_cl.cpython-38-x86_64-linux-gnu.so +0 -0
  4. pyopencl/_cluda.py +54 -0
  5. pyopencl/_mymako.py +14 -0
  6. pyopencl/algorithm.py +1449 -0
  7. pyopencl/array.py +3437 -0
  8. pyopencl/bitonic_sort.py +242 -0
  9. pyopencl/bitonic_sort_templates.py +594 -0
  10. pyopencl/cache.py +535 -0
  11. pyopencl/capture_call.py +177 -0
  12. pyopencl/characterize/__init__.py +456 -0
  13. pyopencl/characterize/performance.py +237 -0
  14. pyopencl/cl/pyopencl-airy.cl +324 -0
  15. pyopencl/cl/pyopencl-bessel-j-complex.cl +238 -0
  16. pyopencl/cl/pyopencl-bessel-j.cl +1084 -0
  17. pyopencl/cl/pyopencl-bessel-y.cl +435 -0
  18. pyopencl/cl/pyopencl-complex.h +303 -0
  19. pyopencl/cl/pyopencl-eval-tbl.cl +120 -0
  20. pyopencl/cl/pyopencl-hankel-complex.cl +444 -0
  21. pyopencl/cl/pyopencl-random123/array.h +325 -0
  22. pyopencl/cl/pyopencl-random123/openclfeatures.h +93 -0
  23. pyopencl/cl/pyopencl-random123/philox.cl +486 -0
  24. pyopencl/cl/pyopencl-random123/threefry.cl +864 -0
  25. pyopencl/clmath.py +280 -0
  26. pyopencl/clrandom.py +409 -0
  27. pyopencl/cltypes.py +137 -0
  28. pyopencl/compyte/.gitignore +21 -0
  29. pyopencl/compyte/__init__.py +0 -0
  30. pyopencl/compyte/array.py +214 -0
  31. pyopencl/compyte/dtypes.py +290 -0
  32. pyopencl/compyte/pyproject.toml +54 -0
  33. pyopencl/elementwise.py +1171 -0
  34. pyopencl/invoker.py +421 -0
  35. pyopencl/ipython_ext.py +68 -0
  36. pyopencl/reduction.py +786 -0
  37. pyopencl/scan.py +1915 -0
  38. pyopencl/tools.py +1527 -0
  39. pyopencl/version.py +9 -0
  40. pyopencl-2024.3.dist-info/METADATA +108 -0
  41. pyopencl-2024.3.dist-info/RECORD +43 -0
  42. pyopencl-2024.3.dist-info/WHEEL +5 -0
  43. pyopencl-2024.3.dist-info/licenses/LICENSE +104 -0
pyopencl/tools.py ADDED
@@ -0,0 +1,1527 @@
1
+ r"""
2
+ .. _memory-pools:
3
+
4
+ Memory Pools
5
+ ------------
6
+
7
+ Memory allocation (e.g. in the form of the :func:`pyopencl.Buffer` constructor)
8
+ can be expensive if used frequently. For example, code based on
9
+ :class:`pyopencl.array.Array` can easily run into this issue because a fresh
10
+ memory area is allocated for each intermediate result. Memory pools are a
11
+ remedy for this problem based on the observation that often many of the block
12
+ allocations are of the same sizes as previously used ones.
13
+
14
+ Then, instead of fully returning the memory to the system and incurring the
15
+ associated reallocation overhead, the pool holds on to the memory and uses it
16
+ to satisfy future allocations of similarly-sized blocks. The pool reacts
17
+ appropriately to out-of-memory conditions as long as all memory allocations
18
+ are made through it. Allocations performed from outside of the pool may run
19
+ into spurious out-of-memory conditions due to the pool owning much or all of
20
+ the available memory.
21
+
22
+ There are two flavors of allocators and memory pools:
23
+
24
+ - :ref:`buf-mempool`
25
+ - :ref:`svm-mempool`
26
+
27
+ :class:`pyopencl.array.Array`\ s can be used with memory pools in a
28
+ straightforward manner::
29
+
30
+ mem_pool = pyopencl.tools.MemoryPool(pyopencl.tools.ImmediateAllocator(queue))
31
+ a_dev = cl_array.arange(queue, 2000, dtype=np.float32, allocator=mem_pool)
32
+
33
+ Likewise, SVM-based allocators are directly usable with
34
+ :class:`pyopencl.array.Array`.
35
+
36
+ .. _buf-mempool:
37
+
38
+ :class:`~pyopencl.Buffer`-based Allocators and Memory Pools
39
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
40
+
41
+ .. autoclass:: PooledBuffer
42
+
43
+ .. autoclass:: AllocatorBase
44
+
45
+ .. autoclass:: DeferredAllocator
46
+
47
+ .. autoclass:: ImmediateAllocator
48
+
49
+ .. autoclass:: MemoryPool
50
+
51
+ .. _svm-mempool:
52
+
53
+ :ref:`SVM <svm>`-Based Allocators and Memory Pools
54
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
55
+
56
+ SVM functionality requires OpenCL 2.0.
57
+
58
+ .. autoclass:: PooledSVM
59
+
60
+ .. autoclass:: SVMAllocator
61
+
62
+ .. autoclass:: SVMPool
63
+
64
+ CL-Object-dependent Caching
65
+ ---------------------------
66
+
67
+ .. autofunction:: first_arg_dependent_memoize
68
+ .. autofunction:: clear_first_arg_caches
69
+
70
+ Testing
71
+ -------
72
+
73
+ .. autofunction:: pytest_generate_tests_for_pyopencl
74
+
75
+ Argument Types
76
+ --------------
77
+
78
+ .. autoclass:: Argument
79
+ .. autoclass:: DtypedArgument
80
+
81
+ .. autoclass:: VectorArg
82
+ .. autoclass:: ScalarArg
83
+ .. autoclass:: OtherArg
84
+
85
+ .. autofunction:: parse_arg_list
86
+
87
+ Device Characterization
88
+ -----------------------
89
+
90
+ .. automodule:: pyopencl.characterize
91
+ :members:
92
+
93
+ Type aliases
94
+ ------------
95
+
96
+ .. currentmodule:: pyopencl._cl
97
+
98
+ .. class:: AllocatorBase
99
+
100
+ See :class:`pyopencl.tools.AllocatorBase`.
101
+ """
102
+
103
+
104
+ __copyright__ = "Copyright (C) 2010 Andreas Kloeckner"
105
+
106
+ __license__ = """
107
+ Permission is hereby granted, free of charge, to any person
108
+ obtaining a copy of this software and associated documentation
109
+ files (the "Software"), to deal in the Software without
110
+ restriction, including without limitation the rights to use,
111
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
112
+ copies of the Software, and to permit persons to whom the
113
+ Software is furnished to do so, subject to the following
114
+ conditions:
115
+
116
+ The above copyright notice and this permission notice shall be
117
+ included in all copies or substantial portions of the Software.
118
+
119
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
120
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
121
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
122
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
123
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
124
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
125
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
126
+ OTHER DEALINGS IN THE SOFTWARE.
127
+ """
128
+
129
+ import re
130
+ from abc import ABC, abstractmethod
131
+ from sys import intern
132
+ from typing import Any, List, Optional, Union
133
+
134
+ import numpy as np
135
+
136
+ from pytools import memoize, memoize_method
137
+ from pytools.persistent_dict import KeyBuilder as KeyBuilderBase
138
+
139
+ from pyopencl._cl import bitlog2, get_cl_header_version # noqa: F401
140
+ from pyopencl.compyte.dtypes import ( # noqa: F401
141
+ TypeNameNotKnown,
142
+ dtype_to_ctype,
143
+ get_or_register_dtype,
144
+ register_dtype,
145
+ )
146
+
147
+
148
+ # Do not add a pyopencl import here: This will add an import cycle.
149
+
150
+
151
def _register_types():
    """Fill the shared dtype registry with the OpenCL C types and register
    the ``cfloat_t``/``cdouble_t`` names for the numpy complex types.
    """
    from pyopencl.compyte import dtypes as _dtypes

    _dtypes.fill_registry_with_opencl_c_types(_dtypes.TYPE_REGISTRY)

    for c_name, np_type in (
            ("cfloat_t", np.complex64),
            ("cdouble_t", np.complex128)):
        get_or_register_dtype(c_name, np_type)
158
+
159
+
160
+ _register_types()
161
+
162
+
163
+ # {{{ imported names
164
+
165
+ from pyopencl._cl import (
166
+ AllocatorBase,
167
+ DeferredAllocator,
168
+ ImmediateAllocator,
169
+ MemoryPool,
170
+ PooledBuffer,
171
+ )
172
+
173
+
174
+ if get_cl_header_version() >= (2, 0):
175
+ from pyopencl._cl import PooledSVM, SVMAllocator, SVMPool
176
+
177
+ # }}}
178
+
179
+
180
+ # {{{ monkeypatch docstrings into imported interfaces
181
+
182
+ _MEMPOOL_IFACE_DOCS = """
183
+ .. note::
184
+
185
+ The current implementation of the memory pool will retain allocated
186
+ memory after it is returned by the application and keep it in a bin
187
+ identified by the leading *leading_bits_in_bin_id* bits of the
188
+ allocation size. To ensure that allocations within each bin are
189
+ interchangeable, allocation sizes are rounded up to the largest size
190
+ that shares the leading bits of the requested allocation size.
191
+
192
+ The current default value of *leading_bits_in_bin_id* is
193
+ four, but this may change in future versions and is not
194
+ guaranteed.
195
+
196
+ *leading_bits_in_bin_id* must be passed by keyword,
197
+ and its role is purely advisory. It is not guaranteed
198
+ that future versions of the pool will use the
199
+ same allocation scheme and/or honor *leading_bits_in_bin_id*.
200
+
201
+ .. attribute:: held_blocks
202
+
203
+ The number of unused blocks being held by this pool.
204
+
205
+ .. attribute:: active_blocks
206
+
207
+ The number of blocks in active use that have been allocated
208
+ through this pool.
209
+
210
+ .. attribute:: managed_bytes
211
+
212
+ "Managed" memory is "active" and "held" memory.
213
+
214
+ .. versionadded:: 2021.1.2
215
+
216
+ .. attribute:: active_bytes
217
+
218
+ "Active" bytes are bytes under the control of the application.
219
+ This may be smaller than the actual allocated size reflected
220
+ in :attr:`managed_bytes`.
221
+
222
+ .. versionadded:: 2021.1.2
223
+
224
+
225
+ .. method:: free_held
226
+
227
+ Free all unused memory that the pool is currently holding.
228
+
229
+ .. method:: stop_holding
230
+
231
+ Instruct the memory pool to start immediately freeing memory returned
232
+ to it, instead of holding it for future allocations.
233
+ Implicitly calls :meth:`free_held`.
234
+ This is useful as a cleanup action when a memory pool falls out
235
+ of use.
236
+ """
237
+
238
+
239
def _monkeypatch_docstrings():
    """Attach Sphinx-facing docstrings to the buffer allocator and pool
    classes imported from :mod:`pyopencl._cl`.
    """
    from pytools.codegen import remove_common_indentation

    # The texts are collected first and attached at the end of the function.

    pooled_buffer_doc = """
    An object representing a :class:`MemoryPool`-based allocation of
    :class:`~pyopencl.Buffer`-style device memory. Analogous to
    :class:`~pyopencl.Buffer`, however once this object is deleted, its
    associated device memory is returned to the pool.

    Is a :class:`pyopencl.MemoryObject`.
    """

    allocator_base_doc = """
    An interface implemented by various memory allocation functions
    in :mod:`pyopencl`.

    .. automethod:: __call__

        Allocate and return a :class:`pyopencl.Buffer` of the given *size*.
    """

    # {{{ DeferredAllocator

    deferred_allocator_doc = """
    *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds
    to the *flags* argument of :class:`pyopencl.Buffer`. DeferredAllocator
    has the same semantics as regular OpenCL buffer allocation, i.e. it may
    promise memory to be available that may (in any call to a buffer-using
    CL function) turn out to not exist later on. (Allocations in CL are
    bound to contexts, not devices, and memory availability depends on which
    device the buffer is used with.)

    Implements :class:`AllocatorBase`.

    .. versionchanged :: 2013.1

        ``CLAllocator`` was deprecated and replaced
        by :class:`DeferredAllocator`.

    .. method:: __init__(context, mem_flags=pyopencl.mem_flags.READ_WRITE)

    .. automethod:: __call__

        Allocate a :class:`pyopencl.Buffer` of the given *size*.

        .. versionchanged :: 2020.2

            The allocator will succeed even for allocations of size zero,
            returning *None*.
    """

    # }}}

    # {{{ ImmediateAllocator

    immediate_allocator_doc = """
    *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds
    to the *flags* argument of :class:`pyopencl.Buffer`.
    :class:`ImmediateAllocator` will attempt to ensure at allocation time that
    allocated memory is actually available. If no memory is available, an
    out-of-memory error is reported at allocation time.

    Implements :class:`AllocatorBase`.

    .. versionadded:: 2013.1

    .. method:: __init__(queue, mem_flags=pyopencl.mem_flags.READ_WRITE)

    .. automethod:: __call__

        Allocate a :class:`pyopencl.Buffer` of the given *size*.

        .. versionchanged :: 2020.2

            The allocator will succeed even for allocations of size zero,
            returning *None*.
    """

    # }}}

    # {{{ MemoryPool

    # The class-specific part is dedented before the shared pool-interface
    # documentation is appended to it.
    memory_pool_doc = remove_common_indentation("""
    A memory pool for OpenCL device memory in :class:`pyopencl.Buffer` form.
    *allocator* must be an instance of one of the above classes, and should be
    an :class:`ImmediateAllocator`. The memory pool assumes that allocation
    failures are reported by the allocator immediately, and not in the
    OpenCL-typical deferred manner.

    Implements :class:`AllocatorBase`.

    .. versionchanged:: 2019.1

        Current bin allocation behavior documented, *leading_bits_in_bin_id*
        added.

    .. automethod:: __init__

    .. automethod:: allocate

        Return a :class:`PooledBuffer` of the given *size*.

    .. automethod:: __call__

        Synonym for :meth:`allocate` to match :class:`AllocatorBase`.

        .. versionadded:: 2011.2
    """) + _MEMPOOL_IFACE_DOCS

    # }}}

    PooledBuffer.__doc__ = pooled_buffer_doc
    AllocatorBase.__doc__ = allocator_base_doc
    DeferredAllocator.__doc__ = deferred_allocator_doc
    ImmediateAllocator.__doc__ = immediate_allocator_doc
    MemoryPool.__doc__ = memory_pool_doc
349
+
350
+
351
+ _monkeypatch_docstrings()
352
+
353
+
354
def _monkeypatch_svm_docstrings():
    """Attach Sphinx-facing docstrings to the SVM allocator and pool classes.

    Those classes are only imported when the CL header version is at least
    2.0, so this function must only be called under the same condition.
    """
    from pytools.codegen import remove_common_indentation

    # {{{ PooledSVM

    PooledSVM.__doc__ = (  # pylint: disable=possibly-used-before-assignment
        """An object representing a :class:`SVMPool`-based allocation of
        :ref:`svm`. Analogous to :class:`~pyopencl.SVMAllocation`, however once
        this object is deleted, its associated device memory is returned to the
        pool from which it came.

        .. versionadded:: 2022.2

        .. note::

            If the :class:`SVMAllocator` for the :class:`SVMPool` that allocated an
            object of this type is associated with an (in-order)
            :class:`~pyopencl.CommandQueue`, sufficient synchronization is provided
            to ensure operations enqueued before deallocation complete before
            operations from a different use (possibly in a different queue) are
            permitted to start. This applies when :meth:`release` is called and
            also when the object is freed automatically by the garbage collector.

        Is a :class:`pyopencl.SVMPointer`.

        Supports structural equality and hashing.

        .. automethod:: release

            Return the held memory to the pool. See the note about synchronization
            behavior during deallocation above.

        .. automethod:: enqueue_release

            Synonymous to :meth:`release`, for consistency with
            :class:`~pyopencl.SVMAllocation`. Note that, unlike
            :meth:`pyopencl.SVMAllocation.enqueue_release`, specifying a queue
            or events to be waited for is not supported.

        .. automethod:: bind_to_queue

            Analogous to :meth:`pyopencl.SVMAllocation.bind_to_queue`.

        .. automethod:: unbind_from_queue

            Analogous to :meth:`pyopencl.SVMAllocation.unbind_from_queue`.
        """)

    # }}}

    # {{{ SVMAllocator

    SVMAllocator.__doc__ = (  # pylint: disable=possibly-used-before-assignment
        """
        .. versionadded:: 2022.2

        .. automethod:: __init__

            :arg flags: See :class:`~pyopencl.svm_mem_flags`.
            :arg queue: If not specified, allocations will be freed
                eagerly, irrespective of whether pending/enqueued operations
                are still using the memory.

                If specified, deallocation of memory will be enqueued
                with the given queue, and will only be performed
                after previously-enqueued operations in the queue have
                completed.

                It is an error to specify an out-of-order queue.

                .. warning::

                    Not specifying a queue will typically lead to undesired
                    behavior, including crashes and memory corruption.
                    See the warning in :ref:`svm`.

        .. automethod:: __call__

            Return a :class:`~pyopencl.SVMAllocation` of the given *size*.
        """)

    # }}}

    # {{{ SVMPool

    # The class-specific part is dedented before the shared pool-interface
    # documentation is appended to it.
    SVMPool.__doc__ = (  # pylint: disable=possibly-used-before-assignment
        remove_common_indentation("""
        A memory pool for OpenCL device memory in :ref:`SVM <svm>` form.
        *allocator* must be an instance of :class:`SVMAllocator`.

        .. versionadded:: 2022.2

        .. automethod:: __init__
        .. automethod:: __call__

            Return a :class:`PooledSVM` of the given *size*.
        """) + _MEMPOOL_IFACE_DOCS)

    # }}}
453
+
454
+
455
+ if get_cl_header_version() >= (2, 0):
456
+ _monkeypatch_svm_docstrings()
457
+
458
+ # }}}
459
+
460
+
461
+ # {{{ first-arg caches
462
+
463
# Every cache dictionary created by the first-argument-dependent memoizers
# is registered here so that all of them can be emptied in one sweep.
_first_arg_dependent_caches = []


def first_arg_dependent_memoize(func):
    """Provides memoization for a function. Typically used to cache
    things that get created inside a :class:`pyopencl.Context`, e.g. programs
    and kernels. Assumes that the first argument of the decorated function is
    an OpenCL object that might go away, such as a :class:`pyopencl.Context` or
    a :class:`pyopencl.CommandQueue`, and based on which we might want to clear
    the cache.

    Results are stored per first argument and, below that, per combination
    of the remaining positional and keyword arguments, all of which must be
    hashable. The cache is emptied by :func:`clear_first_arg_caches`.

    :arg func: the function to memoize.
    :returns: a wrapper with the name and docstring of *func*.

    .. versionadded:: 2011.2
    """
    from functools import wraps

    @wraps(func)
    def wrapper(cl_object, *args, **kwargs):
        if kwargs:
            cache_key = (args, frozenset(kwargs.items()))
        else:
            cache_key = (args,)

        # The per-function cache is created lazily on the first call and
        # kept as an attribute of the undecorated function.
        try:
            ctx_dict = func._pyopencl_first_arg_dep_memoize_dic
        except AttributeError:
            # FIXME: This may keep contexts alive longer than desired.
            # But I guess since the memory in them is freed, who cares.
            ctx_dict = func._pyopencl_first_arg_dep_memoize_dic = {}
            _first_arg_dependent_caches.append(ctx_dict)

        try:
            return ctx_dict[cl_object][cache_key]
        except KeyError:
            arg_dict = ctx_dict.setdefault(cl_object, {})
            result = func(cl_object, *args, **kwargs)
            arg_dict[cache_key] = result
            return result

    return wrapper
501
+
502
+
503
+ context_dependent_memoize = first_arg_dependent_memoize
504
+
505
+
506
def first_arg_dependent_memoize_nested(nested_func):
    """Memoize a function that is defined inside another function.

    Like :func:`first_arg_dependent_memoize`, results are cached per first
    argument (typically a :class:`pyopencl.Context` or
    :class:`pyopencl.CommandQueue`) and the cache responds to
    :func:`clear_first_arg_caches`.

    The cache is stored as an attribute on the object found by looking up
    the name of the calling function in that function's globals, so it
    survives repeated executions of the enclosing function. Only positional
    arguments are accepted by the memoized function.

    .. versionadded:: 2013.1
    """
    from functools import wraps
    from inspect import currentframe

    code = nested_func.__code__
    attr_name = intern(
            "_memoize_inner_dic_%s_%s_%d"
            % (nested_func.__name__, code.co_filename, code.co_firstlineno))

    # Find the object the enclosing (calling) function is bound to at
    # module level; the cache is attached to it.
    caller_frame = currentframe().f_back
    owner = caller_frame.f_globals[caller_frame.f_code.co_name]

    try:
        per_object_cache = getattr(owner, attr_name)
    except AttributeError:
        per_object_cache = {}
        _first_arg_dependent_caches.append(per_object_cache)
        setattr(owner, attr_name, per_object_cache)

    @wraps(nested_func)
    def new_nested_func(cl_object, *args):
        try:
            return per_object_cache[cl_object][args]
        except KeyError:
            by_args = per_object_cache.setdefault(cl_object, {})
            value = nested_func(cl_object, *args)
            by_args[args] = value
            return value

    return new_nested_func
553
+
554
+
555
def clear_first_arg_caches():
    """Empty every registered first-argument-dependent memoization cache.

    This also drops the references those caches hold to their key objects
    (e.g. contexts). Call this if it matters to you that the program lets
    go of all remaining references to a context.

    .. versionadded:: 2011.2
    """
    for memo in _first_arg_dependent_caches:
        memo.clear()
565
+
566
+
567
+ import atexit
568
+
569
+
570
+ atexit.register(clear_first_arg_caches)
571
+
572
+ # }}}
573
+
574
+
575
+ # {{{ pytest fixtures
576
+
577
+ class _ContextFactory:
578
+ def __init__(self, device):
579
+ self.device = device
580
+
581
+ def __call__(self):
582
+ # Get rid of leftovers from past tests.
583
+ # CL implementations are surprisingly limited in how many
584
+ # simultaneous contexts they allow...
585
+ clear_first_arg_caches()
586
+
587
+ from gc import collect
588
+ collect()
589
+
590
+ import pyopencl as cl
591
+ return cl.Context([self.device])
592
+
593
+ def __str__(self):
594
+ # Don't show address, so that parallel test collection works
595
+ return ("<context factory for <pyopencl.Device '%s' on '%s'>>" %
596
+ (self.device.name.strip(),
597
+ self.device.platform.name.strip()))
598
+
599
+
600
def get_test_platforms_and_devices(plat_dev_string=None):
    """Select the OpenCL platforms and devices to run tests on.

    *plat_dev_string* has the form ``"0:0,1;intel:i5"``: entries are
    separated by ``;``, and each entry is either ``platform`` (all devices
    of that platform) or ``platform:device,device,...``. A platform or
    device is given either as an integer index or as a case-insensitive
    substring of its ``name + " " + vendor``.

    If *plat_dev_string* is *None*, the ``PYOPENCL_TEST`` environment
    variable is used instead. If that is unset or empty, all platforms with
    all of their devices are returned.

    :return: list of tuples (platform, [device, device, ...])
    :raises RuntimeError: if an entry contains more than one ``:`` or if a
        name matches no platform/device.
    :raises IndexError: if an integer index is out of range.
    """

    import pyopencl as cl

    if plat_dev_string is None:
        import os
        plat_dev_string = os.environ.get("PYOPENCL_TEST", None)

    def find_cl_obj(objs, identifier):
        # Integer identifiers index directly into *objs*; anything else is
        # treated as a name fragment.
        try:
            num = int(identifier)
        except ValueError:
            pass
        else:
            return objs[num]

        needle = identifier.lower()
        for obj in objs:
            if needle in (obj.name + " " + obj.vendor).lower():
                return obj

        raise RuntimeError("object '%s' not found" % identifier)

    platforms = cl.get_platforms()

    if not plat_dev_string:
        return [
            (platform, platform.get_devices())
            for platform in platforms]

    result = []

    for entry in plat_dev_string.split(";"):
        lhsrhs = entry.split(":")

        if len(lhsrhs) == 1:
            platform = find_cl_obj(platforms, lhsrhs[0])
            result.append((platform, platform.get_devices()))

        elif len(lhsrhs) != 2:
            raise RuntimeError("invalid syntax of PYOPENCL_TEST")
        else:
            plat_str, dev_strs = lhsrhs

            platform = find_cl_obj(platforms, plat_str)
            devs = platform.get_devices()
            result.append(
                (platform,
                    [find_cl_obj(devs, dev_id)
                        for dev_id in dev_strs.split(",")]))

    return result
655
+
656
+
657
def get_pyopencl_fixture_arg_names(metafunc, extra_arg_names=None):
    """Return the PyOpenCL fixture names requested by a test function.

    The supported names are ``platform``, ``device``, ``ctx_factory`` and
    ``ctx_getter``, followed by *extra_arg_names*. Those that occur in
    ``metafunc.fixturenames`` are returned in that order. A request for
    ``ctx_getter`` emits a :class:`DeprecationWarning`.
    """
    candidates = ["platform", "device", "ctx_factory", "ctx_getter"]
    if extra_arg_names is not None:
        candidates.extend(extra_arg_names)

    requested = []
    for name in candidates:
        if name in metafunc.fixturenames:
            if name == "ctx_getter":
                from warnings import warn
                warn(
                    "The 'ctx_getter' arg is deprecated in favor of 'ctx_factory'.",
                    DeprecationWarning, stacklevel=2)

            requested.append(name)

    return requested
681
+
682
+
683
def get_pyopencl_fixture_arg_values():
    """Build the fixture values for every selected test device.

    :returns: a tuple ``(arg_values, idfn)``. *arg_values* is a list with one
        dictionary per device (keys ``platform``, ``device``,
        ``ctx_factory``, ``ctx_getter``); *idfn* maps a fixture value to the
        string used as its test id.
    """
    import pyopencl as cl

    arg_values = [
        {
            "platform": platform,
            "device": device,
            "ctx_factory": _ContextFactory(device),
            "ctx_getter": _ContextFactory(device),
        }
        for platform, devices in get_test_platforms_and_devices()
        for device in devices]

    def idfn(val):
        if not isinstance(val, cl.Platform):
            return str(val)

        # Don't show address, so that parallel test collection works
        return f"<pyopencl.Platform '{val.name}'>"

    return arg_values, idfn
705
+
706
+
707
def pytest_generate_tests_for_pyopencl(metafunc):
    """Using the line::

        from pyopencl.tools import pytest_generate_tests_for_pyopencl
                as pytest_generate_tests

    in your `pytest <https://docs.pytest.org/en/latest/>`__ test scripts allows
    you to use the arguments *ctx_factory*, *device*, or *platform* in your test
    functions, and they will automatically be run for each OpenCL device/platform
    in the system, as appropriate.

    The following environment variable is also supported to control
    device/platform choice::

        PYOPENCL_TEST=0:0,1;intel:i5,i7

    Entries are separated by ``;``. Within an entry, the platform comes
    before the ``:`` and a comma-separated list of devices after it.
    """

    arg_names = get_pyopencl_fixture_arg_names(metafunc)
    if not arg_names:
        # The test function requests none of the PyOpenCL fixtures.
        return

    arg_values, ids = get_pyopencl_fixture_arg_values()
    arg_values = [
        tuple(arg_dict[name] for name in arg_names)
        for arg_dict in arg_values
    ]

    metafunc.parametrize(arg_names, arg_values, ids=ids)
735
+
736
+ # }}}
737
+
738
+
739
+ # {{{ C argument lists
740
+
741
class Argument(ABC):
    """Abstract base class of the kernel argument descriptions.

    .. automethod:: declarator
    """

    @abstractmethod
    def declarator(self) -> str:
        """Return the C declarator for this argument."""
749
+
750
+
751
class DtypedArgument(Argument):
    """An :class:`Argument` that has a name and a :class:`numpy.dtype`.

    Instances compare equal when they have exactly the same type, dtype and
    name, and hash accordingly.

    .. attribute:: name
    .. attribute:: dtype
    """

    def __init__(self, dtype: Any, name: str) -> None:
        # Anything accepted by the numpy.dtype constructor may be passed.
        self.dtype = np.dtype(dtype)
        self.name = name

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.name!r}, {self.dtype})"

    def __eq__(self, other: Any) -> bool:
        if type(self) is not type(other):
            return False

        return self.dtype == other.dtype and self.name == other.name

    def __hash__(self) -> int:
        return hash(type(self)) ^ hash(self.dtype) ^ hash(self.name)
777
+
778
+
779
class VectorArg(DtypedArgument):
    """Inherits from :class:`DtypedArgument`.

    Declared as a ``__global`` pointer. With *with_offset*, the declarator
    consists of a ``<name>__base`` pointer and a ``long <name>__offset``.

    .. automethod:: __init__
    """

    def __init__(self, dtype: Any, name: str, with_offset: bool = False) -> None:
        super().__init__(dtype, name)
        self.with_offset = with_offset

    def declarator(self) -> str:
        ctype = dtype_to_ctype(self.dtype)

        if not self.with_offset:
            return f"__global {ctype} *{self.name}"

        # Two underscores -> less likelihood of a name clash.
        return f"__global {ctype} *{self.name}__base, long {self.name}__offset"

    def __eq__(self, other) -> bool:
        return (super().__eq__(other)
                and self.with_offset == other.with_offset)

    def __hash__(self) -> int:
        return super().__hash__() ^ hash(self.with_offset)
805
+
806
+
807
class ScalarArg(DtypedArgument):
    """Inherits from :class:`DtypedArgument`."""

    def declarator(self):
        # A scalar is declared by value: "<C type> <name>".
        ctype = dtype_to_ctype(self.dtype)
        return f"{ctype} {self.name}"
812
+
813
+
814
class OtherArg(Argument):
    """An :class:`Argument` whose C declarator is supplied verbatim.

    Instances compare equal when they have exactly the same type,
    declarator text and name, and hash accordingly.
    """

    def __init__(self, declarator: str, name: str) -> None:
        # Stored as ``decl`` because ``declarator`` is the method name.
        self.decl = declarator
        self.name = name

    def declarator(self) -> str:
        return self.decl

    def __eq__(self, other) -> bool:
        if type(self) is not type(other):
            return False

        return self.decl == other.decl and self.name == other.name

    def __hash__(self) -> int:
        return hash(type(self)) ^ hash(self.decl) ^ hash(self.name)
832
+
833
+
834
def parse_c_arg(c_arg: str, with_offset: bool = False) -> DtypedArgument:
    """Parse a single C argument declaration.

    Every occurrence of ``__global`` is removed from *c_arg* before it is
    handed to the parsing backend, which is given :class:`ScalarArg` and a
    :class:`VectorArg` factory to build the result with.

    :arg with_offset: passed on as ``with_offset=True`` to every
        :class:`VectorArg` that is created.
    :raises RuntimeError: if *c_arg* mentions ``__local`` or ``__constant``.
    """
    if any(aspace in c_arg for aspace in ("__local", "__constant")):
        raise RuntimeError("cannot deal with local or constant "
                "OpenCL address spaces in C argument lists ")

    stripped_arg = c_arg.replace("__global", "")

    if not with_offset:
        make_vector_arg = VectorArg
    else:
        def make_vector_arg(dtype, name):
            return VectorArg(dtype, name, with_offset=True)

    from pyopencl.compyte.dtypes import parse_c_arg_backend
    return parse_c_arg_backend(stripped_arg, ScalarArg, make_vector_arg)
850
+
851
+
852
def parse_arg_list(
        arguments: Union[str, List[str], List[DtypedArgument]],
        with_offset: bool = False) -> List[DtypedArgument]:
    """Turn a description of kernel arguments into argument objects.

    *arguments* may be a string of comma-separated C declarators, or a
    sequence whose entries are C declarator strings or
    :class:`DtypedArgument` instances. Strings are parsed with
    :func:`parse_c_arg` (to which *with_offset* is passed); argument objects
    are returned unchanged.
    """
    if isinstance(arguments, str):
        arguments = arguments.split(",")

    parsed = []
    for entry in arguments:
        if isinstance(entry, str):
            from pyopencl.tools import parse_c_arg
            parsed.append(parse_c_arg(entry, with_offset=with_offset))
        else:
            assert isinstance(entry, DtypedArgument)
            parsed.append(entry)

    return parsed
872
+
873
+
874
def get_arg_list_arg_types(arg_types):
    """Return a tuple with one entry per argument: the dtype for a
    :class:`ScalarArg`, the argument object itself for a :class:`VectorArg`.

    :raises RuntimeError: for any other kind of entry.
    """
    def describe(arg_type):
        if isinstance(arg_type, ScalarArg):
            return arg_type.dtype
        if isinstance(arg_type, VectorArg):
            return arg_type

        raise RuntimeError("arg type not understood: %s" % type(arg_type))

    return tuple(describe(arg_type) for arg_type in arg_types)
886
+
887
+
888
def get_arg_list_scalar_arg_dtypes(
        arg_types: List[DtypedArgument]
        ) -> List[Optional[np.dtype]]:
    """Return the list of scalar dtypes that corresponds to *arg_types*.

    A :class:`ScalarArg` contributes its dtype. A :class:`VectorArg`
    contributes *None*, followed by the ``int64`` dtype if it was created
    with an offset.

    :raises RuntimeError: for any other kind of entry.
    """
    dtypes: List[Optional[np.dtype]] = []

    for arg_type in arg_types:
        if isinstance(arg_type, ScalarArg):
            dtypes.append(arg_type.dtype)
        elif isinstance(arg_type, VectorArg):
            if arg_type.with_offset:
                dtypes.extend([None, np.dtype(np.int64)])
            else:
                dtypes.append(None)
        else:
            raise RuntimeError(f"arg type not understood: {type(arg_type)}")

    return dtypes
904
+
905
+
906
def get_arg_offset_adjuster_code(arg_types):
    """Return C statements, one per line, that declare ``<name>`` as
    ``<name>__base`` advanced by ``<name>__offset`` bytes, for every
    :class:`VectorArg` in *arg_types* that was created with an offset.
    """
    def adjuster(arg):
        ctype = dtype_to_ctype(arg.dtype)
        name = arg.name
        return (
            f"__global {ctype} *{name} = "
            f"(__global {ctype} *) "
            f"((__global char *) {name}__base + {name}__offset);")

    return "\n".join(
            adjuster(arg_type)
            for arg_type in arg_types
            if isinstance(arg_type, VectorArg) and arg_type.with_offset)
919
+
920
+ # }}}
921
+
922
+
923
def get_gl_sharing_context_properties():
    """Return a list of ``(pyopencl.context_properties key, value)`` pairs
    obtained from the current OpenGL context of the running platform.

    :raises NotImplementedError: if ``sys.platform`` is not one of
        ``linux``, ``linux2``, ``win32`` or ``darwin``.
    """
    import pyopencl as cl
    from OpenGL import platform as gl_platform

    import sys

    ctx_props = cl.context_properties

    if sys.platform in ("linux", "linux2"):
        from OpenGL import GLX
        return [
            (ctx_props.GL_CONTEXT_KHR, GLX.glXGetCurrentContext()),
            (ctx_props.GLX_DISPLAY_KHR, GLX.glXGetCurrentDisplay()),
        ]

    if sys.platform == "win32":
        from OpenGL import WGL
        return [
            (ctx_props.GL_CONTEXT_KHR, gl_platform.GetCurrentContext()),
            (ctx_props.WGL_HDC_KHR, WGL.wglGetCurrentDC()),
        ]

    if sys.platform == "darwin":
        return [
            (ctx_props.CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
                cl.get_apple_cgl_share_group()),
        ]

    raise NotImplementedError("platform '%s' not yet supported"
            % sys.platform)
956
+
957
+
958
+ class _CDeclList:
959
+ def __init__(self, device):
960
+ self.device = device
961
+ self.declared_dtypes = set()
962
+ self.declarations = []
963
+ self.saw_double = False
964
+ self.saw_complex = False
965
+
966
+ def add_dtype(self, dtype):
967
+ dtype = np.dtype(dtype)
968
+
969
+ if dtype in (np.float64, np.complex128):
970
+ self.saw_double = True
971
+
972
+ if dtype.kind == "c":
973
+ self.saw_complex = True
974
+
975
+ if dtype.kind != "V":
976
+ return
977
+
978
+ if dtype in self.declared_dtypes:
979
+ return
980
+
981
+ import pyopencl.cltypes
982
+ if dtype in pyopencl.cltypes.vec_type_to_scalar_and_count:
983
+ return
984
+
985
+ if hasattr(dtype, "subdtype") and dtype.subdtype is not None:
986
+ self.add_dtype(dtype.subdtype[0])
987
+ return
988
+
989
+ for _name, field_data in sorted(dtype.fields.items()):
990
+ field_dtype, _offset = field_data[:2]
991
+ self.add_dtype(field_dtype)
992
+
993
+ _, cdecl = match_dtype_to_c_struct(
994
+ self.device, dtype_to_ctype(dtype), dtype)
995
+
996
+ self.declarations.append(cdecl)
997
+ self.declared_dtypes.add(dtype)
998
+
999
+ def visit_arguments(self, arguments):
1000
+ for arg in arguments:
1001
+ dtype = arg.dtype
1002
+ if dtype in (np.float64, np.complex128):
1003
+ self.saw_double = True
1004
+
1005
+ if dtype.kind == "c":
1006
+ self.saw_complex = True
1007
+
1008
+ def get_declarations(self):
1009
+ result = "\n\n".join(self.declarations)
1010
+
1011
+ if self.saw_complex:
1012
+ result = (
1013
+ "#include <pyopencl-complex.h>\n\n"
1014
+ + result)
1015
+
1016
+ if self.saw_double:
1017
+ result = (
1018
+ """
1019
+ #if __OPENCL_C_VERSION__ < 120
1020
+ #pragma OPENCL EXTENSION cl_khr_fp64: enable
1021
+ #endif
1022
+ #define PYOPENCL_DEFINE_CDOUBLE
1023
+ """
1024
+ + result)
1025
+
1026
+ return result
1027
+
1028
+
1029
@memoize
def match_dtype_to_c_struct(device, name, dtype, context=None):
    """Return a tuple ``(dtype, c_decl)`` such that the C struct declaration
    in ``c_decl`` and the structure :class:`numpy.dtype` instance ``dtype``
    have the same memory layout.

    Note that *dtype* may be modified from the value that was passed in,
    for example to insert padding.

    (As a remark on implementation, this routine runs a small kernel on
    the given *device* to ensure that :mod:`numpy` and C offsets and
    sizes match.)

    :arg device: the device for which the probing kernel is built and on
        which it is run.
    :arg name: the C type name used for the generated ``typedef``.
    :arg dtype: a structured :class:`numpy.dtype`.
    :arg context: an optional context to use. If *None*, a new context
        containing only *device* is created.
    :raises NotImplementedError: if a field is an array of arrays.
    :raises RuntimeError: if the compiler reports a field offset that is not
        smaller than the struct size and the struct size disagrees with
        numpy's ``itemsize``.

    .. versionadded:: 2013.1

    This example explains the use of this function::

        >>> import numpy as np
        >>> import pyopencl as cl
        >>> import pyopencl.tools
        >>> ctx = cl.create_some_context()
        >>> dtype = np.dtype([("id", np.uint32), ("value", np.float32)])
        >>> dtype, c_decl = pyopencl.tools.match_dtype_to_c_struct(
        ...     ctx.devices[0], 'id_val', dtype)
        >>> print(c_decl)
        typedef struct {
          unsigned id;
          float value;
        } id_val;
        >>> print(dtype)
        [('id', '<u4'), ('value', '<f4')]
        >>> cl.tools.get_or_register_dtype('id_val', dtype)

    As this example shows, it is important to call
    :func:`get_or_register_dtype` on the modified ``dtype`` returned by this
    function, not the original one.
    """

    import pyopencl as cl

    # Walk the fields in order of their numpy offsets so that the C struct
    # lists its members in memory order.
    fields = sorted(dtype.fields.items(),
            key=lambda name_dtype_offset: name_dtype_offset[1][1])

    c_fields = []
    for field_name, dtype_and_offset in fields:
        # Slice instead of unpacking: numpy field entries may carry a third
        # (title) element.
        field_dtype, _offset = dtype_and_offset[:2]
        if hasattr(field_dtype, "subdtype") and field_dtype.subdtype is not None:
            array_dtype = field_dtype.subdtype[0]
            if hasattr(array_dtype, "subdtype") and array_dtype.subdtype is not None:
                raise NotImplementedError("nested array dtypes are not supported")
            array_dims = field_dtype.subdtype[1]
            dims_str = ""
            try:
                for dim in array_dims:
                    dims_str += "[%d]" % dim
            except TypeError:
                # array_dims was a single number rather than a sequence
                dims_str = "[%d]" % array_dims
            c_fields.append(" {} {}{};".format(
                dtype_to_ctype(array_dtype), field_name, dims_str)
            )
        else:
            c_fields.append(
                    " {} {};".format(dtype_to_ctype(field_dtype), field_name))

    c_decl = "typedef struct {{\n{}\n}} {};\n\n".format(
            "\n".join(c_fields),
            name)

    # Gather declarations that the field types themselves depend on.
    cdl = _CDeclList(device)
    for _field_name, dtype_and_offset in fields:
        field_dtype, _offset = dtype_and_offset[:2]
        cdl.add_dtype(field_dtype)

    pre_decls = cdl.get_declarations()

    # result[0] holds sizeof(); result[i+1] holds the offset of field i.
    offset_code = "\n".join(
            "result[%d] = pycl_offsetof(%s, %s);" % (i+1, name, field_name)
            for i, (field_name, _) in enumerate(fields))

    src = rf"""
        #define pycl_offsetof(st, m) \
                 ((uint) ((__local char *) &(dummy.m) \
                 - (__local char *)&dummy ))

        {pre_decls}

        {c_decl}

        __kernel void get_size_and_offsets(__global uint *result)
        {{
            result[0] = sizeof({name});
            __local {name} dummy;
            {offset_code}
        }}
    """

    if context is None:
        context = cl.Context([device])

    queue = cl.CommandQueue(context)

    prg = cl.Program(context, src)
    knl = prg.build(devices=[device]).get_size_and_offsets

    import pyopencl.array

    result_buf = cl.array.empty(queue, 1+len(fields), np.uint32)
    knl(queue, (1,), (1,), result_buf.data)
    queue.finish()
    size_and_offsets = result_buf.get()

    size = int(size_and_offsets[0])

    offsets = size_and_offsets[1:]
    if any(ofs >= size for ofs in offsets):
        # offsets not plausible

        if dtype.itemsize == size:
            # If sizes match, use numpy's idea of the offsets.
            offsets = [field_info[1] for _field_name, field_info in fields]
        else:
            raise RuntimeError(
                    "OpenCL compiler reported offsetof() past sizeof() "
                    "for struct layout on '%s'. "
                    "This makes no sense, and it's usually indicates a "
                    "compiler bug. "
                    "Refusing to discover struct layout." % device)

    result_buf.data.release()
    del knl
    del prg
    del queue
    del context

    try:
        dtype_arg_dict = {
            "names": [field_name for field_name, _field_info in fields],
            "formats": [field_info[0] for _field_name, field_info in fields],
            "offsets": [int(x) for x in offsets],
            "itemsize": size,
            }
        dtype = np.dtype(dtype_arg_dict)
        if dtype.itemsize != size:
            # "Old" versions of numpy (1.6.x?) silently ignore "itemsize". Boo.
            dtype_arg_dict["names"].append("_pycl_size_fixer")
            dtype_arg_dict["formats"].append(np.uint8)
            dtype_arg_dict["offsets"].append(size-1)
            dtype = np.dtype(dtype_arg_dict)
    except NotImplementedError:
        # Fallback: express the layout as a plain field list with explicit
        # padding members wherever the C offsets leave a gap.
        def calc_field_type():
            total_size = 0
            padding_count = 0
            for offset, (field_name, field_info) in zip(offsets, fields):
                offset = int(offset)
                field_dtype = field_info[0]
                if offset > total_size:
                    padding_count += 1
                    # Parenthesized: '%' binds tighter than '-', so the
                    # difference must be computed before formatting.
                    yield ("__pycl_padding%d" % padding_count,
                           "V%d" % (offset - total_size))
                yield field_name, field_dtype
                total_size = field_dtype.itemsize + offset
        dtype = np.dtype(list(calc_field_type()))

    assert dtype.itemsize == size

    return dtype, c_decl
1196
+
1197
+
1198
@memoize
def dtype_to_c_struct(device, dtype):
    """Return the C struct declaration for *dtype* as laid out on *device*.

    An empty string is returned for dtypes without fields and for the
    built-in OpenCL vector types. Otherwise the declaration produced by
    :func:`match_dtype_to_c_struct` is returned, after asserting that the
    layout determined there agrees field by field with *dtype*.
    """
    if dtype.fields is None:
        return ""

    import pyopencl.cltypes
    if dtype in pyopencl.cltypes.vec_type_to_scalar_and_count:
        # Vector types are built-in. Don't try to redeclare those.
        return ""

    matched_dtype, c_decl = match_dtype_to_c_struct(
            device, dtype_to_ctype(dtype), dtype)

    def dtypes_match():
        # Same number of fields, and every field entry identical.
        return (
            len(dtype.fields) == len(matched_dtype.fields)
            and all(
                matched_dtype.fields[field_name] == field_info
                for field_name, field_info in dtype.fields.items()))

    assert dtypes_match()

    return c_decl
1222
+
1223
+
1224
+ # {{{ code generation/templating helper
1225
+
1226
+ def _process_code_for_macro(code):
1227
+ code = code.replace("//CL//", "\n")
1228
+
1229
+ if "//" in code:
1230
+ raise RuntimeError("end-of-line comments ('//') may not be used in "
1231
+ "code snippets")
1232
+
1233
+ return code.replace("\n", " \\\n")
1234
+
1235
+
1236
+ class _SimpleTextTemplate:
1237
+ def __init__(self, txt):
1238
+ self.txt = txt
1239
+
1240
+ def render(self, context):
1241
+ return self.txt
1242
+
1243
+
1244
+ class _PrintfTextTemplate:
1245
+ def __init__(self, txt):
1246
+ self.txt = txt
1247
+
1248
+ def render(self, context):
1249
+ return self.txt % context
1250
+
1251
+
1252
+ class _MakoTextTemplate:
1253
+ def __init__(self, txt):
1254
+ from mako.template import Template
1255
+ self.template = Template(txt, strict_undefined=True)
1256
+
1257
+ def render(self, context):
1258
+ return self.template.render(**context)
1259
+
1260
+
1261
+ class _ArgumentPlaceholder:
1262
+ """A placeholder for subclasses of :class:`DtypedArgument`. This is needed
1263
+ because the concrete dtype of the argument is not known at template
1264
+ creation time--it may be a type alias that will only be filled in
1265
+ at run time. These types take the place of these proto-arguments until
1266
+ all types are known.
1267
+
1268
+ See also :class:`_TemplateRenderer.render_arg`.
1269
+ """
1270
+
1271
+ def __init__(self, typename, name, **extra_kwargs):
1272
+ self.typename = typename
1273
+ self.name = name
1274
+ self.extra_kwargs = extra_kwargs
1275
+
1276
+
1277
class _VectorArgPlaceholder(_ArgumentPlaceholder):
    """Placeholder that is eventually rendered as a :class:`VectorArg`."""

    target_class = VectorArg
1279
+
1280
+
1281
class _ScalarArgPlaceholder(_ArgumentPlaceholder):
    """Placeholder that is eventually rendered as a :class:`ScalarArg`."""

    target_class = ScalarArg
1283
+
1284
+
1285
+ class _TemplateRenderer:
1286
+ def __init__(self, template, type_aliases, var_values, context=None,
1287
+ options=None):
1288
+ self.template = template
1289
+ self.type_aliases = dict(type_aliases)
1290
+ self.var_dict = dict(var_values)
1291
+
1292
+ for name in self.var_dict:
1293
+ if name.startswith("macro_"):
1294
+ self.var_dict[name] = _process_code_for_macro(
1295
+ self.var_dict[name])
1296
+
1297
+ self.context = context
1298
+ self.options = options
1299
+
1300
+ def __call__(self, txt):
1301
+ if txt is None:
1302
+ return txt
1303
+
1304
+ result = self.template.get_text_template(txt).render(self.var_dict)
1305
+
1306
+ return str(result)
1307
+
1308
+ def get_rendered_kernel(self, txt, kernel_name):
1309
+ import pyopencl as cl
1310
+ prg = cl.Program(self.context, self(txt)).build(self.options)
1311
+
1312
+ kernel_name_prefix = self.var_dict.get("kernel_name_prefix")
1313
+ if kernel_name_prefix is not None:
1314
+ kernel_name = kernel_name_prefix+kernel_name
1315
+
1316
+ return getattr(prg, kernel_name)
1317
+
1318
+ def parse_type(self, typename):
1319
+ if isinstance(typename, str):
1320
+ try:
1321
+ return self.type_aliases[typename]
1322
+ except KeyError:
1323
+ from pyopencl.compyte.dtypes import NAME_TO_DTYPE
1324
+ return NAME_TO_DTYPE[typename]
1325
+ else:
1326
+ return np.dtype(typename)
1327
+
1328
+ def render_arg(self, arg_placeholder):
1329
+ return arg_placeholder.target_class(
1330
+ self.parse_type(arg_placeholder.typename),
1331
+ arg_placeholder.name,
1332
+ **arg_placeholder.extra_kwargs)
1333
+
1334
+ _C_COMMENT_FINDER = re.compile(r"/\*.*?\*/")
1335
+
1336
+ def render_argument_list(self, *arg_lists, **kwargs):
1337
+ with_offset = kwargs.pop("with_offset", False)
1338
+ if kwargs:
1339
+ raise TypeError("unrecognized kwargs: " + ", ".join(kwargs))
1340
+
1341
+ all_args = []
1342
+
1343
+ for arg_list in arg_lists:
1344
+ if isinstance(arg_list, str):
1345
+ arg_list = str(
1346
+ self.template
1347
+ .get_text_template(arg_list).render(self.var_dict))
1348
+ arg_list = self._C_COMMENT_FINDER.sub("", arg_list)
1349
+ arg_list = arg_list.replace("\n", " ")
1350
+
1351
+ all_args.extend(arg_list.split(","))
1352
+ else:
1353
+ all_args.extend(arg_list)
1354
+
1355
+ if with_offset:
1356
+ def vec_arg_factory(typename, name):
1357
+ return _VectorArgPlaceholder(typename, name, with_offset=True)
1358
+ else:
1359
+ vec_arg_factory = _VectorArgPlaceholder
1360
+
1361
+ from pyopencl.compyte.dtypes import parse_c_arg_backend
1362
+ parsed_args = []
1363
+ for arg in all_args:
1364
+ if isinstance(arg, str):
1365
+ arg = arg.strip()
1366
+ if not arg:
1367
+ continue
1368
+
1369
+ ph = parse_c_arg_backend(arg,
1370
+ _ScalarArgPlaceholder, vec_arg_factory,
1371
+ name_to_dtype=lambda x: x)
1372
+ parsed_arg = self.render_arg(ph)
1373
+
1374
+ elif isinstance(arg, Argument):
1375
+ parsed_arg = arg
1376
+ elif isinstance(arg, tuple):
1377
+ parsed_arg = ScalarArg(self.parse_type(arg[0]), arg[1])
1378
+ else:
1379
+ raise TypeError("unexpected argument type: %s" % type(arg))
1380
+
1381
+ parsed_args.append(parsed_arg)
1382
+
1383
+ return parsed_args
1384
+
1385
+ def get_type_decl_preamble(self, device, decl_type_names, arguments=None):
1386
+ cdl = _CDeclList(device)
1387
+
1388
+ for typename in decl_type_names:
1389
+ cdl.add_dtype(self.parse_type(typename))
1390
+
1391
+ if arguments is not None:
1392
+ cdl.visit_arguments(arguments)
1393
+
1394
+ for _, tv in sorted(self.type_aliases.items()):
1395
+ cdl.add_dtype(tv)
1396
+
1397
+ type_alias_decls = [
1398
+ "typedef {} {};".format(dtype_to_ctype(val), name)
1399
+ for name, val in sorted(self.type_aliases.items())
1400
+ ]
1401
+
1402
+ return cdl.get_declarations() + "\n" + "\n".join(type_alias_decls)
1403
+
1404
+
1405
class KernelTemplateBase:
    """Base class for kernel templates with a per-instance build cache.

    :arg template_processor: the default template processor (*None*,
        ``"none"``, ``"printf"`` or ``"mako"``) for snippets that do not
        name one themselves.
    """

    def __init__(self, template_processor=None):
        self.template_processor = template_processor

        self.build_cache = {}
        _first_arg_dependent_caches.append(self.build_cache)

    def get_preamble(self):
        pass

    # Matches a leading "//CL//" or "//CL:<processor>//" marker.
    _TEMPLATE_PROCESSOR_PATTERN = re.compile(r"^//CL(?::([a-zA-Z0-9_]+))?//")

    @memoize_method
    def get_text_template(self, txt):
        """Return a renderable template object for the snippet *txt*.

        A leading ``//CL:<processor>//`` marker selects the processor for
        this snippet and is removed from the text; a bare ``//CL//`` marker
        is removed as well. Without a processor in the marker, the
        instance's ``template_processor`` is used.

        :raises RuntimeError: if the selected processor is not known.
        """
        proc_match = self._TEMPLATE_PROCESSOR_PATTERN.match(txt)
        tpl_processor = None

        if proc_match is not None:
            tpl_processor = proc_match.group(1)
            # chop off //CL// mark
            txt = txt[len(proc_match.group(0)):]
        if tpl_processor is None:
            tpl_processor = self.template_processor

        if tpl_processor is None or tpl_processor == "none":
            return _SimpleTextTemplate(txt)
        elif tpl_processor == "printf":
            return _PrintfTextTemplate(txt)
        elif tpl_processor == "mako":
            return _MakoTextTemplate(txt)
        else:
            # Report the processor actually selected: proc_match is None
            # when the name came from self.template_processor.
            raise RuntimeError(
                    "unknown template processor '%s'" % tpl_processor)

    def get_renderer(self, type_aliases, var_values, context=None, options=None):
        """Return a :class:`_TemplateRenderer` for this template.

        *context* and *options* are forwarded to the renderer, which needs
        them in order to build kernels.
        """
        return _TemplateRenderer(self, type_aliases, var_values,
                context=context, options=options)

    def build_inner(self, context, *args, **kwargs):
        raise NotImplementedError

    def build(self, context, *args, **kwargs):
        """Provide caching for :meth:`build_inner`.

        The cache key consists of *context*, the positional arguments and
        the sorted keyword arguments, all of which must be hashable.
        """

        cache_key = (context, args, tuple(sorted(kwargs.items())))
        try:
            return self.build_cache[cache_key]
        except KeyError:
            result = self.build_inner(context, *args, **kwargs)
            self.build_cache[cache_key] = result
            return result
1455
+
1456
+ # }}}
1457
+
1458
+
1459
+ # {{{ array_module
1460
+
1461
+ class _CLFakeArrayModule:
1462
+ def __init__(self, queue):
1463
+ self.queue = queue
1464
+
1465
+ @property
1466
+ def ndarray(self):
1467
+ from pyopencl.array import Array
1468
+ return Array
1469
+
1470
+ def dot(self, x, y):
1471
+ from pyopencl.array import dot
1472
+ return dot(x, y, queue=self.queue).get()
1473
+
1474
+ def vdot(self, x, y):
1475
+ from pyopencl.array import vdot
1476
+ return vdot(x, y, queue=self.queue).get()
1477
+
1478
+ def empty(self, shape, dtype, order="C"):
1479
+ from pyopencl.array import empty
1480
+ return empty(self.queue, shape, dtype, order=order)
1481
+
1482
+ def hstack(self, arrays):
1483
+ from pyopencl.array import hstack
1484
+ return hstack(arrays, self.queue)
1485
+
1486
+
1487
def array_module(a):
    """Return an array "module" appropriate for the array *a*.

    For a :class:`numpy.ndarray` this is :mod:`numpy` itself; for a
    :class:`pyopencl.array.Array` it is a :class:`_CLFakeArrayModule` bound
    to the array's queue.

    :raises TypeError: if *a* is of neither type.
    """
    if isinstance(a, np.ndarray):
        return np

    from pyopencl.array import Array
    if not isinstance(a, Array):
        raise TypeError("array type not understood: %s" % type(a))

    return _CLFakeArrayModule(a.queue)
1496
+
1497
+ # }}}
1498
+
1499
+
1500
def is_spirv(s):
    """Return whether *s* is a :class:`bytes` object starting with the
    SPIR-V magic number, in either byte order.
    """
    if not isinstance(s, bytes):
        return False

    magic = b"\x07\x23\x02\x03"
    return s[:4] in (magic, magic[::-1])
1507
+
1508
+
1509
+ # {{{ numpy key types builder
1510
+
1511
class _NumpyTypesKeyBuilder(KeyBuilderBase):
    """Key builder that also handles :class:`VectorArg` instances and numpy
    scalar types.
    """

    def update_for_VectorArg(self, key_hash, key):  # noqa: N802
        # Feed every attribute that distinguishes one VectorArg from another.
        self.rec(key_hash, key.dtype)
        self.update_for_str(key_hash, key.name)
        self.rec(key_hash, key.with_offset)

    def update_for_type(self, key_hash, key):
        """Hash the type *key*; only subclasses of :class:`numpy.generic`
        are supported, and they are identified by their ``__name__``.

        :raises TypeError: for any other type.
        """
        if issubclass(key, np.generic):
            self.update_for_str(key_hash, key.__name__)
            return

        # Report the offending type itself; type(key) would merely name its
        # metaclass.
        raise TypeError("unsupported type for persistent hash keying: %s"
                % key)
1524
+
1525
+ # }}}
1526
+
1527
+ # vim: foldmethod=marker