pyopencl 2024.2.2__cp312-cp312-win_amd64.whl → 2024.2.5__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyopencl might be problematic. Click here for more details.

Files changed (99) hide show
  1. pyopencl/__init__.py +16 -4
  2. pyopencl/_cl.cp312-win_amd64.pyd +0 -0
  3. pyopencl/algorithm.py +3 -1
  4. pyopencl/bitonic_sort.py +2 -0
  5. pyopencl/characterize/__init__.py +23 -0
  6. pyopencl/compyte/.git +1 -0
  7. pyopencl/compyte/.gitignore +21 -0
  8. pyopencl/compyte/ndarray/Makefile +31 -0
  9. pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
  10. pyopencl/compyte/ndarray/pygpu_language.h +207 -0
  11. pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
  12. pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
  13. pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
  14. pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
  15. pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
  16. pyopencl/tools.py +60 -56
  17. pyopencl/version.py +9 -3
  18. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/METADATA +105 -105
  19. pyopencl-2024.2.5.dist-info/RECORD +56 -0
  20. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/WHEEL +1 -1
  21. pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
  22. pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
  23. pyopencl-2024.2.2.data/data/Makefile.in +0 -21
  24. pyopencl-2024.2.2.data/data/README.rst +0 -70
  25. pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
  26. pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
  27. pyopencl-2024.2.2.data/data/configure.py +0 -6
  28. pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
  29. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
  30. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
  31. pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
  32. pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
  33. pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
  34. pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
  35. pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
  36. pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
  37. pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
  38. pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
  39. pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
  40. pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
  41. pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
  42. pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
  43. pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
  44. pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
  45. pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
  46. pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
  47. pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
  48. pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
  49. pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
  50. pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
  51. pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
  52. pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
  53. pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
  54. pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
  55. pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
  56. pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
  57. pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
  58. pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
  59. pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
  60. pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
  61. pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
  62. pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
  63. pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
  64. pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
  65. pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
  66. pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
  67. pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
  68. pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
  69. pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
  70. pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
  71. pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
  72. pyopencl-2024.2.2.data/data/pytest.ini +0 -3
  73. pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
  74. pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
  75. pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
  76. pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
  77. pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
  78. pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
  79. pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
  80. pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
  81. pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
  82. pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
  83. pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
  84. pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
  85. pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
  86. pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
  87. pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
  88. pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
  89. pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
  90. pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
  91. pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
  92. pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
  93. pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
  94. pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
  95. pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
  96. pyopencl-2024.2.2.dist-info/LICENSE +0 -282
  97. pyopencl-2024.2.2.dist-info/RECORD +0 -123
  98. pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
  99. {pyopencl-2024.2.2.data/data → pyopencl-2024.2.5.dist-info/licenses}/LICENSE +0 -0
@@ -1,82 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- import numpy as np
4
-
5
- import pyopencl as cl
6
- from pyopencl.characterize import (
7
- has_coarse_grain_buffer_svm, has_fine_grain_buffer_svm,
8
- has_fine_grain_system_svm)
9
-
10
-
11
- ctx = cl.create_some_context()
12
- queue = cl.CommandQueue(ctx)
13
-
14
- dev = queue.device
15
-
16
- print(
17
- f"Device '{dev.name}' on platform '{dev.platform.name} ({dev.platform.version})'"
18
- " has the following SVM features:\n"
19
- f" Coarse-grained buffer SVM: {has_coarse_grain_buffer_svm(dev)}\n"
20
- f" Fine-grained buffer SVM: {has_fine_grain_buffer_svm(dev)}\n"
21
- f" Fine-grained system SVM: {has_fine_grain_system_svm(dev)}"
22
- )
23
-
24
- prg = cl.Program(ctx, """
25
- __kernel void twice(
26
- __global float *a_g)
27
- {
28
- int gid = get_global_id(0);
29
- a_g[gid] = 2*a_g[gid];
30
- }
31
- """).build()
32
-
33
-
34
- if has_coarse_grain_buffer_svm(dev):
35
- print("Testing coarse-grained buffer SVM...", end="")
36
-
37
- svm_ary = cl.SVM(cl.csvm_empty(ctx, 10, np.float32))
38
- assert isinstance(svm_ary.mem, np.ndarray)
39
-
40
- with svm_ary.map_rw(queue) as ary:
41
- ary.fill(17) # use from host
42
- orig_ary = ary.copy()
43
-
44
- prg.twice(queue, svm_ary.mem.shape, None, svm_ary)
45
- queue.finish()
46
-
47
- with svm_ary.map_ro(queue) as ary:
48
- assert np.array_equal(orig_ary*2, ary)
49
-
50
- print(" done.")
51
-
52
- if has_fine_grain_buffer_svm(dev):
53
- print("Testing fine-grained buffer SVM...", end="")
54
-
55
- ary = cl.fsvm_empty(ctx, 10, np.float32)
56
- assert isinstance(ary.base, cl.SVMAllocation)
57
-
58
- ary.fill(17)
59
- orig_ary = ary.copy()
60
-
61
- prg.twice(queue, ary.shape, None, cl.SVM(ary))
62
- queue.finish()
63
-
64
- assert np.array_equal(orig_ary*2, ary)
65
-
66
- print(" done.")
67
-
68
- if has_fine_grain_system_svm(dev):
69
- print("Testing fine-grained system SVM...", end="")
70
-
71
- ary = np.zeros(10, np.float32)
72
- assert isinstance(ary, np.ndarray)
73
-
74
- ary.fill(17)
75
- orig_ary = ary.copy()
76
-
77
- prg.twice(queue, ary.shape, None, cl.SVM(ary))
78
- queue.finish()
79
-
80
- assert np.array_equal(orig_ary*2, ary)
81
-
82
- print(" done.")
@@ -1,229 +0,0 @@
1
- # Transposition of a matrix
2
- # originally for PyCUDA by Hendrik Riedmann <riedmann@dam.brown.edu>
3
-
4
- import numpy as np
5
- import numpy.linalg as la
6
-
7
- import pyopencl as cl
8
-
9
-
10
- block_size = 16
11
-
12
-
13
- class NaiveTranspose:
14
- def __init__(self, ctx):
15
- self.kernel = (
16
- cl.Program(
17
- ctx,
18
- """
19
- __kernel void transpose(
20
- __global float *a_t, __global float *a,
21
- unsigned a_width, unsigned a_height)
22
- {
23
- int read_idx = get_global_id(0) + get_global_id(1) * a_width;
24
- int write_idx = get_global_id(1) + get_global_id(0) * a_height;
25
-
26
- a_t[write_idx] = a[read_idx];
27
- }
28
- """,)
29
- .build()
30
- .transpose
31
- )
32
-
33
- def __call__(self, queue, tgt, src, shape):
34
- w, h = shape
35
- assert w % block_size == 0
36
- assert h % block_size == 0
37
-
38
- return self.kernel(
39
- queue,
40
- (w, h),
41
- (block_size, block_size),
42
- tgt,
43
- src,
44
- np.uint32(w),
45
- np.uint32(h),
46
- )
47
-
48
-
49
- class SillyTranspose(NaiveTranspose):
50
- def __call__(self, queue, tgt, src, shape):
51
- w, h = shape
52
- assert w % block_size == 0
53
- assert h % block_size == 0
54
-
55
- return self.kernel(
56
- queue, (w, h), None, tgt, src, np.uint32(w), np.uint32(h)
57
- )
58
-
59
-
60
- class TransposeWithLocal:
61
- def __init__(self, ctx):
62
- self.kernel = (
63
- cl.Program(
64
- ctx,
65
- """
66
- #define BLOCK_SIZE %(block_size)d
67
- #define A_BLOCK_STRIDE (BLOCK_SIZE * a_width)
68
- #define A_T_BLOCK_STRIDE (BLOCK_SIZE * a_height)
69
-
70
- __kernel __attribute__((reqd_work_group_size(BLOCK_SIZE, BLOCK_SIZE, 1)))
71
- void transpose(
72
- __global float *a_t, __global float *a,
73
- unsigned a_width, unsigned a_height,
74
- __local float *a_local)
75
- {
76
- int base_idx_a =
77
- get_group_id(0) * BLOCK_SIZE +
78
- get_group_id(1) * A_BLOCK_STRIDE;
79
- int base_idx_a_t =
80
- get_group_id(1) * BLOCK_SIZE +
81
- get_group_id(0) * A_T_BLOCK_STRIDE;
82
-
83
- int glob_idx_a =
84
- base_idx_a + get_local_id(0) + a_width * get_local_id(1);
85
- int glob_idx_a_t =
86
- base_idx_a_t + get_local_id(0) + a_height * get_local_id(1);
87
-
88
- a_local[get_local_id(1)*BLOCK_SIZE+get_local_id(0)] = a[glob_idx_a];
89
-
90
- barrier(CLK_LOCAL_MEM_FENCE);
91
-
92
- a_t[glob_idx_a_t] = a_local[get_local_id(0)*BLOCK_SIZE+get_local_id(1)];
93
- }
94
- """
95
- % {"block_size": block_size},
96
- )
97
- .build()
98
- .transpose
99
- )
100
-
101
- def __call__(self, queue, tgt, src, shape):
102
- w, h = shape
103
- assert w % block_size == 0
104
- assert h % block_size == 0
105
-
106
- return self.kernel(
107
- queue,
108
- (w, h),
109
- (block_size, block_size),
110
- tgt,
111
- src,
112
- np.uint32(w),
113
- np.uint32(h),
114
- cl.LocalMemory(4 * block_size * (block_size + 1)),
115
- )
116
-
117
-
118
- def transpose_using_cl(ctx, queue, cpu_src, cls):
119
- mf = cl.mem_flags
120
- a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=cpu_src)
121
- a_t_buf = cl.Buffer(ctx, mf.WRITE_ONLY, size=cpu_src.nbytes)
122
- cls(ctx)(queue, a_t_buf, a_buf, cpu_src.shape)
123
-
124
- w, h = cpu_src.shape
125
- result = np.empty((h, w), dtype=cpu_src.dtype)
126
- cl.enqueue_copy(queue, result, a_t_buf).wait()
127
-
128
- a_buf.release()
129
- a_t_buf.release()
130
-
131
- return result
132
-
133
-
134
- def check_transpose():
135
- for cls in [NaiveTranspose, SillyTranspose, TransposeWithLocal]:
136
- print("checking", cls.__name__)
137
- ctx = cl.create_some_context()
138
-
139
- for dev in ctx.devices:
140
- assert dev.local_mem_size > 0
141
-
142
- queue = cl.CommandQueue(ctx)
143
-
144
- for i in np.arange(10, 13, 0.125):
145
- size = int(((2 ** i) // 32) * 32)
146
- print(size)
147
-
148
- rng = np.random.default_rng()
149
- source = rng.random((size, size), dtype=np.float32)
150
- result = transpose_using_cl(ctx, queue, source, NaiveTranspose)
151
-
152
- err = source.T - result
153
- err_norm = la.norm(err)
154
-
155
- assert err_norm == 0, (size, err_norm)
156
-
157
-
158
- def benchmark_transpose():
159
- ctx = cl.create_some_context()
160
-
161
- for dev in ctx.devices:
162
- assert dev.local_mem_size > 0
163
-
164
- queue = cl.CommandQueue(
165
- ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
166
- )
167
-
168
- sizes = [int(((2 ** i) // 32) * 32) for i in np.arange(10, 13, 0.125)]
169
- # for i in np.arange(10, 10.5, 0.125)]
170
-
171
- mem_bandwidths = {}
172
-
173
- methods = [SillyTranspose, NaiveTranspose, TransposeWithLocal]
174
- for cls in methods:
175
- name = cls.__name__.replace("Transpose", "")
176
-
177
- mem_bandwidths[cls] = meth_mem_bws = []
178
-
179
- for size in sizes:
180
- rng = np.random.default_rng()
181
- source = rng.random((size, size), dtype=np.float32)
182
-
183
- mf = cl.mem_flags
184
- a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=source)
185
- a_t_buf = cl.Buffer(ctx, mf.WRITE_ONLY, size=source.nbytes)
186
- method = cls(ctx)
187
-
188
- for _i in range(4):
189
- method(queue, a_t_buf, a_buf, source.shape)
190
-
191
- count = 12
192
- events = []
193
- for _i in range(count):
194
- events.append(method(queue, a_t_buf, a_buf, source.shape))
195
-
196
- events[-1].wait()
197
- time = sum(evt.profile.end - evt.profile.start for evt in events)
198
-
199
- mem_bw = 2 * source.nbytes * count / (time * 1e-9)
200
- print("benchmarking", name, size, mem_bw / 1e9, "GB/s")
201
- meth_mem_bws.append(mem_bw)
202
-
203
- a_buf.release()
204
- a_t_buf.release()
205
-
206
- try:
207
- from matplotlib.pyplot import (
208
- clf, grid, legend, plot, savefig, xlabel, ylabel)
209
- except ModuleNotFoundError:
210
- pass
211
- else:
212
- for i in range(len(methods)):
213
- clf()
214
- for j in range(i + 1):
215
- method = methods[j]
216
- name = method.__name__.replace("Transpose", "")
217
- plot(sizes, np.array(mem_bandwidths[method]) / 1e9, "o-",
218
- label=name)
219
-
220
- xlabel("Matrix width/height $N$")
221
- ylabel("Memory Bandwidth [GB/s]")
222
- legend(loc="best")
223
- grid()
224
-
225
- savefig("transpose-benchmark-%d.pdf" % i)
226
-
227
-
228
- check_transpose()
229
- benchmark_transpose()
@@ -1,3 +0,0 @@
1
- [pytest]
2
- markers=
3
- bitonic: tests involving bitonic sort
@@ -1,51 +0,0 @@
1
- // Base-2 logarithm bithack
2
- //
3
- // Copyright (C) 2009 Andreas Kloeckner
4
- // Copyright (C) Sean Eron Anderson (in the public domain)
5
- //
6
- // Permission is hereby granted, free of charge, to any person
7
- // obtaining a copy of this software and associated documentation
8
- // files (the "Software"), to deal in the Software without
9
- // restriction, including without limitation the rights to use,
10
- // copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- // copies of the Software, and to permit persons to whom the
12
- // Software is furnished to do so, subject to the following
13
- // conditions:
14
- //
15
- // The above copyright notice and this permission notice shall be
16
- // included in all copies or substantial portions of the Software.
17
- //
18
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
- // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
- // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
- // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
- // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
- // OTHER DEALINGS IN THE SOFTWARE.
26
-
27
-
28
- #include "bitlog.hpp"
29
-
30
-
31
- const char pyopencl::log_table_8[] =
32
- {
33
- 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
34
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
35
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
36
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
37
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
38
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
39
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
42
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
43
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
49
- };
50
-
51
-
@@ -1,83 +0,0 @@
1
- // Base-2 logarithm bithack.
2
- //
3
- // Copyright (C) 2009 Andreas Kloeckner
4
- // Copyright (C) Sean Eron Anderson (in the public domain)
5
- //
6
- // Permission is hereby granted, free of charge, to any person
7
- // obtaining a copy of this software and associated documentation
8
- // files (the "Software"), to deal in the Software without
9
- // restriction, including without limitation the rights to use,
10
- // copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- // copies of the Software, and to permit persons to whom the
12
- // Software is furnished to do so, subject to the following
13
- // conditions:
14
- //
15
- // The above copyright notice and this permission notice shall be
16
- // included in all copies or substantial portions of the Software.
17
- //
18
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
- // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
- // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
- // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
- // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
- // OTHER DEALINGS IN THE SOFTWARE.
26
-
27
-
28
- #ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP
29
- #define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP
30
-
31
-
32
- #include <climits>
33
- #include <cstdint>
34
-
35
-
36
- namespace pyopencl
37
- {
38
- /* from http://graphics.stanford.edu/~seander/bithacks.html */
39
-
40
- extern const char log_table_8[];
41
-
42
- inline unsigned bitlog2_16(uint16_t v)
43
- {
44
- if (unsigned long t = v >> 8)
45
- return 8+log_table_8[t];
46
- else
47
- return log_table_8[v];
48
- }
49
-
50
- inline unsigned bitlog2_32(uint32_t v)
51
- {
52
- if (uint16_t t = v >> 16)
53
- return 16+bitlog2_16(t);
54
- else
55
- return bitlog2_16(v);
56
- }
57
-
58
- #if defined(UINT64_MAX)
59
- inline unsigned bitlog2(uint64_t v)
60
- {
61
- if (uint32_t t = v >> 32)
62
- return 32+bitlog2_32(t);
63
- else
64
- return bitlog2_32(v);
65
- }
66
- #else
67
- inline unsigned bitlog2(unsigned long v)
68
- {
69
- #if (ULONG_MAX != 4294967295)
70
- if (uint32_t t = v >> 32)
71
- return 32+bitlog2_32(t);
72
- else
73
- #endif
74
- return bitlog2_32(v);
75
- }
76
- #endif
77
- }
78
-
79
-
80
-
81
-
82
-
83
- #endif
@@ -1,134 +0,0 @@
1
- /* Include OpenCL header, and define OpenCL extensions, since what is and is not
2
- * available in the official headers is very system-dependent */
3
-
4
- #ifndef _EXT_H
5
- #define _EXT_H
6
-
7
- #if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H))
8
- #include <OpenCL/opencl.h>
9
- #else
10
- #include <CL/cl.h>
11
- #endif
12
-
13
- /* These two defines were introduced in the 1.2 headers
14
- * on 2012-11-30, so earlier versions don't have them
15
- * (e.g. Debian wheezy)
16
- */
17
-
18
- #ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
19
- #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
20
- #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
21
- #endif
22
-
23
- /*
24
- * Extensions
25
- */
26
-
27
- /* cl_khr_icd */
28
- #define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
29
- #define CL_PLATFORM_NOT_FOUND_KHR -1001
30
-
31
-
32
- /* cl_khr_fp64 */
33
- #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
34
-
35
- /* cl_khr_fp16 */
36
- #define CL_DEVICE_HALF_FP_CONFIG 0x1033
37
-
38
- /* cl_khr_terminate_context */
39
- #define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F
40
-
41
- /* cl_nv_device_attribute_query */
42
- #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
43
- #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
44
- #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
45
- #define CL_DEVICE_WARP_SIZE_NV 0x4003
46
- #define CL_DEVICE_GPU_OVERLAP_NV 0x4004
47
- #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
48
- #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
49
- #define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007
50
- #define CL_DEVICE_PCI_BUS_ID_NV 0x4008
51
- #define CL_DEVICE_PCI_SLOT_ID_NV 0x4009
52
- #define CL_DEVICE_PCI_DOMAIN_ID_NV 0x400A
53
-
54
- /* cl_ext_atomic_counters_{32,64} */
55
- #define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
56
-
57
- /* cl_amd_device_attribute_query */
58
- #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
59
- #define CL_DEVICE_TOPOLOGY_AMD 0x4037
60
- #define CL_DEVICE_BOARD_NAME_AMD 0x4038
61
- #define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
62
- #define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
63
- #define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
64
- #define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
65
- #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
66
- #define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
67
- #define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
68
- #define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
69
- #define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
70
- #define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
71
- #define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049
72
- #define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A
73
- #define CL_DEVICE_GFXIP_MINOR_AMD 0x404B
74
- #define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C
75
- #define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030
76
- #define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031
77
- #define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033
78
- #define CL_DEVICE_PCIE_ID_AMD 0x4034
79
-
80
- #ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
81
- #define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
82
-
83
- typedef union
84
- {
85
- struct { cl_uint type; cl_uint data[5]; } raw;
86
- struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
87
- } cl_device_topology_amd;
88
- #endif
89
-
90
- /* cl_amd_offline_devices */
91
- #define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F
92
-
93
- /* cl_ext_device_fission */
94
- #define cl_ext_device_fission 1
95
-
96
- typedef cl_ulong cl_device_partition_property_ext;
97
-
98
- #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
99
- #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
100
- #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
101
- #define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 /* cl_intel_device_partition_by_names */
102
- #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
103
-
104
- #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
105
- #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
106
- #define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
107
- #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
108
- #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
109
-
110
- #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
111
- #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
112
- #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
113
- #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
114
- #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
115
- #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
116
-
117
- /* cl_intel_advanced_motion_estimation */
118
- #define CL_DEVICE_ME_VERSION_INTEL 0x407E
119
-
120
- /* cl_qcom_ext_host_ptr */
121
- #define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
122
- #define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
123
-
124
- /* cl_khr_spir */
125
- #define CL_DEVICE_SPIR_VERSIONS 0x40E0
126
-
127
- /* cl_altera_device_temperature */
128
- #define CL_DEVICE_CORE_TEMPERATURE_ALTERA 0x40F3
129
-
130
- /* cl_intel_simultaneous_sharing */
131
- #define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
132
- #define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
133
-
134
- #endif