pyopencl 2024.2.2__cp312-cp312-macosx_11_0_arm64.whl → 2024.2.4__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +16 -4
- pyopencl/_cl.cpython-312-darwin.so +0 -0
- pyopencl/algorithm.py +3 -1
- pyopencl/bitonic_sort.py +2 -0
- pyopencl/characterize/__init__.py +23 -0
- pyopencl/compyte/.git +1 -0
- pyopencl/compyte/.github/workflows/autopush.yml +21 -0
- pyopencl/compyte/.github/workflows/ci.yml +30 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/ndarray/Makefile +31 -0
- pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
- pyopencl/compyte/ndarray/pygpu_language.h +207 -0
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
- pyopencl/compyte/setup.cfg +9 -0
- pyopencl/tools.py +60 -56
- pyopencl/version.py +7 -3
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.4.dist-info}/METADATA +14 -14
- pyopencl-2024.2.4.dist-info/RECORD +59 -0
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.4.dist-info}/WHEEL +1 -1
- pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
- pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
- pyopencl-2024.2.2.data/data/Makefile.in +0 -21
- pyopencl-2024.2.2.data/data/README.rst +0 -70
- pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
- pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
- pyopencl-2024.2.2.data/data/configure.py +0 -6
- pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
- pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
- pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
- pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
- pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
- pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
- pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
- pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
- pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
- pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
- pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
- pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
- pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
- pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
- pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
- pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
- pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
- pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
- pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
- pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
- pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
- pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
- pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
- pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
- pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
- pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
- pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
- pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
- pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
- pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
- pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
- pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
- pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
- pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
- pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
- pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
- pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
- pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
- pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
- pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
- pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
- pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
- pyopencl-2024.2.2.data/data/pytest.ini +0 -3
- pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
- pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
- pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
- pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
- pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
- pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
- pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
- pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
- pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
- pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
- pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
- pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
- pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
- pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
- pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
- pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
- pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
- pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
- pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
- pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
- pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
- pyopencl-2024.2.2.dist-info/LICENSE +0 -282
- pyopencl-2024.2.2.dist-info/RECORD +0 -123
- pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
- {pyopencl-2024.2.2.data/data → pyopencl-2024.2.4.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,738 +0,0 @@
|
|
|
1
|
-
// Wrap memory pool
|
|
2
|
-
//
|
|
3
|
-
// Copyright (C) 2009 Andreas Kloeckner
|
|
4
|
-
//
|
|
5
|
-
// Permission is hereby granted, free of charge, to any person
|
|
6
|
-
// obtaining a copy of this software and associated documentation
|
|
7
|
-
// files (the "Software"), to deal in the Software without
|
|
8
|
-
// restriction, including without limitation the rights to use,
|
|
9
|
-
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
-
// copies of the Software, and to permit persons to whom the
|
|
11
|
-
// Software is furnished to do so, subject to the following
|
|
12
|
-
// conditions:
|
|
13
|
-
//
|
|
14
|
-
// The above copyright notice and this permission notice shall be
|
|
15
|
-
// included in all copies or substantial portions of the Software.
|
|
16
|
-
//
|
|
17
|
-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
-
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
19
|
-
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
-
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
21
|
-
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
22
|
-
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
23
|
-
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
24
|
-
// OTHER DEALINGS IN THE SOFTWARE.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
// Gregor Thalhammer (on Apr 13, 2011) said it's necessary to import Python.h
|
|
28
|
-
// first to prevent OS X from overriding a bunch of macros. (e.g. isspace)
|
|
29
|
-
#include <Python.h>
|
|
30
|
-
|
|
31
|
-
#define NO_IMPORT_ARRAY
|
|
32
|
-
#define PY_ARRAY_UNIQUE_SYMBOL pyopencl_ARRAY_API
|
|
33
|
-
|
|
34
|
-
#include <memory>
|
|
35
|
-
#include <vector>
|
|
36
|
-
#include "wrap_helpers.hpp"
|
|
37
|
-
#include "wrap_cl.hpp"
|
|
38
|
-
#include "mempool.hpp"
|
|
39
|
-
#include "tools.hpp"
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
namespace pyopencl {
|
|
44
|
-
// {{{ test_allocator
|
|
45
|
-
|
|
46
|
-
// Allocator stub that never touches OpenCL: it hands out null pointers and
// treats every release as a no-op. It is plugged into memory_pool to expose
// the pool's bookkeeping for testing (see "_TestMemoryPool" below).
class test_allocator
{
  public:
    using pointer_type = void *;
    using size_type = size_t;

    ~test_allocator()
    { }

    // Allocation failures (there are none here) would not be deferred.
    bool is_deferred() const
    { return false; }

    // Ignores the requested size and always yields a null pointer.
    pointer_type allocate(size_type /* size */)
    { return nullptr; }

    // Returns the previously held block unchanged.
    pointer_type hand_out_existing_block(pointer_type &&block)
    {
      pointer_type handed_out = block;
      return handed_out;
    }

    // Nothing was allocated, so there is nothing to free.
    void free(pointer_type && /* block */)
    { }

    // No blocks are ever held, so there is nothing to release.
    void try_release_blocks()
    { }
};
|
|
76
|
-
|
|
77
|
-
// }}}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
// {{{ buffer allocators
|
|
81
|
-
|
|
82
|
-
class buffer_allocator_base
|
|
83
|
-
{
|
|
84
|
-
protected:
|
|
85
|
-
std::shared_ptr<pyopencl::context> m_context;
|
|
86
|
-
cl_mem_flags m_flags;
|
|
87
|
-
|
|
88
|
-
public:
|
|
89
|
-
buffer_allocator_base(std::shared_ptr<pyopencl::context> const &ctx,
|
|
90
|
-
cl_mem_flags flags=CL_MEM_READ_WRITE)
|
|
91
|
-
: m_context(ctx), m_flags(flags)
|
|
92
|
-
{
|
|
93
|
-
if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))
|
|
94
|
-
throw pyopencl::error("Allocator", CL_INVALID_VALUE,
|
|
95
|
-
"cannot specify USE_HOST_PTR or COPY_HOST_PTR flags");
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
buffer_allocator_base(buffer_allocator_base const &src)
|
|
99
|
-
: m_context(src.m_context), m_flags(src.m_flags)
|
|
100
|
-
{ }
|
|
101
|
-
|
|
102
|
-
virtual ~buffer_allocator_base()
|
|
103
|
-
{ }
|
|
104
|
-
|
|
105
|
-
typedef cl_mem pointer_type;
|
|
106
|
-
typedef size_t size_type;
|
|
107
|
-
|
|
108
|
-
virtual bool is_deferred() const = 0;
|
|
109
|
-
virtual pointer_type allocate(size_type s) = 0;
|
|
110
|
-
|
|
111
|
-
pointer_type hand_out_existing_block(pointer_type &&p)
|
|
112
|
-
{
|
|
113
|
-
return p;
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
void free(pointer_type &&p)
|
|
117
|
-
{
|
|
118
|
-
PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p));
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
void try_release_blocks()
|
|
122
|
-
{
|
|
123
|
-
pyopencl::run_python_gc();
|
|
124
|
-
}
|
|
125
|
-
};
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class deferred_buffer_allocator : public buffer_allocator_base
|
|
129
|
-
{
|
|
130
|
-
private:
|
|
131
|
-
typedef buffer_allocator_base super;
|
|
132
|
-
|
|
133
|
-
public:
|
|
134
|
-
deferred_buffer_allocator(std::shared_ptr<pyopencl::context> const &ctx,
|
|
135
|
-
cl_mem_flags flags=CL_MEM_READ_WRITE)
|
|
136
|
-
: super(ctx, flags)
|
|
137
|
-
{ }
|
|
138
|
-
|
|
139
|
-
bool is_deferred() const
|
|
140
|
-
{ return true; }
|
|
141
|
-
|
|
142
|
-
pointer_type allocate(size_type s)
|
|
143
|
-
{
|
|
144
|
-
if (s == 0)
|
|
145
|
-
return nullptr;
|
|
146
|
-
|
|
147
|
-
return pyopencl::create_buffer(m_context->data(), m_flags, s, 0);
|
|
148
|
-
}
|
|
149
|
-
};
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
class immediate_buffer_allocator : public buffer_allocator_base
|
|
153
|
-
{
|
|
154
|
-
private:
|
|
155
|
-
typedef buffer_allocator_base super;
|
|
156
|
-
pyopencl::command_queue m_queue;
|
|
157
|
-
|
|
158
|
-
public:
|
|
159
|
-
immediate_buffer_allocator(pyopencl::command_queue &queue,
|
|
160
|
-
cl_mem_flags flags=CL_MEM_READ_WRITE)
|
|
161
|
-
: super(std::shared_ptr<pyopencl::context>(queue.get_context()), flags),
|
|
162
|
-
m_queue(queue.data(), /*retain*/ true)
|
|
163
|
-
{ }
|
|
164
|
-
|
|
165
|
-
immediate_buffer_allocator(immediate_buffer_allocator const &src)
|
|
166
|
-
: super(src), m_queue(src.m_queue)
|
|
167
|
-
{ }
|
|
168
|
-
|
|
169
|
-
bool is_deferred() const
|
|
170
|
-
{ return false; }
|
|
171
|
-
|
|
172
|
-
pointer_type allocate(size_type s)
|
|
173
|
-
{
|
|
174
|
-
if (s == 0)
|
|
175
|
-
return nullptr;
|
|
176
|
-
|
|
177
|
-
pointer_type ptr = pyopencl::create_buffer(
|
|
178
|
-
m_context->data(), m_flags, s, 0);
|
|
179
|
-
|
|
180
|
-
// Make sure the buffer gets allocated right here and right now.
|
|
181
|
-
// This looks (and is) expensive. But immediate allocators
|
|
182
|
-
// have their main use in memory pools, whose basic assumption
|
|
183
|
-
// is that allocation is too expensive anyway--but they rely
|
|
184
|
-
// on 'out-of-memory' being reported on allocation. (If it is
|
|
185
|
-
// reported in a deferred manner, it has no way to react
|
|
186
|
-
// (e.g. by freeing unused memory) because it is not part of
|
|
187
|
-
// the call stack.)
|
|
188
|
-
if (m_queue.get_hex_device_version() < 0x1020)
|
|
189
|
-
{
|
|
190
|
-
unsigned zero = 0;
|
|
191
|
-
PYOPENCL_CALL_GUARDED(clEnqueueWriteBuffer, (
|
|
192
|
-
m_queue.data(),
|
|
193
|
-
ptr,
|
|
194
|
-
/* is blocking */ CL_FALSE,
|
|
195
|
-
0, std::min(s, sizeof(zero)), &zero,
|
|
196
|
-
0, NULL, NULL
|
|
197
|
-
));
|
|
198
|
-
}
|
|
199
|
-
else
|
|
200
|
-
{
|
|
201
|
-
PYOPENCL_CALL_GUARDED(clEnqueueMigrateMemObjects, (
|
|
202
|
-
m_queue.data(),
|
|
203
|
-
1, &ptr, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED,
|
|
204
|
-
0, NULL, NULL
|
|
205
|
-
));
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
// No need to wait for completion here. clWaitForEvents (e.g.)
|
|
209
|
-
// cannot return mem object allocation failures. This implies that
|
|
210
|
-
// the buffer is faulted onto the device on enqueue.
|
|
211
|
-
|
|
212
|
-
return ptr;
|
|
213
|
-
}
|
|
214
|
-
};
|
|
215
|
-
|
|
216
|
-
// }}}
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
// {{{ pooled_buffer
|
|
220
|
-
|
|
221
|
-
class pooled_buffer
|
|
222
|
-
: public pyopencl::pooled_allocation<pyopencl::memory_pool<buffer_allocator_base> >,
|
|
223
|
-
public pyopencl::memory_object_holder
|
|
224
|
-
{
|
|
225
|
-
private:
|
|
226
|
-
typedef
|
|
227
|
-
pyopencl::pooled_allocation<pyopencl::memory_pool<buffer_allocator_base> >
|
|
228
|
-
super;
|
|
229
|
-
|
|
230
|
-
public:
|
|
231
|
-
pooled_buffer(
|
|
232
|
-
std::shared_ptr<super::pool_type> p, super::size_type s)
|
|
233
|
-
: super(p, s)
|
|
234
|
-
{ }
|
|
235
|
-
|
|
236
|
-
virtual ~pooled_buffer()
|
|
237
|
-
{ }
|
|
238
|
-
|
|
239
|
-
const super::pointer_type data() const
|
|
240
|
-
{ return m_ptr; }
|
|
241
|
-
|
|
242
|
-
size_t size() const
|
|
243
|
-
{
|
|
244
|
-
return m_size;
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
// This shouldn't be necessary, but somehow nanobind gets unhappy if
|
|
248
|
-
// it's not there.
|
|
249
|
-
void free()
|
|
250
|
-
{
|
|
251
|
-
super::free();
|
|
252
|
-
}
|
|
253
|
-
};
|
|
254
|
-
|
|
255
|
-
// }}}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
// {{{ allocate_from_buffer_allocator
|
|
259
|
-
|
|
260
|
-
inline
|
|
261
|
-
buffer *allocate_from_buffer_allocator(buffer_allocator_base &alloc, size_t size)
|
|
262
|
-
{
|
|
263
|
-
cl_mem mem = nullptr;
|
|
264
|
-
int try_count = 0;
|
|
265
|
-
while (try_count < 2)
|
|
266
|
-
{
|
|
267
|
-
try
|
|
268
|
-
{
|
|
269
|
-
mem = alloc.allocate(size);
|
|
270
|
-
break;
|
|
271
|
-
}
|
|
272
|
-
catch (pyopencl::error &e)
|
|
273
|
-
{
|
|
274
|
-
if (!e.is_out_of_memory())
|
|
275
|
-
throw;
|
|
276
|
-
if (++try_count == 2)
|
|
277
|
-
throw;
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
alloc.try_release_blocks();
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
if (!mem)
|
|
284
|
-
{
|
|
285
|
-
if (size == 0)
|
|
286
|
-
return nullptr;
|
|
287
|
-
else
|
|
288
|
-
throw pyopencl::error("Allocator", CL_INVALID_VALUE,
|
|
289
|
-
"allocator succeeded but returned NULL cl_mem");
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
try
|
|
293
|
-
{
|
|
294
|
-
return new pyopencl::buffer(mem, false);
|
|
295
|
-
}
|
|
296
|
-
catch (...)
|
|
297
|
-
{
|
|
298
|
-
PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
|
|
299
|
-
throw;
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
// }}}
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
// {{{ allocate_from_buffer_pool
|
|
307
|
-
|
|
308
|
-
// Creates a new heap-allocated pooled_buffer of *sz* bytes backed by *pool*.
pooled_buffer *allocate_from_buffer_pool(
    std::shared_ptr<memory_pool<buffer_allocator_base> > pool,
    memory_pool<buffer_allocator_base>::size_type sz)
{
  pooled_buffer *result = new pooled_buffer(pool, sz);
  return result;
}
|
|
314
|
-
|
|
315
|
-
// }}}
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
#if PYOPENCL_CL_VERSION >= 0x2000
|
|
319
|
-
|
|
320
|
-
struct svm_held_pointer
|
|
321
|
-
{
|
|
322
|
-
void *ptr;
|
|
323
|
-
pyopencl::command_queue_ref queue;
|
|
324
|
-
};
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
// {{{ svm allocator
|
|
328
|
-
|
|
329
|
-
class svm_allocator
|
|
330
|
-
{
|
|
331
|
-
public:
|
|
332
|
-
typedef svm_held_pointer pointer_type;
|
|
333
|
-
typedef size_t size_type;
|
|
334
|
-
|
|
335
|
-
protected:
|
|
336
|
-
std::shared_ptr<pyopencl::context> m_context;
|
|
337
|
-
cl_uint m_alignment;
|
|
338
|
-
cl_svm_mem_flags m_flags;
|
|
339
|
-
pyopencl::command_queue_ref m_queue;
|
|
340
|
-
|
|
341
|
-
public:
|
|
342
|
-
svm_allocator(std::shared_ptr<pyopencl::context> const &ctx,
|
|
343
|
-
cl_uint alignment=0, cl_svm_mem_flags flags=CL_MEM_READ_WRITE,
|
|
344
|
-
pyopencl::command_queue *queue=nullptr)
|
|
345
|
-
: m_context(ctx), m_alignment(alignment), m_flags(flags)
|
|
346
|
-
{
|
|
347
|
-
if (queue)
|
|
348
|
-
m_queue.set(queue->data());
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
svm_allocator(svm_allocator const &src)
|
|
352
|
-
: m_context(src.m_context), m_alignment(src.m_alignment),
|
|
353
|
-
m_flags(src.m_flags)
|
|
354
|
-
{ }
|
|
355
|
-
|
|
356
|
-
~svm_allocator()
|
|
357
|
-
{ }
|
|
358
|
-
|
|
359
|
-
bool is_deferred() const
|
|
360
|
-
{
|
|
361
|
-
// According to experiments with the Nvidia implementation (and based
|
|
362
|
-
// on my reading of the CL spec), clSVMalloc will return an error
|
|
363
|
-
// immediately upon being out of memory. Therefore the
|
|
364
|
-
// immediate/deferred split on the buffer side is not needed here.
|
|
365
|
-
// -AK, 2022-09-07
|
|
366
|
-
|
|
367
|
-
return false;
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
std::shared_ptr<pyopencl::context> context() const
|
|
371
|
-
{
|
|
372
|
-
return m_context;
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
pointer_type allocate(size_type size)
|
|
376
|
-
{
|
|
377
|
-
if (size == 0)
|
|
378
|
-
return { nullptr, nullptr };
|
|
379
|
-
|
|
380
|
-
PYOPENCL_PRINT_CALL_TRACE("clSVMalloc");
|
|
381
|
-
return {
|
|
382
|
-
clSVMAlloc(m_context->data(), m_flags, size, m_alignment),
|
|
383
|
-
pyopencl::command_queue_ref(m_queue.is_valid() ? m_queue.data() : nullptr)
|
|
384
|
-
};
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
pointer_type hand_out_existing_block(pointer_type &&p)
|
|
388
|
-
{
|
|
389
|
-
if (m_queue.is_valid())
|
|
390
|
-
{
|
|
391
|
-
if (p.queue.is_valid())
|
|
392
|
-
{
|
|
393
|
-
if (p.queue.data() != m_queue.data())
|
|
394
|
-
{
|
|
395
|
-
// make sure synchronization promises stay valid in new queue
|
|
396
|
-
cl_event evt;
|
|
397
|
-
|
|
398
|
-
PYOPENCL_CALL_GUARDED(clEnqueueMarker, (p.queue.data(), &evt));
|
|
399
|
-
PYOPENCL_CALL_GUARDED(clEnqueueMarkerWithWaitList,
|
|
400
|
-
(m_queue.data(), 1, &evt, nullptr));
|
|
401
|
-
}
|
|
402
|
-
}
|
|
403
|
-
p.queue.set(m_queue.data());
|
|
404
|
-
}
|
|
405
|
-
else
|
|
406
|
-
{
|
|
407
|
-
if (p.queue.is_valid())
|
|
408
|
-
{
|
|
409
|
-
PYOPENCL_CALL_GUARDED_THREADED(clFinish, (p.queue.data()));
|
|
410
|
-
p.queue.reset();
|
|
411
|
-
}
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
return std::move(p);
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
void free(pointer_type &&p)
|
|
418
|
-
{
|
|
419
|
-
if (p.queue.is_valid())
|
|
420
|
-
{
|
|
421
|
-
PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, (
|
|
422
|
-
p.queue.data(), 1, &p.ptr,
|
|
423
|
-
nullptr, nullptr,
|
|
424
|
-
0, nullptr, nullptr));
|
|
425
|
-
p.queue.reset();
|
|
426
|
-
}
|
|
427
|
-
else
|
|
428
|
-
{
|
|
429
|
-
PYOPENCL_PRINT_CALL_TRACE("clSVMFree");
|
|
430
|
-
clSVMFree(m_context->data(), p.ptr);
|
|
431
|
-
}
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
void try_release_blocks()
|
|
435
|
-
{
|
|
436
|
-
pyopencl::run_python_gc();
|
|
437
|
-
}
|
|
438
|
-
};
|
|
439
|
-
|
|
440
|
-
// }}}
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
// {{{ pooled_svm
|
|
444
|
-
|
|
445
|
-
class pooled_svm
|
|
446
|
-
: public pyopencl::pooled_allocation<pyopencl::memory_pool<svm_allocator>>,
|
|
447
|
-
public pyopencl::svm_pointer
|
|
448
|
-
{
|
|
449
|
-
private:
|
|
450
|
-
typedef
|
|
451
|
-
pyopencl::pooled_allocation<pyopencl::memory_pool<svm_allocator>>
|
|
452
|
-
super;
|
|
453
|
-
|
|
454
|
-
public:
|
|
455
|
-
pooled_svm(
|
|
456
|
-
std::shared_ptr<super::pool_type> p, super::size_type s)
|
|
457
|
-
: super(p, s)
|
|
458
|
-
{ }
|
|
459
|
-
|
|
460
|
-
virtual ~pooled_svm()
|
|
461
|
-
{ }
|
|
462
|
-
|
|
463
|
-
void *svm_ptr() const
|
|
464
|
-
{ return m_ptr.ptr; }
|
|
465
|
-
|
|
466
|
-
size_t size() const
|
|
467
|
-
{ return m_size; }
|
|
468
|
-
|
|
469
|
-
void bind_to_queue(pyopencl::command_queue const &queue)
|
|
470
|
-
{
|
|
471
|
-
if (pyopencl::is_queue_out_of_order(queue.data()))
|
|
472
|
-
throw pyopencl::error("PooledSVM.bind_to_queue", CL_INVALID_VALUE,
|
|
473
|
-
"supplying an out-of-order queue to SVMAllocation is invalid");
|
|
474
|
-
|
|
475
|
-
if (m_ptr.queue.is_valid())
|
|
476
|
-
{
|
|
477
|
-
if (m_ptr.queue.data() != queue.data())
|
|
478
|
-
{
|
|
479
|
-
// make sure synchronization promises stay valid in new queue
|
|
480
|
-
cl_event evt;
|
|
481
|
-
|
|
482
|
-
PYOPENCL_CALL_GUARDED(clEnqueueMarker, (m_ptr.queue.data(), &evt));
|
|
483
|
-
PYOPENCL_CALL_GUARDED(clEnqueueMarkerWithWaitList,
|
|
484
|
-
(queue.data(), 1, &evt, nullptr));
|
|
485
|
-
}
|
|
486
|
-
}
|
|
487
|
-
|
|
488
|
-
m_ptr.queue.set(queue.data());
|
|
489
|
-
}
|
|
490
|
-
|
|
491
|
-
void unbind_from_queue()
|
|
492
|
-
{
|
|
493
|
-
if (m_ptr.queue.is_valid())
|
|
494
|
-
PYOPENCL_CALL_GUARDED_THREADED(clFinish, (m_ptr.queue.data()));
|
|
495
|
-
|
|
496
|
-
m_ptr.queue.reset();
|
|
497
|
-
}
|
|
498
|
-
|
|
499
|
-
// only use for testing/diagnostic/debugging purposes!
|
|
500
|
-
cl_command_queue queue() const
|
|
501
|
-
{
|
|
502
|
-
if (m_ptr.queue.is_valid())
|
|
503
|
-
return m_ptr.queue.data();
|
|
504
|
-
else
|
|
505
|
-
return nullptr;
|
|
506
|
-
}
|
|
507
|
-
|
|
508
|
-
// This shouldn't be necessary, but somehow nanobind gets unhappy if
|
|
509
|
-
// it's not there.
|
|
510
|
-
void free()
|
|
511
|
-
{
|
|
512
|
-
super::free();
|
|
513
|
-
}
|
|
514
|
-
};
|
|
515
|
-
|
|
516
|
-
// }}}
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
// {{{ svm_allocator_call
|
|
520
|
-
|
|
521
|
-
inline
|
|
522
|
-
pyopencl::svm_allocation *svm_allocator_call(svm_allocator &alloc, size_t size)
|
|
523
|
-
{
|
|
524
|
-
int try_count = 0;
|
|
525
|
-
while (true)
|
|
526
|
-
{
|
|
527
|
-
try
|
|
528
|
-
{
|
|
529
|
-
svm_held_pointer mem(alloc.allocate(size));
|
|
530
|
-
if (mem.queue.is_valid())
|
|
531
|
-
return new pyopencl::svm_allocation(
|
|
532
|
-
alloc.context(), mem.ptr, size, mem.queue.data());
|
|
533
|
-
else
|
|
534
|
-
return new pyopencl::svm_allocation(
|
|
535
|
-
alloc.context(), mem.ptr, size, nullptr);
|
|
536
|
-
}
|
|
537
|
-
catch (pyopencl::error &e)
|
|
538
|
-
{
|
|
539
|
-
if (!e.is_out_of_memory())
|
|
540
|
-
throw;
|
|
541
|
-
if (++try_count == 2)
|
|
542
|
-
throw;
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
alloc.try_release_blocks();
|
|
546
|
-
}
|
|
547
|
-
}
|
|
548
|
-
|
|
549
|
-
// }}}
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
// {{{ allocate_from_svm_pool
|
|
553
|
-
|
|
554
|
-
// Creates a new heap-allocated pooled_svm of *sz* bytes backed by *pool*.
pooled_svm *allocate_from_svm_pool(
    std::shared_ptr<pyopencl::memory_pool<svm_allocator> > pool,
    pyopencl::memory_pool<svm_allocator>::size_type sz)
{
  pooled_svm *result = new pooled_svm(pool, sz);
  return result;
}
|
|
560
|
-
|
|
561
|
-
// }}}
|
|
562
|
-
|
|
563
|
-
#endif
|
|
564
|
-
}
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
namespace {
|
|
568
|
-
template<class Wrapper>
|
|
569
|
-
void expose_memory_pool(Wrapper &wrapper)
|
|
570
|
-
{
|
|
571
|
-
typedef typename Wrapper::Type cls;
|
|
572
|
-
wrapper
|
|
573
|
-
.def_prop_ro("held_blocks", &cls::held_blocks)
|
|
574
|
-
.def_prop_ro("active_blocks", &cls::active_blocks)
|
|
575
|
-
.def_prop_ro("managed_bytes", &cls::managed_bytes)
|
|
576
|
-
.def_prop_ro("active_bytes", &cls::active_bytes)
|
|
577
|
-
.DEF_SIMPLE_METHOD(bin_number)
|
|
578
|
-
.DEF_SIMPLE_METHOD(alloc_size)
|
|
579
|
-
.DEF_SIMPLE_METHOD(free_held)
|
|
580
|
-
.DEF_SIMPLE_METHOD(stop_holding)
|
|
581
|
-
|
|
582
|
-
// undoc for now
|
|
583
|
-
.def("_set_trace", &cls::set_trace)
|
|
584
|
-
;
|
|
585
|
-
}
|
|
586
|
-
}
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
// Registers the memory pool related functions and classes on module *m*:
// bitlog2, the buffer allocators, PooledBuffer and MemoryPool, and (when
// built against OpenCL 2.0 or newer) SVMAllocator, PooledSVM and SVMPool.
void pyopencl_expose_mempool(py::module_ &m)
{
  m.def("bitlog2", pyopencl::bitlog2);

  {
    typedef pyopencl::buffer_allocator_base cls;
    py::class_<cls> wrapper(m, "AllocatorBase");
    wrapper
      .def("__call__", pyopencl::allocate_from_buffer_allocator, py::arg("size"))
      ;

  }

  {
    typedef pyopencl::memory_pool<pyopencl::test_allocator> cls;

    py::class_<cls> wrapper(m, "_TestMemoryPool");
    wrapper
      .def("__init__",
          [](cls *self, unsigned leading_bits_in_bin_id)
          {
            new (self) cls(
                std::shared_ptr<pyopencl::test_allocator>(
                  new pyopencl::test_allocator()),
                leading_bits_in_bin_id);
          },
          py::arg("leading_bits_in_bin_id")=4
          )
      .def("allocate", [](std::shared_ptr<cls> pool, cls::size_type sz)
          {
            pool->allocate(sz);
            return py::none();
          })
      ;

    expose_memory_pool(wrapper);
  }

  {
    typedef pyopencl::deferred_buffer_allocator cls;
    py::class_<cls, pyopencl::buffer_allocator_base> wrapper(
        m, "DeferredAllocator");
    wrapper
      .def(py::init<std::shared_ptr<pyopencl::context> const &>())
      // NOTE(review): the first argument is a context, yet its keyword name
      // is "queue". Left unchanged because renaming it would break callers
      // that pass it by keyword.
      .def(py::init<
          std::shared_ptr<pyopencl::context> const &,
          cl_mem_flags>(),
          py::arg("queue"), py::arg("mem_flags"))
      ;
  }

  {
    typedef pyopencl::immediate_buffer_allocator cls;
    py::class_<cls, pyopencl::buffer_allocator_base> wrapper(
        m, "ImmediateAllocator");
    wrapper
      .def(py::init<pyopencl::command_queue &>())
      .def(py::init<pyopencl::command_queue &, cl_mem_flags>(),
          py::arg("queue"), py::arg("mem_flags"))
      ;
  }

  {
    typedef pyopencl::pooled_buffer cls;
    py::class_<cls, pyopencl::memory_object_holder>(m, "PooledBuffer")
      .def("release", &cls::free)

      .def("bind_to_queue", [](cls &self, pyopencl::command_queue &queue) { /* no-op */ })
      .def("unbind_from_queue", [](cls &self) { /* no-op */ })
      ;
  }

  {
    typedef pyopencl::memory_pool<pyopencl::buffer_allocator_base> cls;

    py::class_<cls> wrapper( m, "MemoryPool");
    wrapper
      .def(py::init<std::shared_ptr<pyopencl::buffer_allocator_base>, unsigned>(),
          py::arg("allocator"),
          py::arg("leading_bits_in_bin_id")=4
          )
      .def("allocate", pyopencl::allocate_from_buffer_pool, py::arg("size"))
      .def("__call__", pyopencl::allocate_from_buffer_pool, py::arg("size"))
      ;

    expose_memory_pool(wrapper);
  }

#if PYOPENCL_CL_VERSION >= 0x2000
  {
    typedef pyopencl::svm_allocator cls;
    py::class_<cls> wrapper(m, "SVMAllocator");
    wrapper
      // 'flags' is declared with the constructor's own parameter type
      // (cl_svm_mem_flags) rather than cl_uint, so the binding signature
      // matches the constructor it forwards to.
      .def(py::init<std::shared_ptr<pyopencl::context> const &, cl_uint, cl_svm_mem_flags, pyopencl::command_queue *>(),
          py::arg("context"),
          /* py::kw_only(), */
          py::arg("alignment")=0,
          py::arg("flags")=CL_MEM_READ_WRITE,
          py::arg("queue").none(true)=nullptr
          )
      .def("__call__", pyopencl::svm_allocator_call, py::arg("size"))
      ;
  }

  {
    typedef pyopencl::pooled_svm cls;
    py::class_<cls, pyopencl::svm_pointer>(m, "PooledSVM")
      .def("release", &cls::free)
      .def("enqueue_release", &cls::free)
      // Equality and hash are both based on the SVM pointer value.
      .def("__eq__", [](const cls &self, const cls &other)
          { return self.svm_ptr() == other.svm_ptr(); })
      .def("__hash__", [](cls &self) { return (intptr_t) self.svm_ptr(); })
      .DEF_SIMPLE_METHOD(bind_to_queue)
      .DEF_SIMPLE_METHOD(unbind_from_queue)

      // only for diagnostic/debugging/testing purposes!
      .def_prop_ro("_queue",
          [](cls const &self) -> py::object
          {
            cl_command_queue queue = self.queue();
            if (queue)
              return py::cast(new pyopencl::command_queue(queue, true));
            else
              return py::none();
          })
      ;
  }

  {
    typedef pyopencl::memory_pool<pyopencl::svm_allocator> cls;

    py::class_<cls> wrapper( m, "SVMPool");
    wrapper
      .def(py::init<std::shared_ptr<pyopencl::svm_allocator>, unsigned>(),
          py::arg("allocator"),
          /* py::kw_only(), */
          py::arg("leading_bits_in_bin_id")=4
          )
      .def("__call__", pyopencl::allocate_from_svm_pool, py::arg("size"))
      ;

    expose_memory_pool(wrapper);
  }

#endif
}
|
|
737
|
-
|
|
738
|
-
// vim: foldmethod=marker
|