pyopencl 2024.2.2__cp311-cp311-macosx_11_0_arm64.whl → 2024.2.4__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +16 -4
- pyopencl/_cl.cpython-311-darwin.so +0 -0
- pyopencl/algorithm.py +3 -1
- pyopencl/bitonic_sort.py +2 -0
- pyopencl/characterize/__init__.py +23 -0
- pyopencl/compyte/.git +1 -0
- pyopencl/compyte/.github/workflows/autopush.yml +21 -0
- pyopencl/compyte/.github/workflows/ci.yml +30 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/ndarray/Makefile +31 -0
- pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
- pyopencl/compyte/ndarray/pygpu_language.h +207 -0
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
- pyopencl/compyte/setup.cfg +9 -0
- pyopencl/tools.py +60 -56
- pyopencl/version.py +7 -3
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.4.dist-info}/METADATA +14 -14
- pyopencl-2024.2.4.dist-info/RECORD +59 -0
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.4.dist-info}/WHEEL +1 -1
- pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
- pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
- pyopencl-2024.2.2.data/data/Makefile.in +0 -21
- pyopencl-2024.2.2.data/data/README.rst +0 -70
- pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
- pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
- pyopencl-2024.2.2.data/data/configure.py +0 -6
- pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
- pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
- pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
- pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
- pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
- pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
- pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
- pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
- pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
- pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
- pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
- pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
- pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
- pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
- pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
- pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
- pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
- pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
- pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
- pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
- pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
- pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
- pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
- pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
- pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
- pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
- pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
- pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
- pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
- pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
- pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
- pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
- pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
- pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
- pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
- pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
- pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
- pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
- pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
- pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
- pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
- pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
- pyopencl-2024.2.2.data/data/pytest.ini +0 -3
- pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
- pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
- pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
- pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
- pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
- pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
- pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
- pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
- pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
- pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
- pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
- pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
- pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
- pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
- pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
- pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
- pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
- pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
- pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
- pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
- pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
- pyopencl-2024.2.2.dist-info/LICENSE +0 -282
- pyopencl-2024.2.2.dist-info/RECORD +0 -123
- pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
- {pyopencl-2024.2.2.data/data → pyopencl-2024.2.4.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,444 +0,0 @@
|
|
|
1
|
-
// Abstract memory pool implementation
|
|
2
|
-
//
|
|
3
|
-
// Copyright (C) 2009-17 Andreas Kloeckner
|
|
4
|
-
//
|
|
5
|
-
// Permission is hereby granted, free of charge, to any person
|
|
6
|
-
// obtaining a copy of this software and associated documentation
|
|
7
|
-
// files (the "Software"), to deal in the Software without
|
|
8
|
-
// restriction, including without limitation the rights to use,
|
|
9
|
-
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
-
// copies of the Software, and to permit persons to whom the
|
|
11
|
-
// Software is furnished to do so, subject to the following
|
|
12
|
-
// conditions:
|
|
13
|
-
//
|
|
14
|
-
// The above copyright notice and this permission notice shall be
|
|
15
|
-
// included in all copies or substantial portions of the Software.
|
|
16
|
-
//
|
|
17
|
-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
-
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
19
|
-
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
-
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
21
|
-
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
22
|
-
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
23
|
-
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
24
|
-
// OTHER DEALINGS IN THE SOFTWARE.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
#ifndef _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP
|
|
28
|
-
#define _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
#include <cassert>
|
|
32
|
-
#include <vector>
|
|
33
|
-
#include <map>
|
|
34
|
-
#include <memory>
|
|
35
|
-
#include <ostream>
|
|
36
|
-
#include <iostream>
|
|
37
|
-
#include "bitlog.hpp"
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
namespace PYGPU_PACKAGE
|
|
41
|
-
{
|
|
42
|
-
// https://stackoverflow.com/a/44175911
|
|
43
|
-
// Base class that makes every type deriving from it non-copyable: the copy
// constructor and the copy assignment operator are both deleted, while
// default construction and destruction stay available.
class mp_noncopyable
{
  private:
    // Deleted on purpose -- instances must never be copied.
    mp_noncopyable(mp_noncopyable const &) = delete;
    mp_noncopyable &operator=(mp_noncopyable const &) = delete;

  public:
    mp_noncopyable() = default;
    ~mp_noncopyable() = default;
};
|
|
52
|
-
|
|
53
|
-
// Shared-ownership smart pointer template used by the pool code:
// boost::shared_ptr when PYGPU_PYCUDA is defined, std::shared_ptr otherwise.
#if !defined(PYGPU_PYCUDA)
#  define PYGPU_SHARED_PTR std::shared_ptr
#else
#  define PYGPU_SHARED_PTR boost::shared_ptr
#endif
|
|
58
|
-
|
|
59
|
-
// Shift x to the left by shift_amount bits. A negative shift_amount shifts
// to the right by its magnitude instead.
template <class T>
inline T signed_left_shift(T x, signed shift_amount)
{
  if (shift_amount >= 0)
    return x << shift_amount;

  // Negative amount: reverse the direction.
  return x >> -shift_amount;
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
// Shift x to the right by shift_amount bits. A negative shift_amount shifts
// to the left by its magnitude instead.
template <class T>
inline T signed_right_shift(T x, signed shift_amount)
{
  if (shift_amount >= 0)
    return x >> shift_amount;

  // Negative amount: reverse the direction.
  return x << -shift_amount;
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
// Assertion that is checked unconditionally (it does not depend on NDEBUG):
// if cond evaluates to false, throws std::logic_error whose message names
// the violated condition.
//
// The do { ... } while (false) wrapper intentionally carries NO trailing
// semicolon: the caller's own semicolon completes the statement, so
// "always_assert(x);" expands to exactly one statement and may be used in an
// unbraced if/else.
#define always_assert(cond) \
  do { \
    if (!(cond)) \
      throw std::logic_error("mem pool assertion violated: " #cond); \
  } while (false)
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
template<class Allocator>
|
|
91
|
-
class memory_pool : mp_noncopyable
|
|
92
|
-
{
|
|
93
|
-
public:
|
|
94
|
-
typedef typename Allocator::pointer_type pointer_type;
|
|
95
|
-
typedef typename Allocator::size_type size_type;
|
|
96
|
-
|
|
97
|
-
private:
|
|
98
|
-
typedef uint32_t bin_nr_t;
|
|
99
|
-
typedef std::vector<pointer_type> bin_t;
|
|
100
|
-
|
|
101
|
-
typedef std::map<bin_nr_t, bin_t> container_t;
|
|
102
|
-
container_t m_container;
|
|
103
|
-
typedef typename container_t::value_type bin_pair_t;
|
|
104
|
-
|
|
105
|
-
std::shared_ptr<Allocator> m_allocator;
|
|
106
|
-
|
|
107
|
-
// A held block is one that's been released by the application, but that
|
|
108
|
-
// we are keeping around to dish out again.
|
|
109
|
-
size_type m_held_blocks;
|
|
110
|
-
|
|
111
|
-
// An active block is one that is in use by the application.
|
|
112
|
-
size_type m_active_blocks;
|
|
113
|
-
|
|
114
|
-
// "Managed" memory is "active" and "held" memory.
|
|
115
|
-
size_type m_managed_bytes;
|
|
116
|
-
|
|
117
|
-
// "Active" bytes are bytes under the control of the application.
|
|
118
|
-
// This may be smaller than the actual allocated size reflected
|
|
119
|
-
// in m_managed_bytes.
|
|
120
|
-
size_type m_active_bytes;
|
|
121
|
-
|
|
122
|
-
bool m_stop_holding;
|
|
123
|
-
int m_trace;
|
|
124
|
-
|
|
125
|
-
unsigned m_leading_bits_in_bin_id;
|
|
126
|
-
|
|
127
|
-
public:
|
|
128
|
-
memory_pool(std::shared_ptr<Allocator> alloc, unsigned leading_bits_in_bin_id=4)
|
|
129
|
-
: m_allocator(alloc),
|
|
130
|
-
m_held_blocks(0), m_active_blocks(0),
|
|
131
|
-
m_managed_bytes(0), m_active_bytes(0),
|
|
132
|
-
m_stop_holding(false),
|
|
133
|
-
m_trace(false), m_leading_bits_in_bin_id(leading_bits_in_bin_id)
|
|
134
|
-
{
|
|
135
|
-
if (m_allocator->is_deferred())
|
|
136
|
-
{
|
|
137
|
-
PyErr_WarnEx(PyExc_UserWarning, "Memory pools expect non-deferred "
|
|
138
|
-
"semantics from their allocators. You passed a deferred "
|
|
139
|
-
"allocator, i.e. an allocator whose allocations can turn out to "
|
|
140
|
-
"be unavailable long after allocation.", 1);
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
virtual ~memory_pool()
|
|
145
|
-
{ free_held(); }
|
|
146
|
-
|
|
147
|
-
private:
|
|
148
|
-
unsigned mantissa_mask() const
|
|
149
|
-
{
|
|
150
|
-
return (1 << m_leading_bits_in_bin_id) - 1;
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
public:
|
|
154
|
-
bin_nr_t bin_number(size_type size)
|
|
155
|
-
{
|
|
156
|
-
signed l = bitlog2(size);
|
|
157
|
-
size_type shifted = signed_right_shift(size, l-signed(m_leading_bits_in_bin_id));
|
|
158
|
-
if (size && (shifted & (1 << m_leading_bits_in_bin_id)) == 0)
|
|
159
|
-
throw std::runtime_error("memory_pool::bin_number: bitlog2 fault");
|
|
160
|
-
size_type chopped = shifted & mantissa_mask();
|
|
161
|
-
return l << m_leading_bits_in_bin_id | chopped;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
void set_trace(bool flag)
|
|
165
|
-
{
|
|
166
|
-
if (flag)
|
|
167
|
-
++m_trace;
|
|
168
|
-
else
|
|
169
|
-
--m_trace;
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
size_type alloc_size(bin_nr_t bin)
|
|
173
|
-
{
|
|
174
|
-
bin_nr_t exponent = bin >> m_leading_bits_in_bin_id;
|
|
175
|
-
bin_nr_t mantissa = bin & mantissa_mask();
|
|
176
|
-
|
|
177
|
-
size_type ones = signed_left_shift((size_type) 1,
|
|
178
|
-
signed(exponent)-signed(m_leading_bits_in_bin_id)
|
|
179
|
-
);
|
|
180
|
-
if (ones) ones -= 1;
|
|
181
|
-
|
|
182
|
-
size_type head = signed_left_shift(
|
|
183
|
-
(size_type) ((1<<m_leading_bits_in_bin_id) | mantissa),
|
|
184
|
-
signed(exponent)-signed(m_leading_bits_in_bin_id));
|
|
185
|
-
if (ones & head)
|
|
186
|
-
throw std::runtime_error("memory_pool::alloc_size: bit-counting fault");
|
|
187
|
-
return head | ones;
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
protected:
|
|
191
|
-
bin_t &get_bin(bin_nr_t bin_nr)
|
|
192
|
-
{
|
|
193
|
-
typename container_t::iterator it = m_container.find(bin_nr);
|
|
194
|
-
if (it == m_container.end())
|
|
195
|
-
{
|
|
196
|
-
auto it_and_inserted = m_container.insert(std::make_pair(bin_nr, bin_t()));
|
|
197
|
-
assert(it_and_inserted.second);
|
|
198
|
-
return it_and_inserted.first->second;
|
|
199
|
-
}
|
|
200
|
-
else
|
|
201
|
-
return it->second;
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
void inc_held_blocks()
|
|
205
|
-
{
|
|
206
|
-
if (m_held_blocks == 0)
|
|
207
|
-
start_holding_blocks();
|
|
208
|
-
++m_held_blocks;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
void dec_held_blocks()
|
|
212
|
-
{
|
|
213
|
-
--m_held_blocks;
|
|
214
|
-
if (m_held_blocks == 0)
|
|
215
|
-
stop_holding_blocks();
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
virtual void start_holding_blocks()
|
|
219
|
-
{ }
|
|
220
|
-
|
|
221
|
-
virtual void stop_holding_blocks()
|
|
222
|
-
{ }
|
|
223
|
-
|
|
224
|
-
public:
|
|
225
|
-
pointer_type allocate(size_type size)
|
|
226
|
-
{
|
|
227
|
-
bin_nr_t bin_nr = bin_number(size);
|
|
228
|
-
bin_t &bin = get_bin(bin_nr);
|
|
229
|
-
|
|
230
|
-
if (bin.size())
|
|
231
|
-
{
|
|
232
|
-
if (m_trace)
|
|
233
|
-
std::cout
|
|
234
|
-
<< "[pool] allocation of size " << size << " served from bin " << bin_nr
|
|
235
|
-
<< " which contained " << bin.size() << " entries" << std::endl;
|
|
236
|
-
return m_allocator->hand_out_existing_block(
|
|
237
|
-
pop_block_from_bin(bin, size));
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
size_type alloc_sz = alloc_size(bin_nr);
|
|
241
|
-
|
|
242
|
-
always_assert(bin_number(alloc_sz) == bin_nr);
|
|
243
|
-
always_assert(alloc_sz >= size);
|
|
244
|
-
|
|
245
|
-
if (m_trace)
|
|
246
|
-
std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl;
|
|
247
|
-
|
|
248
|
-
try { return get_from_allocator(alloc_sz, size); }
|
|
249
|
-
catch (PYGPU_PACKAGE::error &e)
|
|
250
|
-
{
|
|
251
|
-
if (!e.is_out_of_memory())
|
|
252
|
-
throw;
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
if (m_trace)
|
|
256
|
-
std::cout << "[pool] allocation triggered OOM, running GC" << std::endl;
|
|
257
|
-
|
|
258
|
-
m_allocator->try_release_blocks();
|
|
259
|
-
if (bin.size())
|
|
260
|
-
return m_allocator->hand_out_existing_block(
|
|
261
|
-
pop_block_from_bin(bin, size));
|
|
262
|
-
|
|
263
|
-
if (m_trace)
|
|
264
|
-
std::cout << "[pool] allocation still OOM after GC" << std::endl;
|
|
265
|
-
|
|
266
|
-
while (try_to_free_memory())
|
|
267
|
-
{
|
|
268
|
-
try { return get_from_allocator(alloc_sz, size); }
|
|
269
|
-
catch (PYGPU_PACKAGE::error &e)
|
|
270
|
-
{
|
|
271
|
-
if (!e.is_out_of_memory())
|
|
272
|
-
throw;
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
throw PYGPU_PACKAGE::error(
|
|
277
|
-
"memory_pool::allocate",
|
|
278
|
-
#ifdef PYGPU_PYCUDA
|
|
279
|
-
CUDA_ERROR_OUT_OF_MEMORY,
|
|
280
|
-
#endif
|
|
281
|
-
#ifdef PYGPU_PYOPENCL
|
|
282
|
-
CL_MEM_OBJECT_ALLOCATION_FAILURE,
|
|
283
|
-
#endif
|
|
284
|
-
"failed to free memory for allocation");
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
void free(pointer_type &&p, size_type size)
|
|
288
|
-
{
|
|
289
|
-
--m_active_blocks;
|
|
290
|
-
m_active_bytes -= size;
|
|
291
|
-
bin_nr_t bin_nr = bin_number(size);
|
|
292
|
-
|
|
293
|
-
if (!m_stop_holding)
|
|
294
|
-
{
|
|
295
|
-
inc_held_blocks();
|
|
296
|
-
get_bin(bin_nr).push_back(std::move(p));
|
|
297
|
-
|
|
298
|
-
if (m_trace)
|
|
299
|
-
std::cout << "[pool] block of size " << size << " returned to bin "
|
|
300
|
-
<< bin_nr << " which now contains " << get_bin(bin_nr).size()
|
|
301
|
-
<< " entries" << std::endl;
|
|
302
|
-
}
|
|
303
|
-
else
|
|
304
|
-
{
|
|
305
|
-
m_allocator->free(std::move(p));
|
|
306
|
-
m_managed_bytes -= alloc_size(bin_nr);
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
void free_held()
|
|
311
|
-
{
|
|
312
|
-
for (bin_pair_t &bin_pair: m_container)
|
|
313
|
-
{
|
|
314
|
-
bin_t &bin = bin_pair.second;
|
|
315
|
-
|
|
316
|
-
while (bin.size())
|
|
317
|
-
{
|
|
318
|
-
m_allocator->free(std::move(bin.back()));
|
|
319
|
-
m_managed_bytes -= alloc_size(bin_pair.first);
|
|
320
|
-
bin.pop_back();
|
|
321
|
-
|
|
322
|
-
dec_held_blocks();
|
|
323
|
-
}
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
assert(m_held_blocks == 0);
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
void stop_holding()
|
|
330
|
-
{
|
|
331
|
-
m_stop_holding = true;
|
|
332
|
-
free_held();
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
size_type active_blocks() const
|
|
336
|
-
{ return m_active_blocks; }
|
|
337
|
-
|
|
338
|
-
size_type held_blocks() const
|
|
339
|
-
{ return m_held_blocks; }
|
|
340
|
-
|
|
341
|
-
size_type managed_bytes() const
|
|
342
|
-
{ return m_managed_bytes; }
|
|
343
|
-
|
|
344
|
-
size_type active_bytes() const
|
|
345
|
-
{ return m_active_bytes; }
|
|
346
|
-
|
|
347
|
-
bool try_to_free_memory()
|
|
348
|
-
{
|
|
349
|
-
// free largest stuff first
|
|
350
|
-
for (typename container_t::reverse_iterator it = m_container.rbegin();
|
|
351
|
-
it != m_container.rend(); ++it)
|
|
352
|
-
{
|
|
353
|
-
bin_pair_t &bin_pair = *it;
|
|
354
|
-
bin_t &bin = bin_pair.second;
|
|
355
|
-
|
|
356
|
-
if (bin.size())
|
|
357
|
-
{
|
|
358
|
-
m_allocator->free(std::move(bin.back()));
|
|
359
|
-
m_managed_bytes -= alloc_size(bin_pair.first);
|
|
360
|
-
bin.pop_back();
|
|
361
|
-
|
|
362
|
-
dec_held_blocks();
|
|
363
|
-
|
|
364
|
-
return true;
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
return false;
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
private:
|
|
372
|
-
pointer_type get_from_allocator(size_type alloc_sz, size_type size)
|
|
373
|
-
{
|
|
374
|
-
pointer_type result = m_allocator->allocate(alloc_sz);
|
|
375
|
-
++m_active_blocks;
|
|
376
|
-
m_managed_bytes += alloc_sz;
|
|
377
|
-
m_active_bytes += size;
|
|
378
|
-
|
|
379
|
-
return result;
|
|
380
|
-
}
|
|
381
|
-
|
|
382
|
-
pointer_type pop_block_from_bin(bin_t &bin, size_type size)
|
|
383
|
-
{
|
|
384
|
-
pointer_type result(std::move(bin.back()));
|
|
385
|
-
bin.pop_back();
|
|
386
|
-
|
|
387
|
-
dec_held_blocks();
|
|
388
|
-
++m_active_blocks;
|
|
389
|
-
m_active_bytes += size;
|
|
390
|
-
|
|
391
|
-
return result;
|
|
392
|
-
}
|
|
393
|
-
};
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
template <class Pool>
|
|
397
|
-
class pooled_allocation : public mp_noncopyable
|
|
398
|
-
{
|
|
399
|
-
public:
|
|
400
|
-
typedef Pool pool_type;
|
|
401
|
-
typedef typename Pool::pointer_type pointer_type;
|
|
402
|
-
typedef typename Pool::size_type size_type;
|
|
403
|
-
|
|
404
|
-
protected:
|
|
405
|
-
PYGPU_SHARED_PTR<pool_type> m_pool;
|
|
406
|
-
|
|
407
|
-
pointer_type m_ptr;
|
|
408
|
-
size_type m_size;
|
|
409
|
-
bool m_valid;
|
|
410
|
-
|
|
411
|
-
public:
|
|
412
|
-
pooled_allocation(PYGPU_SHARED_PTR<pool_type> p, size_type size)
|
|
413
|
-
: m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true)
|
|
414
|
-
{ }
|
|
415
|
-
|
|
416
|
-
~pooled_allocation()
|
|
417
|
-
{
|
|
418
|
-
if (m_valid)
|
|
419
|
-
free();
|
|
420
|
-
}
|
|
421
|
-
|
|
422
|
-
void free()
|
|
423
|
-
{
|
|
424
|
-
if (m_valid)
|
|
425
|
-
{
|
|
426
|
-
m_pool->free(std::move(m_ptr), m_size);
|
|
427
|
-
m_valid = false;
|
|
428
|
-
}
|
|
429
|
-
else
|
|
430
|
-
throw PYGPU_PACKAGE::error(
|
|
431
|
-
"pooled_device_allocation::free",
|
|
432
|
-
#ifdef PYGPU_PYCUDA
|
|
433
|
-
CUDA_ERROR_INVALID_HANDLE
|
|
434
|
-
#endif
|
|
435
|
-
#ifdef PYGPU_PYOPENCL
|
|
436
|
-
CL_INVALID_VALUE
|
|
437
|
-
#endif
|
|
438
|
-
);
|
|
439
|
-
}
|
|
440
|
-
};
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
#endif
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
#ifndef _PYOPENCL_EXT_H
|
|
2
|
-
#define _PYOPENCL_EXT_H
|
|
3
|
-
|
|
4
|
-
#ifdef PYOPENCL_USE_SHIPPED_EXT
|
|
5
|
-
|
|
6
|
-
#include "clinfo_ext.h"
|
|
7
|
-
|
|
8
|
-
#else
|
|
9
|
-
|
|
10
|
-
#if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H))
|
|
11
|
-
|
|
12
|
-
#include <OpenCL/opencl.h>
|
|
13
|
-
|
|
14
|
-
#else
|
|
15
|
-
|
|
16
|
-
#include <CL/cl.h>
|
|
17
|
-
#include <CL/cl_ext.h>
|
|
18
|
-
|
|
19
|
-
#endif
|
|
20
|
-
|
|
21
|
-
#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
|
|
22
|
-
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
|
|
23
|
-
|
|
24
|
-
typedef union
|
|
25
|
-
{
|
|
26
|
-
struct { cl_uint type; cl_uint data[5]; } raw;
|
|
27
|
-
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
|
|
28
|
-
} cl_device_topology_amd;
|
|
29
|
-
#endif
|
|
30
|
-
|
|
31
|
-
#ifndef CL_DEVICE_P2P_DEVICES_AMD
|
|
32
|
-
#define CL_DEVICE_P2P_DEVICES_AMD 0x4089
|
|
33
|
-
|
|
34
|
-
typedef CL_API_ENTRY cl_int
|
|
35
|
-
(CL_API_CALL * clEnqueueCopyBufferP2PAMD_fn)(cl_command_queue /*command_queue*/,
|
|
36
|
-
cl_mem /*src_buffer*/,
|
|
37
|
-
cl_mem /*dst_buffer*/,
|
|
38
|
-
size_t /*src_offset*/,
|
|
39
|
-
size_t /*dst_offset*/,
|
|
40
|
-
size_t /*cb*/,
|
|
41
|
-
cl_uint /*num_events_in_wait_list*/,
|
|
42
|
-
const cl_event* /*event_wait_list*/,
|
|
43
|
-
cl_event* /*event*/);
|
|
44
|
-
#endif
|
|
45
|
-
|
|
46
|
-
/* {{{ these NV defines are often missing from the system headers */
|
|
47
|
-
|
|
48
|
-
/* Each constant is defined only if the included headers have not already
 * provided it. */
#if !defined(CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV)
#  define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#endif

#if !defined(CL_DEVICE_INTEGRATED_MEMORY_NV)
#  define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
#endif

#if !defined(CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV)
#  define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007
#endif

#if !defined(CL_DEVICE_PCI_BUS_ID_NV)
#  define CL_DEVICE_PCI_BUS_ID_NV 0x4008
#endif

#if !defined(CL_DEVICE_PCI_SLOT_ID_NV)
#  define CL_DEVICE_PCI_SLOT_ID_NV 0x4009
#endif

#if !defined(CL_DEVICE_PCI_DOMAIN_ID_NV)
#  define CL_DEVICE_PCI_DOMAIN_ID_NV 0x400A
#endif
|
|
70
|
-
|
|
71
|
-
/* }}} */
|
|
72
|
-
|
|
73
|
-
#endif
|
|
74
|
-
|
|
75
|
-
#endif
|
|
76
|
-
|
|
77
|
-
/* vim: foldmethod=marker */
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
// Various odds and ends
|
|
2
|
-
//
|
|
3
|
-
// Copyright (C) 2009 Andreas Kloeckner
|
|
4
|
-
//
|
|
5
|
-
// Permission is hereby granted, free of charge, to any person
|
|
6
|
-
// obtaining a copy of this software and associated documentation
|
|
7
|
-
// files (the "Software"), to deal in the Software without
|
|
8
|
-
// restriction, including without limitation the rights to use,
|
|
9
|
-
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
-
// copies of the Software, and to permit persons to whom the
|
|
11
|
-
// Software is furnished to do so, subject to the following
|
|
12
|
-
// conditions:
|
|
13
|
-
//
|
|
14
|
-
// The above copyright notice and this permission notice shall be
|
|
15
|
-
// included in all copies or substantial portions of the Software.
|
|
16
|
-
//
|
|
17
|
-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
-
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
19
|
-
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
-
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
21
|
-
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
22
|
-
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
23
|
-
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
24
|
-
// OTHER DEALINGS IN THE SOFTWARE.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
#ifndef _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP
|
|
28
|
-
#define _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
#include <nanobind/nanobind.h>
|
|
32
|
-
|
|
33
|
-
#include <numeric>
|
|
34
|
-
#include <numpy/arrayobject.h>
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
namespace pyopencl
|
|
39
|
-
{
|
|
40
|
-
inline
|
|
41
|
-
npy_intp size_from_dims(int ndim, const npy_intp *dims)
|
|
42
|
-
{
|
|
43
|
-
if (ndim != 0)
|
|
44
|
-
return std::accumulate(dims, dims+ndim, 1, std::multiplies<npy_intp>());
|
|
45
|
-
else
|
|
46
|
-
return 1;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
inline void run_python_gc()
|
|
53
|
-
{
|
|
54
|
-
namespace py = nanobind;
|
|
55
|
-
|
|
56
|
-
py::module_::import_("gc").attr("collect")();
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
// https://stackoverflow.com/a/28139075
|
|
61
|
-
// Adapter for iterating a container back to front in a range-based for
// loop: for (auto &item : reverse(container)) { ... }
//
// The wrapper only stores a reference to the container. begin()/end() map
// to the container's rbegin()/rend().
template <typename T>
struct reversion_wrapper
{
  T& iterable;
};

template <typename T>
auto begin (reversion_wrapper<T> w)
{
  return w.iterable.rbegin();
}

template <typename T>
auto end (reversion_wrapper<T> w)
{
  return w.iterable.rend();
}

// Wrap iterable for reverse iteration; the wrapper refers to iterable and
// does not copy it.
template <typename T>
reversion_wrapper<T> reverse (T&& iterable)
{
  return reversion_wrapper<T>{ iterable };
}
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
// https://stackoverflow.com/a/44175911
|
|
75
|
-
// Base class that makes every type deriving from it non-copyable: the copy
// constructor and the copy assignment operator are both deleted, while
// default construction and destruction stay available.
class noncopyable
{
  private:
    // Deleted on purpose -- instances must never be copied.
    noncopyable(noncopyable const &) = delete;
    noncopyable &operator=(noncopyable const &) = delete;

  public:
    noncopyable() = default;
    ~noncopyable() = default;
};
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
#endif
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
// PyOpenCL-flavored C++ wrapper of the CL API
|
|
2
|
-
//
|
|
3
|
-
// Copyright (C) 2009 Andreas Kloeckner
|
|
4
|
-
//
|
|
5
|
-
// Permission is hereby granted, free of charge, to any person
|
|
6
|
-
// obtaining a copy of this software and associated documentation
|
|
7
|
-
// files (the "Software"), to deal in the Software without
|
|
8
|
-
// restriction, including without limitation the rights to use,
|
|
9
|
-
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
-
// copies of the Software, and to permit persons to whom the
|
|
11
|
-
// Software is furnished to do so, subject to the following
|
|
12
|
-
// conditions:
|
|
13
|
-
//
|
|
14
|
-
// The above copyright notice and this permission notice shall be
|
|
15
|
-
// included in all copies or substantial portions of the Software.
|
|
16
|
-
//
|
|
17
|
-
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
-
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
19
|
-
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
-
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
21
|
-
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
22
|
-
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
23
|
-
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
24
|
-
// OTHER DEALINGS IN THE SOFTWARE.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
#define PY_ARRAY_UNIQUE_SYMBOL pyopencl_ARRAY_API
|
|
28
|
-
|
|
29
|
-
#include "wrap_cl.hpp"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
using namespace pyopencl;
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
extern void pyopencl_expose_constants(py::module_ &m);
|
|
40
|
-
extern void pyopencl_expose_part_1(py::module_ &m);
|
|
41
|
-
extern void pyopencl_expose_part_2(py::module_ &m);
|
|
42
|
-
extern void pyopencl_expose_mempool(py::module_ &m);
|
|
43
|
-
|
|
44
|
-
static bool import_numpy_helper()
|
|
45
|
-
{
|
|
46
|
-
import_array1(false);
|
|
47
|
-
return true;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
// Entry point of the _cl extension module: initialise NumPy first, then
// register the constants, both wrapper parts and the memory pool on m.
NB_MODULE(_cl, m)
{
  const bool numpy_ready = import_numpy_helper();
  if (!numpy_ready)
  {
    // Surface the pending Python error to the importer.
    throw py::python_error();
  }

  pyopencl_expose_constants(m);
  pyopencl_expose_part_1(m);
  pyopencl_expose_part_2(m);
  pyopencl_expose_mempool(m);
}
|
|
60
|
-
|
|
61
|
-
// vim: foldmethod=marker
|