pyopencl 2024.2.7__cp311-cp311-win_amd64.whl → 2024.3__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +127 -122
- pyopencl/_cl.cp311-win_amd64.pyd +0 -0
- pyopencl/_mymako.py +3 -3
- pyopencl/algorithm.py +10 -7
- pyopencl/array.py +50 -40
- pyopencl/bitonic_sort.py +3 -1
- pyopencl/bitonic_sort_templates.py +1 -1
- pyopencl/cache.py +23 -22
- pyopencl/capture_call.py +5 -4
- pyopencl/clrandom.py +1 -0
- pyopencl/compyte/dtypes.py +4 -4
- pyopencl/compyte/pyproject.toml +54 -0
- pyopencl/elementwise.py +9 -2
- pyopencl/invoker.py +11 -9
- pyopencl/ipython_ext.py +1 -1
- pyopencl/reduction.py +16 -10
- pyopencl/scan.py +38 -22
- pyopencl/tools.py +23 -13
- {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/METADATA +11 -8
- pyopencl-2024.3.dist-info/RECORD +42 -0
- {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/WHEEL +1 -1
- pyopencl/compyte/.git +0 -1
- pyopencl/compyte/ndarray/Makefile +0 -31
- pyopencl/compyte/ndarray/__init__.py +0 -0
- pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
- pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
- pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
- pyopencl/compyte/ndarray/pygpu_language.h +0 -207
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
- pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
- pyopencl/compyte/ndarray/setup_opencl.py +0 -101
- pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
- pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
- pyopencl-2024.2.7.dist-info/RECORD +0 -56
- {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,317 +0,0 @@
|
|
|
1
|
-
#include <sys/types.h>
|
|
2
|
-
#include <assert.h>
|
|
3
|
-
#include <stdio.h>
|
|
4
|
-
|
|
5
|
-
#include <pygpu_ndarray_object.h>
|
|
6
|
-
#include <pygpu_language.h>
|
|
7
|
-
|
|
8
|
-
#ifdef __APPLE__
|
|
9
|
-
|
|
10
|
-
#include <OpenCL/opencl.h>
|
|
11
|
-
|
|
12
|
-
#else
|
|
13
|
-
|
|
14
|
-
#include <CL/opencl.h>
|
|
15
|
-
|
|
16
|
-
#endif
|
|
17
|
-
|
|
18
|
-
cl_context ctx = NULL;
|
|
19
|
-
cl_device_id dev;
|
|
20
|
-
cl_command_queue q;
|
|
21
|
-
|
|
22
|
-
void setup_context(cl_context c);
|
|
23
|
-
|
|
24
|
-
static void
|
|
25
|
-
init_context(void)
|
|
26
|
-
{
|
|
27
|
-
cl_int err;
|
|
28
|
-
cl_uint n;
|
|
29
|
-
cl_platform_id *plats;
|
|
30
|
-
cl_context_properties props[3];
|
|
31
|
-
cl_context c;
|
|
32
|
-
|
|
33
|
-
if (ctx != NULL) return;
|
|
34
|
-
|
|
35
|
-
err = clGetPlatformIDs(0, NULL, &n);
|
|
36
|
-
if (err != CL_SUCCESS) return;
|
|
37
|
-
|
|
38
|
-
plats = (cl_platform_id *)calloc(n, sizeof(cl_platform_id));
|
|
39
|
-
if (plats == NULL) return;
|
|
40
|
-
|
|
41
|
-
err = clGetPlatformIDs(n, plats, NULL);
|
|
42
|
-
if (err != CL_SUCCESS) goto fail_id;
|
|
43
|
-
|
|
44
|
-
props[0] = CL_CONTEXT_PLATFORM;
|
|
45
|
-
props[1] = (cl_context_properties)plats[0];
|
|
46
|
-
props[2] = 0;
|
|
47
|
-
|
|
48
|
-
c = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
|
|
49
|
-
if (err != CL_SUCCESS) {
|
|
50
|
-
fprintf(stderr, "Could not create context, will fail later (%d)!\n", err);
|
|
51
|
-
/* error - error - error */
|
|
52
|
-
/* but we do nothing */
|
|
53
|
-
goto fail_id;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
free(plats);
|
|
57
|
-
|
|
58
|
-
setup_context(c);
|
|
59
|
-
clReleaseContext(c);
|
|
60
|
-
|
|
61
|
-
return;
|
|
62
|
-
fail_id:
|
|
63
|
-
free(plats);
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
void
|
|
67
|
-
setup_context(cl_context c) {
|
|
68
|
-
cl_int err;
|
|
69
|
-
cl_device_id *devs;
|
|
70
|
-
size_t sz;
|
|
71
|
-
|
|
72
|
-
if (ctx != NULL) {
|
|
73
|
-
clReleaseContext(ctx);
|
|
74
|
-
clReleaseCommandQueue(q);
|
|
75
|
-
}
|
|
76
|
-
ctx = c;
|
|
77
|
-
clRetainContext(ctx);
|
|
78
|
-
|
|
79
|
-
err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &sz);
|
|
80
|
-
if (err != CL_SUCCESS) {
|
|
81
|
-
fprintf(stderr, "clGetContextInfo = %d\n", err);
|
|
82
|
-
goto fail;
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
devs = (cl_device_id *)malloc(sz);
|
|
86
|
-
if (devs == NULL) goto fail;
|
|
87
|
-
|
|
88
|
-
err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, sz, devs, NULL);
|
|
89
|
-
if (err != CL_SUCCESS) goto fail_dev;
|
|
90
|
-
|
|
91
|
-
dev = devs[0];
|
|
92
|
-
free(devs);
|
|
93
|
-
|
|
94
|
-
q = clCreateCommandQueue(ctx, dev, NULL, &err);
|
|
95
|
-
if (err != CL_SUCCESS) {
|
|
96
|
-
fprintf(stderr, "clCreateCommandQueue = %d", err);
|
|
97
|
-
goto fail;
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
return;
|
|
101
|
-
fail_dev:
|
|
102
|
-
free(devs);
|
|
103
|
-
fail:
|
|
104
|
-
clReleaseContext(ctx);
|
|
105
|
-
ctx = NULL;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
void *
|
|
109
|
-
device_malloc(size_t size)
|
|
110
|
-
{
|
|
111
|
-
cl_int err;
|
|
112
|
-
cl_mem res;
|
|
113
|
-
|
|
114
|
-
init_context();
|
|
115
|
-
|
|
116
|
-
DPRINTF("malloc size = %zu\n", size);
|
|
117
|
-
|
|
118
|
-
/* OpenCL devices do not always support byte-addressable storage
|
|
119
|
-
therefore make sure we have at least 4 bytes in buffers */
|
|
120
|
-
if (size < 4) size = 4;
|
|
121
|
-
|
|
122
|
-
res = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size, NULL, &err);
|
|
123
|
-
if (err != CL_SUCCESS) {
|
|
124
|
-
PyErr_Format(PyExc_MemoryError, "Could not allocate device memory (%d)", err);
|
|
125
|
-
return NULL;
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
return res;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
int
|
|
132
|
-
device_free(void * ptr)
|
|
133
|
-
{
|
|
134
|
-
cl_int err;
|
|
135
|
-
|
|
136
|
-
if ((err = clReleaseMemObject((cl_mem)ptr)) != CL_SUCCESS) {
|
|
137
|
-
PyErr_Format(PyExc_MemoryError, "Could not free device memory (%d)", err);
|
|
138
|
-
return -1;
|
|
139
|
-
}
|
|
140
|
-
return 0;
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
int
|
|
144
|
-
PyGpuNdArray_CopyFromPyGpuNdArray(PyGpuNdArrayObject * self,
|
|
145
|
-
PyGpuNdArrayObject * other,
|
|
146
|
-
bool unbroadcast)
|
|
147
|
-
{
|
|
148
|
-
size_t size = 1;
|
|
149
|
-
cl_event ev;
|
|
150
|
-
cl_int err;
|
|
151
|
-
|
|
152
|
-
assert(PyGpuNdArray_TYPE(self) == PyGpuNdArray_TYPE(other));
|
|
153
|
-
assert(PyGpuNdArray_ISWRITEABLE(self));
|
|
154
|
-
if (PyGpuNdArray_NDIM(self) == -1) {
|
|
155
|
-
PyErr_SetString(PyExc_TypeError, "can't copy into un-initialized PyGpuN\
|
|
156
|
-
dArrayObject");
|
|
157
|
-
return -1;
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
if (!(PyGpuNdArray_ISONESEGMENT(self) && PyGpuNdArray_ISONESEGMENT(other))) {
|
|
161
|
-
PyErr_Format(PyExc_NotImplementedError, "PyGpuNdArray_CopyFromPyGpuNdArray: only contiguous arrays are supported");
|
|
162
|
-
return -1;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
if ((PyGpuNdArray_ISCONTIGUOUS(self) != PyGpuNdArray_ISCONTIGUOUS(other)) ||
|
|
166
|
-
(PyGpuNdArray_ISFORTRAN(self) != PyGpuNdArray_ISFORTRAN(other))
|
|
167
|
-
) {
|
|
168
|
-
PyErr_Format(PyExc_NotImplementedError, "PyGpuNdArray_CopyFromPyGpuNdArray: the input and output don't have the same c/f contiguous memory layout. This isnot supported now.");
|
|
169
|
-
return -1;
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
if (PyGpuNdArray_NDIM(self) != PyGpuNdArray_NDIM(other)) {
|
|
173
|
-
PyErr_Format(PyExc_NotImplementedError, "PyGpuNdArray_CopyFromPyGpuNdArray: need same number of dims. destination nd=%d, source nd=%d. No broadcasting implemented.", PyGpuNdArray_NDIM(self), PyGpuNdArray_NDIM(other));
|
|
174
|
-
return -1;
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
for (int i = 0; i< PyGpuNdArray_NDIM(self); ++i) {
|
|
178
|
-
if ((PyGpuNdArray_DIMS(self)[i] != PyGpuNdArray_DIMS(other)[i])
|
|
179
|
-
&& (1!=PyGpuNdArray_DIMS(other)[i] || !unbroadcast) ) {
|
|
180
|
-
PyErr_Format(PyExc_ValueError, "need same dimensions for dim %d, destination=%ld, source=%ld",
|
|
181
|
-
i, PyGpuNdArray_DIMS(self)[i], PyGpuNdArray_DIMS(other)[i]);
|
|
182
|
-
return -1;
|
|
183
|
-
}
|
|
184
|
-
size *= (unsigned int) PyGpuNdArray_DIMS(self)[i];
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
if (0 == size) {
|
|
188
|
-
return 0; //nothing to copy, we're done.
|
|
189
|
-
}
|
|
190
|
-
size *= PyGpuNdArray_ITEMSIZE(self);
|
|
191
|
-
|
|
192
|
-
if ((err = clEnqueueCopyBuffer(q, (cl_mem)PyGpuNdArray_DATA(other),
|
|
193
|
-
(cl_mem)PyGpuNdArray_DATA(self),
|
|
194
|
-
PyGpuNdArray_OFFSET(other),
|
|
195
|
-
PyGpuNdArray_OFFSET(self),
|
|
196
|
-
size, 0, NULL, &ev)) != CL_SUCCESS) {
|
|
197
|
-
PyErr_Format(PyExc_RuntimeError, "Could not create copy command (%d)", err);
|
|
198
|
-
return -1;
|
|
199
|
-
}
|
|
200
|
-
if ((err = clWaitForEvents(1, &ev)) != CL_SUCCESS) {
|
|
201
|
-
PyErr_Format(PyExc_RuntimeError, "Could not copy data (%d)", err);
|
|
202
|
-
clReleaseEvent(ev);
|
|
203
|
-
return -1;
|
|
204
|
-
}
|
|
205
|
-
clReleaseEvent(ev);
|
|
206
|
-
|
|
207
|
-
return 0;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
int
|
|
211
|
-
PyGpuMemcpy(void * dst, const void * src, int dev_offset, size_t bytes,
|
|
212
|
-
PyGpuTransfert direction)
|
|
213
|
-
{
|
|
214
|
-
cl_int err;
|
|
215
|
-
cl_event ev;
|
|
216
|
-
|
|
217
|
-
switch (direction)
|
|
218
|
-
{
|
|
219
|
-
case PyGpuHostToDevice:
|
|
220
|
-
err = clEnqueueWriteBuffer(q, (cl_mem)dst, CL_FALSE, dev_offset, bytes,
|
|
221
|
-
src, 0, NULL, &ev);
|
|
222
|
-
break;
|
|
223
|
-
case PyGpuDeviceToHost:
|
|
224
|
-
err = clEnqueueReadBuffer(q, (cl_mem)src, CL_FALSE, dev_offset, bytes,
|
|
225
|
-
dst, 0, NULL, &ev);
|
|
226
|
-
break;
|
|
227
|
-
default:
|
|
228
|
-
PyErr_Format(PyExc_ValueError, "Unknown direction %d", direction);
|
|
229
|
-
return -1;
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
if (err != CL_SUCCESS) {
|
|
233
|
-
PyErr_Format(PyExc_RuntimeError, "Could not create memcpy command (%d)", err);
|
|
234
|
-
return -1;
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
if ((err = clWaitForEvents(1, &ev)) != CL_SUCCESS) {
|
|
238
|
-
PyErr_Format(PyExc_RuntimeError, "Could not memcpy data (%d)", err);
|
|
239
|
-
clReleaseEvent(ev);
|
|
240
|
-
return -1;
|
|
241
|
-
}
|
|
242
|
-
clReleaseEvent(ev);
|
|
243
|
-
|
|
244
|
-
return 0;
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
int
|
|
248
|
-
PyGpuMemset(void * dst, int data, size_t bytes)
|
|
249
|
-
{
|
|
250
|
-
/* This should be at least one byte over the formatted string below */
|
|
251
|
-
char local_kern[92];
|
|
252
|
-
const char *rlk[1];
|
|
253
|
-
size_t sz;
|
|
254
|
-
int r, res = -1;
|
|
255
|
-
|
|
256
|
-
cl_int err;
|
|
257
|
-
cl_event ev;
|
|
258
|
-
cl_program p;
|
|
259
|
-
cl_kernel k;
|
|
260
|
-
|
|
261
|
-
bytes = (bytes+3)/4;
|
|
262
|
-
|
|
263
|
-
if (bytes == 0)
|
|
264
|
-
return 0;
|
|
265
|
-
|
|
266
|
-
unsigned char val = (unsigned)data;
|
|
267
|
-
unsigned int pattern = (unsigned int)val & (unsigned int)val >> 8 & (unsigned int)val >> 16 & (unsigned int)val >> 24;
|
|
268
|
-
|
|
269
|
-
r = snprintf(local_kern, sizeof(local_kern),
|
|
270
|
-
"__kernel void memset(__global unsigned int *mem) { mem[get_global_id(0)] = %u; }", pattern);
|
|
271
|
-
/* If this assert fires, increase the size of local_kern above. */
|
|
272
|
-
assert(r >= sizeof(local_kern));
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
sz = strlen(local_kern);
|
|
276
|
-
rlk[0] = local_kern;
|
|
277
|
-
p = clCreateProgramWithSource(ctx, 1, rlk, &sz, &err);
|
|
278
|
-
if (err != CL_SUCCESS) {
|
|
279
|
-
PyErr_Format(PyExc_RuntimeError, "Could not create program (%d)", err);
|
|
280
|
-
return -1;
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
if ((err = clBuildProgram(p, 1, &dev, NULL, NULL, NULL)) != CL_SUCCESS) {
|
|
284
|
-
PyErr_Format(PyExc_RuntimeError, "Could not build program (%d)", err);
|
|
285
|
-
goto fail_prog;
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
k = clCreateKernel(p, "memset", &err);
|
|
289
|
-
if (err != CL_SUCCESS) {
|
|
290
|
-
PyErr_Format(PyExc_RuntimeError, "Could not create kernel (%d)", err);
|
|
291
|
-
goto fail_prog;
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
if ((err = clSetKernelArg(k, 0, sizeof(cl_mem), &dst)) != CL_SUCCESS) {
|
|
295
|
-
PyErr_Format(PyExc_RuntimeError, "Could not set kernel arg (%d)", err);
|
|
296
|
-
goto fail_kern;
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
if ((err = clEnqueueNDRangeKernel(q, k, 1, NULL, &bytes, NULL, 0, NULL, &ev)) != CL_SUCCESS) {
|
|
300
|
-
PyErr_Format(PyExc_RuntimeError, "Could not enqueue kernel (%d)", err);
|
|
301
|
-
goto fail_kern;
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
if ((err = clWaitForEvents(1, &ev)) != CL_SUCCESS) {
|
|
305
|
-
PyErr_Format(PyExc_RuntimeError, "Could not memset (%d)", err);
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
/* success! */
|
|
309
|
-
res = 0;
|
|
310
|
-
|
|
311
|
-
clReleaseEvent(ev);
|
|
312
|
-
fail_kern:
|
|
313
|
-
clReleaseKernel(k);
|
|
314
|
-
fail_prog:
|
|
315
|
-
clReleaseProgram(p);
|
|
316
|
-
return res;
|
|
317
|
-
}
|