pyopencl 2024.2.7__cp39-cp39-macosx_11_0_arm64.whl → 2024.3__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyopencl might be problematic.

Files changed (38)
  1. pyopencl/__init__.py +127 -122
  2. pyopencl/_cl.cpython-39-darwin.so +0 -0
  3. pyopencl/_mymako.py +3 -3
  4. pyopencl/algorithm.py +10 -7
  5. pyopencl/array.py +50 -40
  6. pyopencl/bitonic_sort.py +3 -1
  7. pyopencl/bitonic_sort_templates.py +1 -1
  8. pyopencl/cache.py +23 -22
  9. pyopencl/capture_call.py +5 -4
  10. pyopencl/clrandom.py +1 -0
  11. pyopencl/compyte/dtypes.py +4 -4
  12. pyopencl/compyte/pyproject.toml +54 -0
  13. pyopencl/elementwise.py +9 -2
  14. pyopencl/invoker.py +11 -9
  15. pyopencl/ipython_ext.py +1 -1
  16. pyopencl/reduction.py +16 -10
  17. pyopencl/scan.py +38 -22
  18. pyopencl/tools.py +23 -13
  19. {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/METADATA +11 -8
  20. pyopencl-2024.3.dist-info/RECORD +42 -0
  21. {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/WHEEL +1 -1
  22. pyopencl/compyte/.git +0 -1
  23. pyopencl/compyte/ndarray/Makefile +0 -31
  24. pyopencl/compyte/ndarray/__init__.py +0 -0
  25. pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
  26. pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
  27. pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
  28. pyopencl/compyte/ndarray/pygpu_language.h +0 -207
  29. pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
  30. pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
  31. pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
  32. pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
  33. pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
  34. pyopencl/compyte/ndarray/setup_opencl.py +0 -101
  35. pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
  36. pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
  37. pyopencl-2024.2.7.dist-info/RECORD +0 -56
  38. {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/licenses/LICENSE +0 -0
pyopencl/compyte/ndarray/pygpu_ndarray.cpp
@@ -1,1546 +0,0 @@
1
- #include <Python.h>
2
- #include <structmember.h>
3
-
4
- #include <numpy/arrayobject.h>
5
- #include <iostream>
6
-
7
- #include "pygpu_ndarray.h"
8
- #include "pygpu_language.h"
9
-
10
- /////////////////////////
11
- // Static helper methods
12
- /////////////////////////
13
-
14
- static void
15
- PyGpuNdArray_null_init(PyGpuNdArrayObject *self)
16
- {
17
- DPRINTF("PyGpuNdArrayObject_null_init\n");
18
-
19
- PyGpuNdArray_DATA(self) = NULL;
20
- PyGpuNdArray_OFFSET(self) = 0;
21
- PyGpuNdArray_NDIM(self) = -1;
22
- self->base = NULL;
23
- PyGpuNdArray_DIMS(self) = NULL;
24
- PyGpuNdArray_STRIDES(self) = NULL;
25
- PyGpuNdArray_FLAGS(self) = NPY_DEFAULT;
26
- self->descr = NULL;
27
-
28
- self->data_allocated = 0;
29
- }
30
-
31
-
32
-
33
- /////////////////////////////
34
- // Satisfying reqs to be Type
35
- /////////////////////////////
36
-
37
- //DON'T use directly(if their is other PyGpuNdArrayObject that point to it, it will cause problem)! use Py_DECREF() instead
38
- static void
39
- PyGpuNdArrayObject_dealloc(PyGpuNdArrayObject* self)
40
- {
41
- DPRINTF("PyGpuNdArrayObject_dealloc\n");
42
- DPRINTF("PyGpuNdArrayObject dealloc %p %d %p\n", self, self->data_allocated, PyGpuNdArray_DATA(self));
43
-
44
- if(self->ob_refcnt>1)
45
- printf("WARNING:PyGpuNdArrayObject_dealloc called when their is still active reference to it.\n");
46
-
47
- if (self->data_allocated){
48
- assert(PyGpuNdArray_DATA(self));
49
- if (PyGpuNdArray_DATA(self)){
50
- if (device_free(PyGpuNdArray_DATA(self))){
51
- fprintf(stderr,
52
- "!!!! error freeing device memory %p (self=%p)\n",
53
- PyGpuNdArray_DATA(self), self);
54
- }
55
- PyGpuNdArray_DATA(self) = NULL;
56
- }
57
- }
58
- PyGpuNdArray_OFFSET(self) = 0;
59
- PyGpuNdArray_NDIM(self) = -1;
60
- Py_XDECREF(self->base);
61
- self->base = NULL;
62
- if (PyGpuNdArray_DIMS(self)){
63
- free(PyGpuNdArray_DIMS(self));
64
- PyGpuNdArray_DIMS(self) = NULL;
65
- }
66
- if (PyGpuNdArray_STRIDES(self)){
67
- free(PyGpuNdArray_STRIDES(self));
68
- PyGpuNdArray_STRIDES(self) = NULL;
69
- }
70
- PyGpuNdArray_FLAGS(self) = NPY_DEFAULT;
71
- //Py_XDECREF(self->descr);//TODO: How to handle the refcont on this object?
72
- self->descr = NULL;
73
- self->data_allocated = 0;
74
-
75
- self->ob_type->tp_free((PyObject*)self);
76
- --_outstanding_mallocs[1];
77
- DPRINTF("device_malloc_counts: (device) %i (obj) %i\n",
78
- _outstanding_mallocs[0],
79
- _outstanding_mallocs[1]);
80
- DPRINTF("PyGpuNdArrayObject_dealloc end\n");
81
- }
82
-
83
- static PyObject *
84
- PyGpuNdArray_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
85
- {
86
- DPRINTF("PyGpuNdArray_new\n");
87
- PyGpuNdArrayObject *self;
88
-
89
- self = (PyGpuNdArrayObject *)type->tp_alloc(type, 0);
90
- if (self != NULL){
91
- PyGpuNdArray_null_init(self);
92
- ++_outstanding_mallocs[1];
93
- }
94
- DPRINTF("PyGpuNdArray_new end %p\n", self);
95
- return (PyObject *)self;
96
- }
97
-
98
- static int
99
- PyGpuNdArray_init(PyGpuNdArrayObject *self, PyObject *args, PyObject *kwds)
100
- {
101
- DPRINTF("PyGpuNdArray_init\n");
102
- PyObject *arr=NULL;
103
-
104
- if (! PyArg_ParseTuple(args, "O", &arr))
105
- return -1;
106
- if (! PyArray_Check(arr)){
107
- PyErr_SetString(PyExc_TypeError, "PyGpuNdArrayObject_init: PyArray or PyGpuNdArrayObject arg required");
108
- return -1;
109
- }
110
-
111
- // TODO: We must create a new copy of the PyArray_Descr(or this only increment the refcount?) or still the reference?
112
- PyArray_Descr * type = PyArray_DescrFromType(PyArray_TYPE(arr));
113
- self->descr = type;
114
- Py_XINCREF(self->descr);//TODO: How to handle the refcont on this object?
115
- int rval = PyGpuNdArray_CopyFromArray(self, (PyArrayObject*)arr);
116
- DPRINTF("PyGpuNdArray_init: end %p type=%p\n", self, self->descr);
117
- return rval;
118
- }
119
-
120
-
121
- int
122
- PyGpuNdArray_CopyFromArray(PyGpuNdArrayObject * self, PyArrayObject*obj)
123
- {
124
- DPRINTF("PyGpuNdArray_CopyFromArray: start descr=%p\n", self->descr);
125
- //modif done to the new array won't be updated!
126
- assert(!PyGpuNdArray_CHKFLAGS(self, NPY_UPDATEIFCOPY));
127
- //Aligned are not tested, so don't allow it for now
128
- assert(PyGpuNdArray_CHKFLAGS(self, NPY_ALIGNED));
129
-
130
- int typenum = PyArray_TYPE(obj);
131
- PyObject * py_src = NULL;
132
- if (PyArray_ISONESEGMENT(obj)) {
133
- Py_INCREF(obj);
134
- py_src = (PyObject *) obj;
135
- }else{
136
- py_src = PyArray_ContiguousFromAny((PyObject*)obj, typenum,
137
- PyArray_NDIM(obj),
138
- PyArray_NDIM(obj));
139
- }
140
- DPRINTF("PyGpuNdArray_CopyFromArray: contiguous!\n");
141
- if (!py_src) {
142
- return -1;
143
- }
144
-
145
- int err;
146
- if(PyArray_ISFORTRAN(obj) && ! PyArray_ISCONTIGUOUS(obj)){
147
- DPRINTF("PyGpuNdArray_CopyFromArray: fortran!\n");
148
- err = PyGpuNdArray_alloc_contiguous(self, obj->nd, obj->dimensions,
149
- NPY_FORTRANORDER);
150
- }else{
151
- err = PyGpuNdArray_alloc_contiguous(self, obj->nd, obj->dimensions);
152
- }
153
- if (err) {
154
- return err;
155
- }
156
-
157
- //check that the flag are the same
158
- if (PyArray_ISCONTIGUOUS(py_src) != PyGpuNdArray_ISCONTIGUOUS(self) &&
159
- PyArray_ISFORTRAN(obj) && 0) {
160
- PyErr_Format(PyExc_RuntimeError, "ISCONTIGUOUS %d %d\n", PyArray_ISCONTIGUOUS(py_src), PyGpuNdArray_ISCONTIGUOUS(self));
161
- return -1;
162
- }
163
- assert(PyArray_ISCONTIGUOUS(py_src) == PyGpuNdArray_ISCONTIGUOUS(self) ||
164
- PyArray_ISFORTRAN(obj));
165
- assert(PyArray_ISFORTRAN(py_src) == PyGpuNdArray_ISFORTRAN(self));
166
- assert(PyArray_ISALIGNED(py_src) == PyGpuNdArray_ISALIGNED(self));
167
-
168
- // New memory, so we should own it.
169
- assert(PyGpuNdArray_CHKFLAGS(self, NPY_OWNDATA));
170
- // New memory, so it should be writable
171
- assert(PyGpuNdArray_ISWRITEABLE(self));
172
-
173
- err = PyGpuMemcpy(PyGpuNdArray_DATA(self),
174
- PyArray_DATA(py_src),
175
- PyGpuNdArray_OFFSET(self),
176
- PyArray_SIZE(py_src) * PyArray_ITEMSIZE(py_src),
177
- PyGpuHostToDevice);
178
- if (err) {
179
- Py_DECREF(py_src);
180
- return -1;
181
- }
182
- Py_DECREF(py_src);
183
- DPRINTF("PyGpuNdArray_CopyFromArray: end\n");
184
- return 0;
185
- }
186
-
187
- static PyObject * PyGpuNdArray_copy(PyObject * self, PyObject *args,
188
- PyObject *kargs)
189
- {
190
- DPRINTF("PyGpuNdArray_copy start\n");
191
- static const char *kwlist[] = {"order", NULL};
192
- NPY_ORDER order = PyArray_CORDER;
193
-
194
- if(!PyGpuNdArray_Check(self)){
195
- PyErr_SetString(PyExc_ValueError, "PyGpuNdArray_copy: expected a PyGpuNdArrayObject.");
196
- return NULL;
197
- }
198
-
199
- DPRINTF("PyGpuNdArray_copy before parse inputs\n");
200
- if (!PyArg_ParseTupleAndKeywords(args, kargs, "|O&",
201
- (char**)kwlist,
202
- PyArray_OrderConverter,
203
- &order)) {
204
- DPRINTF("PyGpuNdArray_copy start1.2\n");
205
- return NULL;
206
- }
207
- DPRINTF("PyGpuNdArray_copy after parse inputs\n");
208
-
209
- DPRINTF("PyGpuNdArray_copy before copy\n");
210
- PyObject *ret = PyGpuNdArray_Copy((PyGpuNdArrayObject*)self, order);
211
- DPRINTF("PyGpuNdArray_copy end\n");
212
- return ret;
213
- }
214
-
215
- static PyObject * PyGpuNdArray_Copy(PyGpuNdArrayObject * self, NPY_ORDER order)
216
- {
217
- DPRINTF("PyGpuNdArray_Copy start\n");
218
- PyObject * rval = PyGpuNdArray_New();
219
- //TODO find how to refcount descr.
220
- PyGpuNdArray_DESCR(rval) = PyGpuNdArray_DESCR(self);
221
- if ((!rval) || (-1 == PyGpuNdArray_NDIM(self))) {
222
- return rval;
223
- }
224
- if (PyGpuNdArray_alloc_contiguous((PyGpuNdArrayObject*)rval,
225
- PyGpuNdArray_NDIM(self),
226
- PyGpuNdArray_DIMS(self),
227
- order)) {
228
- Py_DECREF(rval);
229
- return NULL;
230
- }
231
-
232
- if (PyGpuNdArray_CopyFromPyGpuNdArray((PyGpuNdArrayObject*)rval, self)) {
233
- Py_DECREF(rval);
234
- return NULL;
235
- }
236
- if (order == NPY_F_CONTIGUOUS)
237
- PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
238
-
239
- #ifdef DEBUG
240
- PyGpuNdArray_fprint(stderr, self);
241
- PyGpuNdArray_fprint(stderr, (PyGpuNdArrayObject *)rval);
242
- #endif
243
- DPRINTF("PyGpuNdArray_Copy end\n");
244
- return rval;
245
- }
246
-
247
- PyObject * PyGpuNdArray_DeepCopy(PyGpuNdArrayObject * self, PyObject * memo)
248
- {
249
- assert(PyDict_Check(memo));
250
- PyObject * selfkey = PyInt_FromLong((long)self);
251
- assert(selfkey);
252
-
253
- if (PyDict_Contains(memo, selfkey)) {
254
- PyObject * rval = PyDict_GetItem(memo, selfkey);
255
- Py_DECREF(selfkey);
256
- Py_XINCREF(rval);
257
- return rval;
258
- } else {
259
- DPRINTF("PyGpuNdArray_DeepCopy: startd deepcopy\n");
260
- PyObject * rval = PyGpuNdArray_Copy(self);
261
- if (NULL == rval) {
262
- Py_DECREF(selfkey);
263
- return NULL;
264
- }
265
-
266
- DPRINTF("DeepCopy created %p\n", rval);
267
- DPRINTF("DeepCopy created %p %p\n", PyGpuNdArray_DESCR(rval), PyGpuNdArray_DATA(rval));
268
- if (PyDict_SetItem(memo, selfkey, rval)) {
269
- Py_DECREF(rval);
270
- Py_DECREF(selfkey);
271
- return NULL;
272
- }
273
- Py_DECREF(selfkey);
274
- DPRINTF("PyGpuNdArray_DeepCopy: startd end\n");
275
- return rval;
276
- }
277
- }
278
-
279
- PyObject * PyGpuNdArray_View(PyGpuNdArrayObject * self)
280
- {
281
- PyGpuNdArrayObject * rval = (PyGpuNdArrayObject*)PyGpuNdArray_New(PyGpuNdArray_NDIM(self));
282
- if (!rval || PyGpuNdArray_set_data(rval, PyGpuNdArray_DATA(self),
283
- (PyObject *)self, PyGpuNdArray_OFFSET(self))) {
284
- Py_XDECREF(rval);
285
- DPRINTF("PyGpuNdArray_View: no rval or PyGpuNdArray_set_data "
286
- "failed: self=%p, rval=%p rval_base=%p\n",
287
- self, rval, rval->base);
288
- return NULL;
289
- } else {
290
- for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i) {
291
- PyGpuNdArray_DIM(rval, i) = PyGpuNdArray_DIMS(self)[i];
292
- PyGpuNdArray_STRIDE(rval, i) = PyGpuNdArray_STRIDES(self)[i];
293
- }
294
- }
295
- DPRINTF("PyGpuNdArray_View: self=%p, self->base=%p"
296
- " rval=%p rval->base=%p\n",
297
- self, self->base, rval, rval->base);
298
- //TODO: find how to refcount on the descr!
299
- //Py_INCREF(PyGpuNdArray_DESCR(self));
300
- PyGpuNdArray_DESCR(rval) = PyGpuNdArray_DESCR(self);
301
- PyGpuNdArray_FLAGS(rval) = PyGpuNdArray_FLAGS(self);
302
- PyGpuNdArray_FLAGS(rval) &= ~NPY_OWNDATA;
303
-
304
-
305
- return (PyObject*)rval;
306
- }
307
-
308
- //updated for offset
309
- PyObject * PyGpuNdArray_CreateArrayObj(PyGpuNdArrayObject * self)
310
- {
311
- DPRINTF("PyGpuNdArray_CreateArrayObj\n");
312
-
313
- if(PyGpuNdArray_NDIM(self)>=0 && PyGpuNdArray_SIZE(self)==0){
314
- npy_intp * npydims = (npy_intp*)malloc(PyGpuNdArray_NDIM(self) * sizeof(npy_intp));
315
- assert (npydims);
316
- for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
317
- npydims[i] = (npy_intp)(PyGpuNdArray_DIMS(self)[i]);
318
-
319
- // Numpy will do a decref on the description.
320
- Py_INCREF(PyGpuNdArray_DESCR(self));
321
-
322
- // We can't use PyArray_{Empty,EMPTY} as they segfault when size == 0
323
- PyObject * rval = PyArray_NewFromDescr(&PyArray_Type,
324
- PyGpuNdArray_DESCR(self),
325
- PyGpuNdArray_NDIM(self),
326
- npydims,
327
- NULL,
328
- NULL,
329
- 0,
330
- NULL);
331
-
332
- free(npydims);
333
- if (!rval){
334
- return NULL;
335
- }
336
- assert (PyArray_ITEMSIZE(rval) == PyGpuNdArray_ITEMSIZE(self));
337
- return rval;
338
- }
339
- if ((PyGpuNdArray_NDIM(self) < 0) || (PyGpuNdArray_DATA(self) == 0)) {
340
- PyErr_SetString(PyExc_ValueError, "can't copy from un-initialized PyGpuNdArray");
341
- return NULL;
342
- }
343
- PyGpuNdArrayObject * contiguous_self = NULL;
344
- bool pos_stride = true;
345
- for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
346
- if (PyGpuNdArray_STRIDE(self,i)<0)
347
- pos_stride = false;
348
- if (PyGpuNdArray_ISONESEGMENT(self) && pos_stride) {
349
- contiguous_self = self;
350
- Py_INCREF(contiguous_self);
351
- DPRINTF("PyGpuNdArray_CreateArrayObj: gpu array already contiguous %p\n", contiguous_self);
352
- //}else if(PyGpuNdArray_ISONESEGMENT(self)){
353
- //TODO implement special object handling to speed up transfer
354
- // DPRINTF("CreateArrayObj one segment, with special handling %p\n", contiguous_self);
355
- //PyErr_SetString(PyExc_ValueError, "PyGpuNdArray_CreateArrayObj: Need PyGpuNdArray_Copy or some other nd array mandling to transfer contiguous bloc with negative stride.");
356
- //return NULL;
357
- } else {
358
- contiguous_self = (PyGpuNdArrayObject*)PyGpuNdArray_Copy(self);
359
- DPRINTF("CreateArrayObj created contiguous %p\n", contiguous_self);
360
- }
361
- if (!contiguous_self) {
362
- return NULL;
363
- }
364
-
365
- npy_intp * npydims = (npy_intp*)malloc(PyGpuNdArray_NDIM(self) * sizeof(npy_intp));
366
- assert (npydims);
367
- for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i) npydims[i] = (npy_intp)(PyGpuNdArray_DIMS(self)[i]);
368
- Py_INCREF(PyGpuNdArray_DESCR(self));
369
- PyObject * rval = PyArray_Empty(PyGpuNdArray_NDIM(self),
370
- npydims,
371
- PyGpuNdArray_DESCR(self),
372
- PyGpuNdArray_ISFORTRAN(self));
373
- free(npydims);
374
- if (!rval) {
375
- Py_DECREF(contiguous_self);
376
- return NULL;
377
- }
378
-
379
- int err = PyGpuMemcpy(PyArray_DATA(rval),
380
- PyGpuNdArray_DATA(contiguous_self),
381
- PyGpuNdArray_OFFSET(contiguous_self),
382
- PyArray_SIZE(rval) * PyArray_ITEMSIZE(rval),
383
- PyGpuDeviceToHost);
384
- if (err) {
385
- Py_DECREF(contiguous_self);
386
- Py_DECREF(rval);
387
- rval = NULL;
388
- }
389
- Py_DECREF(contiguous_self);
390
- return rval;
391
- }
392
-
393
- static PyObject *
394
- PyGpuNdArray_Empty(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran)
395
- {
396
- DPRINTF("PyGpuNdArray_Empty: start!\n");
397
- PyGpuNdArrayObject* rval = (PyGpuNdArrayObject*)PyGpuNdArray_New();
398
- PyGpuNdArray_DESCR(rval) = dtype;
399
- if (!rval) {
400
- DPRINTF("PyGpuNdArray_Empty: fail!\n");
401
- return NULL;
402
- }
403
- NPY_ORDER order = NPY_CORDER;
404
- if (fortran!=0)
405
- order = NPY_FORTRANORDER;
406
-
407
- if (PyGpuNdArray_alloc_contiguous(rval, nd, dims, order)) {
408
- Py_DECREF(rval);
409
- return NULL;
410
- }
411
-
412
- DPRINTF("PyGpuNdArray_Empty: end!\n");
413
- return (PyObject*) rval;
414
- }
415
-
416
- //DONE: dtype, offset not needed, flags
417
- static PyObject *
418
- PyGpuNdArray_Zeros(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran)
419
- {
420
- DPRINTF("PyGpuNdArray_Zeros: start!\n");
421
- PyObject * rval = PyGpuNdArray_Empty(nd, dims, dtype, fortran);
422
- if (!rval) {
423
- return rval;
424
- }
425
-
426
- int total_elements = 1;
427
- for(int i=0;i<nd;i++)
428
- total_elements*=dims[i];
429
-
430
- // total_elements now contains the size of the array, in reals
431
- int total_size = total_elements * dtype->elsize;
432
-
433
- // Fill with zeros
434
- int err = PyGpuMemset(PyGpuNdArray_DATA(rval), 0, total_size);
435
- if (err) {
436
- Py_DECREF(rval);
437
- return NULL;
438
- }
439
-
440
- DPRINTF("PyGpuNdArray_Zeros: end!\n");
441
- return (PyObject*) rval;
442
- }
443
-
444
- // declared as a static method (hence "dummy" is not used)
445
- // numpy.zeros(shape, dtype=float, order='C')
446
- static PyObject *
447
- PyGpuNdArray_zeros(PyObject* dummy, PyObject* args, PyObject *kargs)
448
- {
449
- static const char *kwlist[] = {"shape","dtype","order",NULL}; /* XXX ? */
450
- PyArray_Descr *typecode = NULL;
451
- PyObject * shape = NULL;
452
- NPY_ORDER order = PyArray_CORDER;
453
- bool fortran = false;
454
- PyObject *ret = NULL;
455
-
456
- if (!PyArg_ParseTupleAndKeywords(args, kargs, "O|O&O&",
457
- (char**)kwlist,
458
- &shape,
459
- PyArray_DescrConverter,
460
- &typecode,
461
- PyArray_OrderConverter,
462
- &order)) {
463
- Py_XDECREF(typecode);
464
- Py_XDECREF(shape);
465
- return ret;
466
- }
467
- if (order == PyArray_FORTRANORDER) {
468
- fortran = true;
469
- }
470
- else {
471
- fortran = false;
472
- }
473
-
474
- if(!PySequence_Check(shape))
475
- {
476
- PyErr_SetString(PyExc_TypeError, "shape argument must be a sequence");
477
- return NULL;
478
- }
479
-
480
- if (!typecode)
481
- typecode = PyArray_DescrFromType(NPY_FLOAT64);
482
-
483
- int shplen = PySequence_Length(shape);
484
-
485
- if (shplen == 0)
486
- {
487
- return PyGpuNdArray_Zeros(0, NULL, typecode, fortran);
488
- }
489
-
490
- npy_intp* newdims = (npy_intp *)malloc(sizeof(npy_intp) * shplen);
491
-
492
- if (!newdims)
493
- {
494
- PyErr_SetString(PyExc_MemoryError,
495
- "PyGpuNdArray_Zeros: Failed to allocate temporary space");
496
- return NULL;
497
- }
498
-
499
- // start from the end to compute strides
500
- for (int i = shplen-1; i >= 0; --i)
501
- {
502
- PyObject* shp_el_obj = PySequence_GetItem(shape, i);
503
- if(shp_el_obj == NULL)
504
- {
505
- // shouldn't happen since we checked length before...
506
- PyErr_SetString(PyExc_RuntimeError, "PyGpuNdArray_Zeros: Index out of bound in sequence");
507
- free(newdims);
508
- return NULL;
509
- }
510
-
511
- int shp_el = PyInt_AsLong(shp_el_obj);
512
- Py_DECREF(shp_el_obj);
513
-
514
- newdims[i] = shp_el;
515
- }
516
-
517
- PyObject* rval = PyGpuNdArray_Zeros(shplen, newdims, typecode, fortran);
518
-
519
- free(newdims);
520
-
521
- return (PyObject*)rval;
522
- }
523
-
524
- // declared as a static method (hence "dummy" is not used)
525
- // numpy.empty(shape, dtype=float, order='C')
526
- static PyObject *
527
- PyGpuNdArray_empty(PyObject* dummy, PyObject* args, PyObject *kargs)
528
- {
529
- static const char *kwlist[] = {"shape","dtype","order",NULL}; /* XXX ? */
530
- PyArray_Descr *typecode = NULL;
531
- PyObject * shape = NULL;
532
- NPY_ORDER order = PyArray_CORDER;
533
- bool fortran = false;
534
- PyObject *ret = NULL;
535
-
536
- if (!PyArg_ParseTupleAndKeywords(args, kargs, "O|O&O&",
537
- (char **)kwlist,
538
- &shape,
539
- PyArray_DescrConverter,
540
- &typecode,
541
- PyArray_OrderConverter,
542
- &order)) {
543
- Py_XDECREF(typecode);
544
- Py_XDECREF(shape);
545
- return ret;
546
- }
547
- if (order == PyArray_FORTRANORDER) {
548
- fortran = true;
549
- }
550
- else {
551
- fortran = false;
552
- }
553
-
554
- if(!PySequence_Check(shape))
555
- {
556
- PyErr_SetString(PyExc_TypeError, "shape argument must be a sequence");
557
- return NULL;
558
- }
559
-
560
- if (!typecode)
561
- typecode = PyArray_DescrFromType(NPY_FLOAT64);
562
-
563
- int shplen = PySequence_Length(shape);
564
-
565
- if (shplen == 0)
566
- {
567
- return PyGpuNdArray_Empty(0, NULL, typecode, fortran);
568
- }
569
-
570
- npy_intp* newdims = (npy_intp *)malloc(sizeof(npy_intp) * shplen);
571
-
572
- if (!newdims)
573
- {
574
- PyErr_SetString(PyExc_MemoryError,
575
- "PyGpuNdArray_empty: Failed to allocate temporary space");
576
- return NULL;
577
- }
578
-
579
- // start from the end to compute strides
580
- for (int i = shplen-1; i >= 0; --i)
581
- {
582
- PyObject* shp_el_obj = PySequence_GetItem(shape, i);
583
- if(shp_el_obj == NULL)
584
- {
585
- // shouldn't happen since we checked length before...
586
- PyErr_SetString(PyExc_RuntimeError, "PyGpuNdArray_empty: Index out of bound in sequence");
587
- free(newdims);
588
- return NULL;
589
- }
590
-
591
- int shp_el = PyInt_AsLong(shp_el_obj);
592
- Py_DECREF(shp_el_obj);
593
-
594
- newdims[i] = shp_el;
595
- }
596
-
597
- PyObject* rval = PyGpuNdArray_Empty(shplen, newdims, typecode, fortran);
598
-
599
- free(newdims);
600
-
601
- return (PyObject*)rval;
602
- }
603
-
604
- static PyMethodDef PyGpuNdArray_methods[] =
605
- {
606
- {"__array__",
607
- (PyCFunction)PyGpuNdArray_CreateArrayObj, METH_NOARGS,
608
- "Copy from the device to a numpy ndarray"},
609
- {"copy",
610
- (PyCFunction)PyGpuNdArray_copy, METH_VARARGS|METH_KEYWORDS,
611
- "Create a deep copy of this object."},
612
- {"view",
613
- (PyCFunction)PyGpuNdArray_View, METH_NOARGS,
614
- "Create a view of this object."},
615
- {"__copy__",
616
- (PyCFunction)PyGpuNdArray_Copy, METH_NOARGS,
617
- "Create a copy of this object as numpy does. Why numpy do a copy of the data when the object is a view?"},
618
- {"__deepcopy__",
619
- (PyCFunction)PyGpuNdArray_DeepCopy, METH_O,
620
- "Create a copy of this object"},
621
- /*
622
- {"reduce_sum",
623
- (PyCFunction)PyGpuNdArray_ReduceSum, METH_O,
624
- "Reduce over the given dimensions by summation"},
625
- {"exp",
626
- (PyCFunction)PyGpuNdArray_Exp, METH_NOARGS,
627
- "Return the exponential of all elements"},
628
- {"reshape",
629
- (PyCFunction)PyGpuNdArray_Reshape, METH_O,
630
- "Return a reshaped view (or copy) of this ndarray\n\
631
- The required argument is a tuple of integers specifying the shape of the new ndarray."},
632
- {"_set_stride",
633
- (PyCFunction)PyGpuNdArray_SetStride, METH_VARARGS,
634
- "For integer arguments (i, s), set the 'i'th stride to 's'"},
635
- {"_set_shape_i",
636
- (PyCFunction)PyGpuNdArray_SetShapeI, METH_VARARGS,
637
- "For integer arguments (i, s), set the 'i'th shape to 's'"},
638
- */
639
- {NULL, NULL, NULL, NULL} /* Sentinel */
640
- };
641
-
642
- //PyArray_CopyInto(PyArrayObject* dest, PyArrayObject* src)¶
643
- //PyObject* PyArray_NewCopy(PyArrayObject* old, NPY_ORDER order)¶
644
-
645
-
646
- static PyObject *
647
- PyGpuNdArray_get_shape(PyGpuNdArrayObject *self, void *closure)
648
- {
649
- DPRINTF("PyGpuNdArray_get_shape\n");
650
-
651
- if (PyGpuNdArray_NDIM(self) < 0)
652
- {
653
- PyErr_SetString(PyExc_ValueError, "PyGpuNdArray not initialized");
654
- return NULL;
655
- }
656
- PyObject * rval = PyTuple_New(PyGpuNdArray_NDIM(self));
657
- for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
658
- {
659
- if (!rval || PyTuple_SetItem(rval, i, PyInt_FromLong(PyGpuNdArray_DIMS(self)[i])))
660
- {
661
- Py_XDECREF(rval);
662
- return NULL;
663
- }
664
-
665
- }
666
- return rval;
667
- }
668
-
669
- static int
670
- PyGpuNdArray_set_shape(PyGpuNdArrayObject *self, PyObject *value, void *closure)
671
- {
672
- PyErr_SetString(PyExc_NotImplementedError, "TODO: call reshape");
673
- return -1;
674
- }
675
-
676
- static PyObject *
677
- PyGpuNdArray_get_strides(PyGpuNdArrayObject *self, void *closure)
678
- {
679
- if ( PyGpuNdArray_NDIM(self) < 0){
680
- PyErr_SetString(PyExc_ValueError, "PyGpuNdArrayObject not initialized");
681
- return NULL;
682
- }
683
- PyObject * rval = PyTuple_New( PyGpuNdArray_NDIM(self));
684
- for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i){
685
- if (!rval || PyTuple_SetItem(rval, i, PyInt_FromLong(PyGpuNdArray_STRIDES(self)[i]))){
686
- Py_XDECREF(rval);
687
- return NULL;
688
- }
689
- }
690
- return rval;
691
- }
692
-
693
- static PyObject *
694
- PyGpuNdArray_get_data(PyGpuNdArrayObject *self, void *closure)
695
- {
696
- return PyInt_FromLong((long int) PyGpuNdArray_DATA(self));
697
- }
698
-
699
- static PyObject *
700
- PyGpuNdArray_get_flags(PyGpuNdArrayObject *self, void *closure)
701
- {
702
- PyObject * dict = PyDict_New();
703
-
704
- PyObject * str= PyString_FromString("C_CONTIGUOUS");
705
- PyObject * i = PyBool_FromLong(PyGpuNdArray_ISCONTIGUOUS(self));
706
- PyDict_SetItem(dict, str, i);
707
- Py_DECREF(str);
708
- Py_DECREF(i);
709
-
710
- str= PyString_FromString("F_CONTIGUOUS");
711
- i = PyBool_FromLong(PyGpuNdArray_CHKFLAGS(self, NPY_F_CONTIGUOUS));
712
- PyDict_SetItem(dict, str, i);
713
- Py_DECREF(str);
714
- Py_DECREF(i);
715
-
716
- str= PyString_FromString("WRITEABLE");
717
- i = PyBool_FromLong(PyGpuNdArray_ISWRITEABLE(self));
718
- PyDict_SetItem(dict, str, i);
719
- Py_DECREF(str);
720
- Py_DECREF(i);
721
-
722
- str= PyString_FromString("ALIGNED");
723
- i = PyBool_FromLong(PyGpuNdArray_ISALIGNED(self));
724
- PyDict_SetItem(dict, str, i);
725
- Py_DECREF(str);
726
- Py_DECREF(i);
727
-
728
- str= PyString_FromString("UPDATEIFCOPY");
729
- i = PyBool_FromLong(PyGpuNdArray_CHKFLAGS(self, NPY_UPDATEIFCOPY));
730
- PyDict_SetItem(dict, str, i);
731
- Py_DECREF(str);
732
- Py_DECREF(i);
733
-
734
- str= PyString_FromString("OWNDATA");
735
- i = PyBool_FromLong(PyGpuNdArray_CHKFLAGS(self, NPY_OWNDATA));
736
- PyDict_SetItem(dict, str, i);
737
- Py_DECREF(str);
738
- Py_DECREF(i);
739
-
740
- return dict;
741
- }
742
- static PyObject *
743
- PyGpuNdArray_get_ndim(PyGpuNdArrayObject *self, void *closure)
744
- {
745
- return PyInt_FromLong((long int) PyGpuNdArray_NDIM(self));
746
- }
747
- static PyObject *
748
- PyGpuNdArray_get_offset(PyGpuNdArrayObject *self, void *closure)
749
- {
750
- return PyInt_FromLong((long int) PyGpuNdArray_OFFSET(self));
751
- }
752
- static PyObject *
753
- PyGpuNdArray_get_data_allocated(PyGpuNdArrayObject *self, void *closure)
754
- {
755
- return PyInt_FromLong((long int) self->data_allocated);
756
- }
757
- static PyObject *
758
- PyGpuNdArray_get_size(PyGpuNdArrayObject *self, void *closure)
759
- {
760
- return PyInt_FromLong((long int) PyGpuNdArray_SIZE(self));
761
- }
762
-
763
- static PyObject *
764
- PyGpuNdArray_get_base(PyGpuNdArrayObject *self, void *closure)
765
- {
766
- if (!PyGpuNdArray_BASE(self)){
767
- Py_INCREF(Py_None);
768
- return Py_None;
769
- }
770
- PyObject * ret = PyGpuNdArray_BASE(self);
771
- Py_INCREF(ret);
772
- return ret;
773
- }
774
-
775
- static PyObject *
776
- PyGpuNdArray_get_dtype(PyArrayObject *self)
777
- {
778
- Py_INCREF(PyGpuNdArray_DESCR(self));
779
- PyObject * ret = (PyObject *)PyGpuNdArray_DESCR(self);
780
- return ret;
781
- }
782
-
783
- static PyObject *
784
- PyGpuNdArray_get_itemsize(PyArrayObject *self)
785
- {
786
- return (PyObject *)PyInt_FromLong(PyGpuNdArray_ITEMSIZE(self));
787
- }
788
-
789
- static PyGetSetDef PyGpuNdArray_getset[] = {
790
- {(char*)"base",
791
- (getter)PyGpuNdArray_get_base,
792
- NULL,
793
- (char*)"Return the object stored in the base attribute",
794
- NULL},
795
- {(char*)"bytes",
796
- (getter)PyGpuNdArray_get_data,
797
- NULL,
798
- (char*)"device data pointer",
799
- NULL},
800
- {(char*)"shape",
801
- (getter)PyGpuNdArray_get_shape,
802
- (setter)PyGpuNdArray_set_shape,
803
- (char*)"shape of this ndarray (tuple)",
804
- NULL},
805
- {(char*)"strides",
806
- (getter)PyGpuNdArray_get_strides,
807
- NULL,//(setter)PyGpuNdArray_set_strides,
808
- (char*)"data pointer strides (in elements)",
809
- NULL},
810
- {(char*)"ndim",
811
- (getter)PyGpuNdArray_get_ndim,
812
- NULL,
813
- (char*)"The number of dimensions in this object",
814
- NULL},
815
- {(char*)"offset",
816
- (getter)PyGpuNdArray_get_offset,
817
- NULL,
818
- (char*)"Return the offset value",
819
- NULL},
820
- {(char*)"size",
821
- (getter)PyGpuNdArray_get_size,
822
- NULL,
823
- (char*)"The number of elements in this object.",
824
- NULL},
825
- {(char*)"data_allocated",
826
- (getter)PyGpuNdArray_get_data_allocated,
827
- NULL,
828
- (char*)"The size of the allocated memory on the device.",
829
- NULL},
830
- {(char*)"itemsize",
831
- (getter)PyGpuNdArray_get_itemsize,
832
- NULL,
833
- (char*)"The size of the base element.",
834
- NULL},
835
- {(char*)"dtype",
836
- (getter)PyGpuNdArray_get_dtype,
837
- NULL,
838
- (char*)"The dtype of the element",
839
- NULL},
840
- {(char*)"flags",
841
- (getter)PyGpuNdArray_get_flags,
842
- NULL,
843
- (char*)"Return the flags as a dictionary",
844
- NULL},
845
- {NULL, NULL, NULL, NULL} /* Sentinel */
846
- };
847
-
848
- // Will by called by __len__ in Python
849
- static Py_ssize_t
850
- PyGpuNdArray_len(PyObject * py_self)
851
- {
852
- PyGpuNdArrayObject * self = (PyGpuNdArrayObject*) py_self;
853
- if (PyGpuNdArray_NDIM(self) <= 0)
854
- {
855
- return (Py_ssize_t) 0;
856
- }
857
- else
858
- {
859
- return (Py_ssize_t) PyGpuNdArray_DIMS(self)[0];
860
- }
861
- }
862
-
863
- static int
864
- PyGpuNdArray_add_offset(PyGpuNdArrayObject * self, int offset)
865
- {
866
- DPRINTF("PyGpuNdArray_add_offset: %p %d\n", self, offset);
867
-
868
- #if OFFSET
869
- PyGpuNdArray_OFFSET(self) += offset;
870
- #else
871
- PyGpuNdArray_DATA(self) += offset;
872
- #endif
873
- return 0;
874
- }
875
-
876
-
877
- static int
878
- PyGpuNdArray_set_data(PyGpuNdArrayObject * self, char * data, PyObject * base, int offset)
879
- {
880
- DPRINTF("PyGpuNdArray_set_data: %p %p %p %d\n", self, data, base, offset);
881
- if (self->data_allocated)
882
- {
883
- assert(PyGpuNdArray_DATA(self));
884
- if (device_free(PyGpuNdArray_DATA(self)))
885
- {
886
- PyGpuNdArray_DATA(self) = NULL;
887
- self->data_allocated = 0;
888
- DPRINTF("PyGpuNdArray_set_data: device_free failed!\n");
889
- PyErr_SetString(PyExc_ValueError, "PyGpuNdArray_set_data: device_free failed");
890
- return -1;
891
- }
892
- }
893
-
894
- // Get the original base object (base.base.base...)
895
- // TODO: check that base is indeed a CudaNdarray?
896
- PyObject * orig_base = base;
897
- // base is not always a PyGpuNdArrayObject. It can be a GpuArray from pycuda, ...
898
- while (orig_base && PyGpuNdArray_Check(orig_base) && ((PyGpuNdArrayObject*) orig_base)->base)
899
- {
900
- // base_base is itself a view
901
- orig_base = ((PyGpuNdArrayObject*) orig_base)->base;
902
- }
903
-
904
- //N.B. XDECREF and XINCREF are no-ops for NULL pointers
905
- if (PyGpuNdArray_BASE(self) != orig_base)
906
- {
907
- Py_XDECREF(PyGpuNdArray_BASE(self));
908
- PyGpuNdArray_BASE(self) = orig_base;
909
- Py_XINCREF(PyGpuNdArray_BASE(self));
910
- }
911
- self->data_allocated = 0;
912
- #if OFFSET
913
- PyGpuNdArray_DATA(self) = data;
914
- PyGpuNdArray_OFFSET(self) = offset;
915
- #else
916
- PyGpuNdArray_DATA(self) = data + offset;
917
- #endif
918
-
919
- return 0;
920
- }
921
-
922
- // Will by called by __getitem__ in Python
923
- static PyObject *
924
- PyGpuNdArray_Subscript(PyObject * py_self, PyObject * key)
925
- {
926
- DPRINTF("Subscript start\n");
927
- PyGpuNdArrayObject * self = (PyGpuNdArrayObject*) py_self;
928
- PyObject * py_rval = NULL;
929
- PyGpuNdArrayObject * rval = NULL;
930
- PyObject * intobj = NULL;
931
-
932
- //PyObject_Print(key, stderr, 0);
933
-
934
- if (key == Py_Ellipsis)
935
- {
936
- DPRINTF("Subscript with ellipse \n");
937
- Py_INCREF(py_self);
938
- DPRINTF("Subscript with ellipse end\n");
939
- return py_self;
940
- }
941
- if ((intobj=PyNumber_Int(key))) //INDEXING BY INTEGER
942
- {
943
- #ifdef DEBUG
944
- PyGpuNdArray_fprint(stderr, self);
945
- #endif
946
- DPRINTF("Subscript with int \n");
947
-
948
- int d_idx = PyInt_AsLong(intobj);
949
- Py_DECREF(intobj); intobj=NULL;
950
-
951
- DPRINTF("Subscript with int 1\n");
952
- if (PyGpuNdArray_NDIM(self) == 0) {
953
- PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed");
954
- return NULL;
955
- }else if (PyGpuNdArray_NDIM(self)< 0){
956
- PyErr_SetString(PyExc_IndexError, "nd arrays must have a number of dim > 0!");
957
- return NULL;
958
- }
959
- int d_dim = PyGpuNdArray_DIMS(self)[0];
960
- int offset = 0;
961
- DPRINTF("Subscript with int 2\n");
962
-
963
- if ((d_idx >= 0) && (d_idx < d_dim)) {
964
- //normal indexing
965
- offset += d_idx * PyGpuNdArray_STRIDES(self)[0];
966
- }
967
- else if ((d_idx < 0) && (d_idx >= -d_dim)) {
968
- //end-based indexing
969
- // d_idx is negative
970
- offset += (d_dim + d_idx) * PyGpuNdArray_STRIDES(self)[0];
971
- } else {
972
- PyErr_SetString(PyExc_IndexError, "index out of bounds");
973
- return NULL;
974
- }
975
- DPRINTF("Subscript with int 3\n");
976
-
977
- //Add the original offset
978
- offset += PyGpuNdArray_OFFSET(self);
979
-
980
- //allocate our subtensor view
981
- py_rval = PyGpuNdArray_New(PyGpuNdArray_NDIM(self) - 1);
982
- rval = (PyGpuNdArrayObject*) py_rval;
983
- if (!rval) return NULL;
984
-
985
- //TODO: find how to refcount on the descr!
986
- PyGpuNdArray_DESCR(py_rval) = PyGpuNdArray_DESCR(self);
987
-
988
- DPRINTF("Subscript with int 4\n");
989
- //initialize the view's data pointer to our own.
990
- assert (0 == rval->data_allocated);
991
- if (PyGpuNdArray_set_data(rval, PyGpuNdArray_DATA(self), (PyObject *) self, offset)){
992
- Py_DECREF(rval);
993
- return NULL;
994
- }
995
- DPRINTF("Subscript with int 5\n");
996
-
997
- for (int d = 1; d < PyGpuNdArray_NDIM(self); ++d) {
998
- PyGpuNdArray_STRIDE(rval, d-1) = PyGpuNdArray_STRIDES(self)[d];
999
- PyGpuNdArray_DIM(rval, d-1) = PyGpuNdArray_DIMS(self)[d];
1000
- }
1001
- }
1002
- else {
1003
- PyErr_Clear();
1004
- }
1005
- if (PySlice_Check(key)) //INDEXING BY SLICE
1006
- {
1007
- DPRINTF("Subscript with slice \n");
1008
- if (PyGpuNdArray_NDIM(self) == 0)
1009
- {
1010
- PyErr_SetString(PyExc_ValueError, "cannot slice a 0-d array");
1011
- return NULL;
1012
- }
1013
-
1014
- int d_dim = PyGpuNdArray_DIMS(self)[0];
1015
- Py_ssize_t start, stop, step, slen;
1016
- if (PySlice_GetIndicesEx((PySliceObject*)key, d_dim, &start, &stop, &step, &slen)) {
1017
- return NULL;
1018
- }
1019
-
1020
- DPRINTF("start %zd\nstop %zd\n step %zd\n slen %zd\n",
1021
- start, stop, step, slen);
1022
-
1023
- //allocate our subtensor view
1024
- py_rval = PyGpuNdArray_New(PyGpuNdArray_NDIM(self));
1025
- rval = (PyGpuNdArrayObject*) py_rval;
1026
- if (!rval) return NULL;
1027
-
1028
- //TODO: find how to refcount on the descr!
1029
- PyGpuNdArray_DESCR(py_rval) = PyGpuNdArray_DESCR(self);
1030
- assert (0 == rval->data_allocated);
1031
- if (PyGpuNdArray_set_data(rval,
1032
- PyGpuNdArray_DATA(self),
1033
- py_self,
1034
- start * PyGpuNdArray_STRIDE(self, 0)
1035
- + PyGpuNdArray_OFFSET(self))) {
1036
- Py_DECREF(rval);
1037
- return NULL;
1038
- }
1039
-
1040
- //initialize dimension 0 of rval
1041
- PyGpuNdArray_STRIDE(rval, 0) = step * PyGpuNdArray_STRIDES(self)[0];
1042
- PyGpuNdArray_DIM(rval, 0) = slen;
1043
- DPRINTF("rval stride %zd\n", PyGpuNdArray_STRIDES(rval)[0]);
1044
- // initialize dimensions > 0 of rval
1045
- for (int d = 1; d < PyGpuNdArray_NDIM(self); ++d) {
1046
- PyGpuNdArray_STRIDE(rval, d) = PyGpuNdArray_STRIDES(self)[d];
1047
- PyGpuNdArray_DIM(rval, d) = PyGpuNdArray_DIMS(self)[d];
1048
- }
1049
- }
1050
- if (PyTuple_Check(key)) //INDEXING BY TUPLE
1051
- {
1052
- DPRINTF("Subscript with tuple \n");
1053
- //elements of the tuple can be either integers or slices
1054
- //the dimensionality of the view we will return is diminished for each slice in the tuple
1055
- int tuple_start_index = 0;
1056
- if (PyTuple_Size(key) > PyGpuNdArray_NDIM(self))
1057
- {
1058
- if (PyTuple_GetItem(key, 0) == Py_Ellipsis &&
1059
- PyTuple_Size(key) == PyGpuNdArray_NDIM(self) + 1)
1060
- {
1061
- tuple_start_index = 1;
1062
- DPRINTF("Subscript with tuple staring with an extra ellipse"
1063
- " at the start.\n");
1064
- }
1065
- else{
1066
- PyErr_SetString(PyExc_IndexError,
1067
- "index error, specified more dimensions then"
1068
- " the number of existing dimensions");
1069
- return NULL;
1070
- }
1071
- }
1072
-
1073
- //calculate the number of dimensions in the return value
1074
- int rval_nd = PyGpuNdArray_NDIM(self);
1075
- for (int tuple_d = tuple_start_index; tuple_d < PyTuple_Size(key);
1076
- ++tuple_d)
1077
- {
1078
- //On some paltform PyInt_Check(<type 'numpy.int64'>) return true, other it return false.
1079
- //So we use PyArray_IsAnyScalar that should covert everything.
1080
- rval_nd -= PyArray_IsAnyScalar(PyTuple_GetItem(key, tuple_d));
1081
- }
1082
-
1083
- //allocate our subtensor view
1084
- py_rval = PyGpuNdArray_New(rval_nd);
1085
- rval = (PyGpuNdArrayObject*) py_rval;
1086
- if (!rval) return NULL;
1087
- assert (0 == rval->data_allocated);
1088
-
1089
- //TODO: find how to refcount on the descr!
1090
- PyGpuNdArray_DESCR(py_rval) = PyGpuNdArray_DESCR(self);
1091
-
1092
- //initialize the view's data pointer to our own.
1093
- if (PyGpuNdArray_set_data(rval, PyGpuNdArray_DATA(self),
1094
- py_self, PyGpuNdArray_OFFSET(self)))
1095
- {
1096
- Py_DECREF(rval);
1097
- return NULL;
1098
- }
1099
-
1100
- // rval_d will refer to the current dimension in the rval.
1101
- // It will not be incremented for integer keys, but will be incremented for slice
1102
- // keys
1103
- int rval_d = 0;
1104
-
1105
- for (int self_d = 0, tuple_d = tuple_start_index;
1106
- self_d < PyGpuNdArray_NDIM(self); ++self_d, ++tuple_d)
1107
- {
1108
- // keys can be shorter than PyGpuNdArray_NDIM(self).
1109
- // when that happens, it means that the remaining dimensions are "full slices"
1110
- if (tuple_d >= PyTuple_Size(key))
1111
- {
1112
- PyGpuNdArray_STRIDE(rval, rval_d) =
1113
- PyGpuNdArray_STRIDES(self)[tuple_d];
1114
- PyGpuNdArray_DIM(rval, rval_d) =
1115
- PyGpuNdArray_DIMS(self)[tuple_d];
1116
- ++rval_d;
1117
- DPRINTF("Subscript extra dims to append %zd %zd\n",
1118
- PyGpuNdArray_STRIDE(rval, rval_d),
1119
- PyGpuNdArray_DIM(rval, rval_d));
1120
- }
1121
- else
1122
- {
1123
- PyObject * key_d = PyTuple_GetItem(key, tuple_d);
1124
-
1125
- if (PySlice_Check(key_d))
1126
- {
1127
- Py_ssize_t start, stop, step, slen;
1128
- if (PySlice_GetIndicesEx((PySliceObject*)key_d,
1129
- PyGpuNdArray_DIMS(self)[self_d],
1130
- &start, &stop, &step, &slen))
1131
- {
1132
- Py_DECREF(rval);
1133
- return NULL;
1134
- }
1135
- PyGpuNdArray_add_offset(rval, start * PyGpuNdArray_STRIDES(self)[self_d]);
1136
- PyGpuNdArray_STRIDE(rval, rval_d) = step * PyGpuNdArray_STRIDES(self)[self_d];
1137
- PyGpuNdArray_DIM(rval, rval_d) = slen;
1138
-
1139
- DPRINTF("rval_d %d self_d %d\n start %zd\nstop %zd\n step %zd\n slen %zd\n",
1140
- rval_d, self_d, start, stop, step, slen);
1141
- ++rval_d;
1142
- }
1143
- else if ((intobj=PyNumber_Int(key_d)))
1144
- {
1145
- assert(PyArray_IsAnyScalar(key_d));
1146
- int d_idx = PyInt_AsLong(intobj);
1147
- Py_DECREF(intobj);
1148
- intobj = NULL;
1149
- int d_dim = PyGpuNdArray_DIMS(self)[self_d];
1150
-
1151
- if ((d_idx >= 0) && (d_idx < d_dim))
1152
- {
1153
- //normal indexing
1154
- PyGpuNdArray_add_offset(rval, d_idx * PyGpuNdArray_STRIDES(self)[self_d]);
1155
- }
1156
- else if ((d_idx < 0) && (d_idx >= -d_dim))
1157
- {
1158
- //end-based indexing
1159
- PyGpuNdArray_add_offset(rval, (d_dim + d_idx) * PyGpuNdArray_STRIDES(self)[self_d]);
1160
- }
1161
- else
1162
- {
1163
- PyErr_SetString(PyExc_IndexError, "index out of bounds");
1164
- Py_DECREF(rval);
1165
- return NULL;
1166
- }
1167
- }
1168
- else if (key_d == Py_Ellipsis)
1169
- {
1170
- if (self_d != 0){
1171
- PyErr_Format(PyExc_IndexError,
1172
- "Ellipsis supported only at the start of"
1173
- " the tuple");
1174
- Py_DECREF(rval);
1175
- return NULL;
1176
- }
1177
- DPRINTF("Substript with tuple with the first element an ellipse\n");
1178
- for( ; self_d < (rval_nd - PyTuple_Size(key) + 1); self_d++)
1179
- {
1180
- PyGpuNdArray_STRIDE(rval, rval_d) =
1181
- PyGpuNdArray_STRIDES(self)[self_d];
1182
- PyGpuNdArray_DIM(rval, rval_d) =
1183
- PyGpuNdArray_DIMS(self)[self_d];
1184
- DPRINTF("Ellipse append dimensions self_%d with %zd %zd\n",
1185
- self_d,
1186
- PyGpuNdArray_STRIDE(rval, rval_d),
1187
- PyGpuNdArray_DIM(rval, rval_d));
1188
- ++rval_d;
1189
- }
1190
- tuple_start_index = 1;
1191
- self_d--;
1192
- }
1193
- else
1194
- {
1195
- PyErr_Clear(); // clear the error set by PyNumber_Int
1196
- PyErr_Format(PyExc_IndexError,
1197
- "index must be either int or slice. Got %s",
1198
- PyString_AsString(PyObject_Str(key_d)));
1199
- Py_DECREF(rval);
1200
- return NULL;
1201
- }
1202
- }
1203
- }
1204
- }
1205
- if (py_rval)
1206
- {
1207
- #ifdef DEBUG
1208
- PyGpuNdArray_fprint(stderr, self);
1209
- PyGpuNdArray_fprint(stderr, rval);
1210
- #endif
1211
- }
1212
- else
1213
- {
1214
- PyErr_SetString(PyExc_NotImplementedError, "Unknown key type");
1215
- return NULL;
1216
- }
1217
-
1218
- // Set flags
1219
- if (PyGpuNdArray_ISWRITEABLE(self)) {
1220
- PyGpuNdArray_FLAGS(rval) |= NPY_WRITEABLE;
1221
- } else {
1222
- PyGpuNdArray_FLAGS(rval) &= ~NPY_WRITEABLE;
1223
- }
1224
- PyGpuNdArray_FLAGS(rval) &= ~NPY_OWNDATA;
1225
- if (PyGpuNdArray_ISALIGNED(self)) {
1226
- PyGpuNdArray_FLAGS(rval) |= NPY_ALIGNED;
1227
- } else {
1228
- PyGpuNdArray_FLAGS(rval) &= ~NPY_ALIGNED;
1229
- }
1230
- PyGpuNdArray_FLAGS(rval) &= ~NPY_UPDATEIFCOPY;
1231
-
1232
- if (false && PyGpuNdArray_NDIM(rval) == 0) {
1233
- //Numpy is not consistent here
1234
- //When we create a new numpy ndarray of 0 dim, it is not f contiguous
1235
- //But when we take a subtensor that is of 0 dim, it is f contiguous!
1236
- //We make as them for now...
1237
- PyGpuNdArray_FLAGS(rval) &= ~NPY_F_CONTIGUOUS;
1238
- PyGpuNdArray_FLAGS(rval) |= NPY_C_CONTIGUOUS;
1239
- } else {
1240
- if (PyGpuNdArray_is_c_contiguous(rval)) {
1241
- PyGpuNdArray_FLAGS(rval) |= NPY_C_CONTIGUOUS;
1242
- } else {
1243
- PyGpuNdArray_FLAGS(rval) &= ~NPY_C_CONTIGUOUS;
1244
- }
1245
- if (PyGpuNdArray_is_f_contiguous(rval)) {
1246
- PyGpuNdArray_FLAGS(rval) |= NPY_F_CONTIGUOUS;
1247
- } else {
1248
- PyGpuNdArray_FLAGS(rval) &= ~NPY_F_CONTIGUOUS;
1249
- }
1250
- }
1251
-
1252
- DPRINTF("Subscript end\n");
1253
- return py_rval;
1254
- }
1255
-
1256
- PyMappingMethods PyGpuNdArrayMappingMethods = {
1257
- PyGpuNdArray_len, //lenfunc mp_length; __len__
1258
- PyGpuNdArray_Subscript, //binaryfunc mp_subscript; __getitem__
1259
- 0 //PyGpuNdArray_setitem //objobjargproc mp_ass_subscript; __setitem__
1260
- };
1261
-
1262
- static PyTypeObject PyGpuNdArrayType =
1263
- {
1264
- PyObject_HEAD_INIT(NULL)
1265
- 0, /*ob_size*/
1266
- "GpuNdArray", /*tp_name*/
1267
- sizeof(PyGpuNdArrayObject), /*tp_basicsize*/
1268
- 0, /*tp_itemsize*/
1269
- (destructor)PyGpuNdArrayObject_dealloc, /*tp_dealloc*/
1270
- 0, /*tp_print*/
1271
- 0, /*tp_getattr*/
1272
- 0, /*tp_setattr*/
1273
- 0, /*tp_compare*/
1274
- 0, /*tp_repr*/
1275
- 0, //&PyGpuNdArrayObjectNumberMethods, /*tp_as_number*/
1276
- 0, /*tp_as_sequence*/
1277
- &PyGpuNdArrayMappingMethods,/*tp_as_mapping*/
1278
- 0, /*tp_hash */
1279
- 0, /*tp_call*/
1280
- 0, /*tp_str*/
1281
- 0, /*tp_getattro*/
1282
- 0, /*tp_setattro*/
1283
- 0, /*tp_as_buffer*/
1284
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/
1285
- "PyGpuNdArrayObject objects", /* tp_doc */
1286
- 0, /* tp_traverse */
1287
- 0, /* tp_clear */
1288
- 0, /* tp_richcompare */
1289
- 0, /* tp_weaklistoffset */
1290
- 0, /* tp_iter */
1291
- 0, /* tp_iternext */
1292
- PyGpuNdArray_methods, /* tp_methods */
1293
- 0, //PyGpuNdArray_members, /* tp_members */ //TODO
1294
- PyGpuNdArray_getset, /* tp_getset */
1295
- 0, /* tp_base */
1296
- 0, /* tp_dict */
1297
- 0, /* tp_descr_get */
1298
- 0, /* tp_descr_set */
1299
- 0, /* tp_dictoffset */
1300
- (initproc)PyGpuNdArray_init,/* tp_init */
1301
- 0, /* tp_alloc */
1302
- PyGpuNdArray_new, /* tp_new */
1303
- };
1304
-
1305
- //////////////////////////////////////
1306
- //
1307
- // C API FOR PyGpuNdArrayObject
1308
- //
1309
- //////////////////////////////////////
1310
- PyObject *
1311
- PyGpuNdArray_New(int nd)
1312
- {
1313
- DPRINTF("PyGpuNdArray_New start\n");
1314
- PyGpuNdArrayObject *self = (PyGpuNdArrayObject *)PyGpuNdArrayType.tp_alloc(&PyGpuNdArrayType, 0);
1315
- if (self == NULL) {
1316
- PyErr_SetString(PyExc_RuntimeError, "PyGpuNdArray_New failed to allocate self");
1317
- return NULL;
1318
- }
1319
- PyGpuNdArray_null_init(self);
1320
-
1321
- if (nd == 0) {
1322
- PyGpuNdArray_NDIM(self) = 0;
1323
- }
1324
- else if (nd > 0) {
1325
- if (PyGpuNdArray_set_nd(self, nd)) {
1326
- Py_DECREF(self);
1327
- return NULL;
1328
- }
1329
- }
1330
- ++_outstanding_mallocs[1];
1331
- DPRINTF("PyGpuNdArray_New end\n");
1332
- return (PyObject *)self;
1333
- }
1334
-
1335
- int
1336
- PyGpuNdArray_Check(const PyObject * ob)
1337
- {
1338
- DPRINTF("PyGpuNdArray_Check\n");
1339
- //TODO: doesn't work with inheritance
1340
- return PyGpuNdArray_CheckExact(ob);
1341
- }
1342
- int
1343
- PyGpuNdArray_CheckExact(const PyObject * ob)
1344
- {
1345
- DPRINTF("PyGpuNdArray_CheckExact\n");
1346
- return ((ob->ob_type == &PyGpuNdArrayType) ? 1 : 0);
1347
- }
1348
-
1349
- static PyObject *
1350
- PyGpuNdArray_as_c_contiguous(PyObject* dummy, PyObject* args, PyObject *kargs)
1351
- {
1352
- DPRINTF("PyGpuNdArray_as_c_contiguous:start\n");
1353
- static const char *kwlist[] = {"a", "dtype", NULL};
1354
- PyArray_Descr *typecode = NULL;
1355
- PyObject *self_ = NULL;
1356
-
1357
- if (!PyArg_ParseTupleAndKeywords(args, kargs, "O|O&",
1358
- (char **)kwlist,
1359
- &self_,
1360
- PyArray_DescrConverter,
1361
- &typecode)) {
1362
- Py_XDECREF(typecode);
1363
- Py_XDECREF(self_);
1364
- return NULL;
1365
- }
1366
- assert(typecode == NULL);
1367
- if (!PyGpuNdArray_Check(self_)){
1368
- PyErr_SetString(PyExc_TypeError,
1369
- "PyGpuNdArray_as_c_contiguous:"
1370
- " PyGpuNdArrayObject required");
1371
- return NULL;
1372
- }
1373
-
1374
- PyGpuNdArrayObject *self = (PyGpuNdArrayObject*)self_;
1375
- if (PyGpuNdArray_is_c_contiguous(self)){
1376
- Py_INCREF(self);
1377
- if (PyGpuNdArray_NDIM(self) == 0){
1378
- //numpy.ascontiguous() always return object with 1d.
1379
- DPRINTF("PyGpuNdArray_as_c_contiguous: upcast to 1d tensor end\n");
1380
- PyObject * rval = PyGpuNdArray_View(self);
1381
- if (!rval)
1382
- return NULL;
1383
- PyGpuNdArray_set_nd((PyGpuNdArrayObject*)rval, 1);
1384
- PyGpuNdArray_DIM(rval, 0) = 1;
1385
- PyGpuNdArray_STRIDE(rval, 0) = PyGpuNdArray_ITEMSIZE(rval);
1386
- return rval;
1387
- }
1388
- DPRINTF("PyGpuNdArray_as_c_contiguous: no copy end\n");
1389
- return (PyObject*)self;
1390
- }
1391
-
1392
- PyObject * ret = PyGpuNdArray_Copy(self);
1393
- DPRINTF("PyGpuNdArray_as_c_contiguous: copy end\n");
1394
- return ret;
1395
- }
1396
- static PyObject *
1397
- PyGpuNdArray_as_f_contiguous(PyObject* dummy, PyObject* args, PyObject *kargs)
1398
- {
1399
- DPRINTF("PyGpuNdArray_as_f_contiguous:start\n");
1400
- static const char *kwlist[] = {"a", "dtype", NULL};
1401
- PyArray_Descr *typecode = NULL;
1402
- PyObject *self_ = NULL;
1403
-
1404
- if (!PyArg_ParseTupleAndKeywords(args, kargs, "O|O&",
1405
- (char **)kwlist,
1406
- &self_,
1407
- PyArray_DescrConverter,
1408
- &typecode)) {
1409
- Py_XDECREF(typecode);
1410
- Py_XDECREF(self_);
1411
- return NULL;
1412
- }
1413
- assert(typecode == NULL);
1414
- if (!PyGpuNdArray_Check(self_)){
1415
- PyErr_SetString(PyExc_TypeError,
1416
- "PyGpuNdArray_as_f_contiguous:"
1417
- " PyGpuNdArrayObject required");
1418
- return NULL;
1419
- }
1420
-
1421
- PyGpuNdArrayObject *self = (PyGpuNdArrayObject*)self_;
1422
- if (PyGpuNdArray_is_f_contiguous(self)){
1423
- Py_INCREF(self);
1424
- if (PyGpuNdArray_NDIM(self) == 0){
1425
- //numpy.ascontiguous() always return object with 1d.
1426
- PyObject * rval = PyGpuNdArray_View(self);
1427
- if (!rval)
1428
- return NULL;
1429
- PyGpuNdArray_set_nd((PyGpuNdArrayObject*)rval, 1);
1430
- PyGpuNdArray_DIM(rval, 0) = 1;
1431
- PyGpuNdArray_STRIDE(rval, 0) = PyGpuNdArray_ITEMSIZE(rval);
1432
- DPRINTF("PyGpuNdArray_as_f_contiguous: upcast to 1d tensor end\n");
1433
- return rval;
1434
- }
1435
- DPRINTF("PyGpuNdArray_as_f_contiguous: no copy end\n");
1436
- return (PyObject*)self;
1437
- }
1438
-
1439
- PyObject * ret = PyGpuNdArray_Copy(self, NPY_FORTRANORDER);
1440
- DPRINTF("PyGpuNdArray_as_f_contiguous: copy end\n");
1441
- return ret;
1442
- }
1443
-
1444
- #ifdef WITH_OPENCL
1445
- #ifdef __APPLE__
1446
- #include <OpenCL/opencl.h>
1447
- #else
1448
- #include <CL/opencl.h>
1449
- #endif
1450
- extern void setup_context(cl_context c);
1451
-
1452
- PyObject *
1453
- PyGpuNdArray_set_opencl_context(PyObject *mod, PyObject *ctx) {
1454
- Py_ssize_t v;
1455
-
1456
- v = PyInt_AsSsize_t(ctx);
1457
- if (v == -1 && PyErr_Occurred())
1458
- return NULL;
1459
-
1460
- setup_context((cl_context)v);
1461
-
1462
- Py_INCREF(Py_None);
1463
- return Py_None;
1464
- }
1465
- #endif
1466
-
1467
- static PyMethodDef module_methods[] = {
1468
- //{"dimshuffle", PyGpuNdArray_Dimshuffle, METH_VARARGS, "Returns the dimshuffle of a PyGpuNdArray."},
1469
- {"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"},
1470
- {"zeros",
1471
- (PyCFunction)PyGpuNdArray_zeros, METH_VARARGS|METH_KEYWORDS,
1472
- "Create a new PyGpuNdArray with specified shape, filled with zeros."},
1473
- {"empty",
1474
- (PyCFunction)PyGpuNdArray_empty, METH_VARARGS|METH_KEYWORDS,
1475
- "Create a new PyGpuNdArray with specified shape, filled with zeros."},
1476
- {"ascontiguousarray",
1477
- (PyCFunction)PyGpuNdArray_as_c_contiguous, METH_VARARGS|METH_KEYWORDS,
1478
- "If the array is not c contiguous, copy it to a new c contiguous region."},
1479
- {"asfortranarray",
1480
- (PyCFunction)PyGpuNdArray_as_f_contiguous, METH_VARARGS|METH_KEYWORDS,
1481
- "If the array is not f contiguous, copy it to a new c contiguous region."},
1482
- #ifdef WITH_OPENCL
1483
- {"set_opencl_context",
1484
- PyGpuNdArray_set_opencl_context, METH_O,
1485
- "Set the OpenCL context to use for allocations and work."},
1486
- #endif
1487
- {NULL, NULL, NULL, NULL} /* Sentinel */
1488
- };
1489
-
1490
- #ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
1491
- #define PyMODINIT_FUNC void
1492
- #endif
1493
- PyMODINIT_FUNC
1494
- initpygpu_ndarray(void)
1495
- {
1496
- import_array();
1497
-
1498
- PyObject* m;
1499
-
1500
- if (PyType_Ready(&PyGpuNdArrayType) < 0)
1501
- return;
1502
-
1503
- m = Py_InitModule3("pygpu_ndarray", module_methods,
1504
- "Example module that creates an extension type.");
1505
-
1506
- if (m == NULL)
1507
- return;
1508
-
1509
- Py_INCREF(&PyGpuNdArrayType);
1510
- PyModule_AddObject(m, "GpuNdArrayObject", (PyObject *)&PyGpuNdArrayType);
1511
- #if COMPUTE_GPU_MEM_USED
1512
- for(int i=0;i<TABLE_SIZE;i++){
1513
- _alloc_size_table[i].ptr=NULL;
1514
- _alloc_size_table[i].size=0;
1515
- }
1516
- #endif
1517
- // cublasInit();
1518
- //if (0&&CUBLAS_STATUS_SUCCESS != cublasGetError())
1519
- //{
1520
- //std::cerr << "WARNING: initcuda_ndarray: error initializing device\n";
1521
- //}
1522
- /*
1523
- if (0) //TODO: is this necessary?
1524
- {
1525
- int deviceId = 0; // TODO: what number goes here?
1526
- cudaSetDevice(deviceId);
1527
- cudaError_t err = cudaGetLastError();
1528
- if( cudaSuccess != err)
1529
- {
1530
- std::cerr << "Error in SetDevice:" << cudaGetErrorString(err) << "\n";
1531
- }
1532
- }
1533
- */
1534
- }
1535
-
1536
- /*
1537
- Local Variables:
1538
- mode:c++
1539
- c-basic-offset:4
1540
- c-file-style:"stroustrup"
1541
- c-file-offsets:((innamespace . 0)(inline-open . 0))
1542
- indent-tabs-mode:nil
1543
- fill-column:79
1544
- End:
1545
- */
1546
- // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :