pyopencl 2024.2.7__cp311-cp311-macosx_11_0_arm64.whl → 2025.1__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +127 -122
- pyopencl/_cl.cpython-311-darwin.so +0 -0
- pyopencl/_mymako.py +3 -3
- pyopencl/algorithm.py +10 -7
- pyopencl/array.py +58 -123
- pyopencl/bitonic_sort.py +3 -1
- pyopencl/bitonic_sort_templates.py +1 -1
- pyopencl/cache.py +23 -22
- pyopencl/capture_call.py +5 -4
- pyopencl/clrandom.py +1 -0
- pyopencl/cltypes.py +2 -2
- pyopencl/compyte/dtypes.py +4 -4
- pyopencl/compyte/pyproject.toml +54 -0
- pyopencl/elementwise.py +9 -2
- pyopencl/invoker.py +11 -9
- pyopencl/ipython_ext.py +1 -1
- pyopencl/reduction.py +16 -10
- pyopencl/scan.py +38 -22
- pyopencl/tools.py +23 -13
- pyopencl/version.py +1 -1
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/METADATA +11 -8
- pyopencl-2025.1.dist-info/RECORD +42 -0
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/WHEEL +1 -1
- pyopencl/compyte/.git +0 -1
- pyopencl/compyte/ndarray/Makefile +0 -31
- pyopencl/compyte/ndarray/__init__.py +0 -0
- pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
- pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
- pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
- pyopencl/compyte/ndarray/pygpu_language.h +0 -207
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
- pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
- pyopencl/compyte/ndarray/setup_opencl.py +0 -101
- pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
- pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
- pyopencl-2024.2.7.dist-info/RECORD +0 -56
- {pyopencl-2024.2.7.dist-info → pyopencl-2025.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,1546 +0,0 @@
|
|
|
1
|
-
#include <Python.h>
|
|
2
|
-
#include <structmember.h>
|
|
3
|
-
|
|
4
|
-
#include <numpy/arrayobject.h>
|
|
5
|
-
#include <iostream>
|
|
6
|
-
|
|
7
|
-
#include "pygpu_ndarray.h"
|
|
8
|
-
#include "pygpu_language.h"
|
|
9
|
-
|
|
10
|
-
/////////////////////////
|
|
11
|
-
// Static helper methods
|
|
12
|
-
/////////////////////////
|
|
13
|
-
|
|
14
|
-
static void
|
|
15
|
-
PyGpuNdArray_null_init(PyGpuNdArrayObject *self)
|
|
16
|
-
{
|
|
17
|
-
DPRINTF("PyGpuNdArrayObject_null_init\n");
|
|
18
|
-
|
|
19
|
-
PyGpuNdArray_DATA(self) = NULL;
|
|
20
|
-
PyGpuNdArray_OFFSET(self) = 0;
|
|
21
|
-
PyGpuNdArray_NDIM(self) = -1;
|
|
22
|
-
self->base = NULL;
|
|
23
|
-
PyGpuNdArray_DIMS(self) = NULL;
|
|
24
|
-
PyGpuNdArray_STRIDES(self) = NULL;
|
|
25
|
-
PyGpuNdArray_FLAGS(self) = NPY_DEFAULT;
|
|
26
|
-
self->descr = NULL;
|
|
27
|
-
|
|
28
|
-
self->data_allocated = 0;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
/////////////////////////////
|
|
34
|
-
// Satisfying reqs to be Type
|
|
35
|
-
/////////////////////////////
|
|
36
|
-
|
|
37
|
-
//DON'T use directly(if their is other PyGpuNdArrayObject that point to it, it will cause problem)! use Py_DECREF() instead
|
|
38
|
-
static void
|
|
39
|
-
PyGpuNdArrayObject_dealloc(PyGpuNdArrayObject* self)
|
|
40
|
-
{
|
|
41
|
-
DPRINTF("PyGpuNdArrayObject_dealloc\n");
|
|
42
|
-
DPRINTF("PyGpuNdArrayObject dealloc %p %d %p\n", self, self->data_allocated, PyGpuNdArray_DATA(self));
|
|
43
|
-
|
|
44
|
-
if(self->ob_refcnt>1)
|
|
45
|
-
printf("WARNING:PyGpuNdArrayObject_dealloc called when their is still active reference to it.\n");
|
|
46
|
-
|
|
47
|
-
if (self->data_allocated){
|
|
48
|
-
assert(PyGpuNdArray_DATA(self));
|
|
49
|
-
if (PyGpuNdArray_DATA(self)){
|
|
50
|
-
if (device_free(PyGpuNdArray_DATA(self))){
|
|
51
|
-
fprintf(stderr,
|
|
52
|
-
"!!!! error freeing device memory %p (self=%p)\n",
|
|
53
|
-
PyGpuNdArray_DATA(self), self);
|
|
54
|
-
}
|
|
55
|
-
PyGpuNdArray_DATA(self) = NULL;
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
PyGpuNdArray_OFFSET(self) = 0;
|
|
59
|
-
PyGpuNdArray_NDIM(self) = -1;
|
|
60
|
-
Py_XDECREF(self->base);
|
|
61
|
-
self->base = NULL;
|
|
62
|
-
if (PyGpuNdArray_DIMS(self)){
|
|
63
|
-
free(PyGpuNdArray_DIMS(self));
|
|
64
|
-
PyGpuNdArray_DIMS(self) = NULL;
|
|
65
|
-
}
|
|
66
|
-
if (PyGpuNdArray_STRIDES(self)){
|
|
67
|
-
free(PyGpuNdArray_STRIDES(self));
|
|
68
|
-
PyGpuNdArray_STRIDES(self) = NULL;
|
|
69
|
-
}
|
|
70
|
-
PyGpuNdArray_FLAGS(self) = NPY_DEFAULT;
|
|
71
|
-
//Py_XDECREF(self->descr);//TODO: How to handle the refcont on this object?
|
|
72
|
-
self->descr = NULL;
|
|
73
|
-
self->data_allocated = 0;
|
|
74
|
-
|
|
75
|
-
self->ob_type->tp_free((PyObject*)self);
|
|
76
|
-
--_outstanding_mallocs[1];
|
|
77
|
-
DPRINTF("device_malloc_counts: (device) %i (obj) %i\n",
|
|
78
|
-
_outstanding_mallocs[0],
|
|
79
|
-
_outstanding_mallocs[1]);
|
|
80
|
-
DPRINTF("PyGpuNdArrayObject_dealloc end\n");
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
static PyObject *
|
|
84
|
-
PyGpuNdArray_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|
85
|
-
{
|
|
86
|
-
DPRINTF("PyGpuNdArray_new\n");
|
|
87
|
-
PyGpuNdArrayObject *self;
|
|
88
|
-
|
|
89
|
-
self = (PyGpuNdArrayObject *)type->tp_alloc(type, 0);
|
|
90
|
-
if (self != NULL){
|
|
91
|
-
PyGpuNdArray_null_init(self);
|
|
92
|
-
++_outstanding_mallocs[1];
|
|
93
|
-
}
|
|
94
|
-
DPRINTF("PyGpuNdArray_new end %p\n", self);
|
|
95
|
-
return (PyObject *)self;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
static int
|
|
99
|
-
PyGpuNdArray_init(PyGpuNdArrayObject *self, PyObject *args, PyObject *kwds)
|
|
100
|
-
{
|
|
101
|
-
DPRINTF("PyGpuNdArray_init\n");
|
|
102
|
-
PyObject *arr=NULL;
|
|
103
|
-
|
|
104
|
-
if (! PyArg_ParseTuple(args, "O", &arr))
|
|
105
|
-
return -1;
|
|
106
|
-
if (! PyArray_Check(arr)){
|
|
107
|
-
PyErr_SetString(PyExc_TypeError, "PyGpuNdArrayObject_init: PyArray or PyGpuNdArrayObject arg required");
|
|
108
|
-
return -1;
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
// TODO: We must create a new copy of the PyArray_Descr(or this only increment the refcount?) or still the reference?
|
|
112
|
-
PyArray_Descr * type = PyArray_DescrFromType(PyArray_TYPE(arr));
|
|
113
|
-
self->descr = type;
|
|
114
|
-
Py_XINCREF(self->descr);//TODO: How to handle the refcont on this object?
|
|
115
|
-
int rval = PyGpuNdArray_CopyFromArray(self, (PyArrayObject*)arr);
|
|
116
|
-
DPRINTF("PyGpuNdArray_init: end %p type=%p\n", self, self->descr);
|
|
117
|
-
return rval;
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
int
|
|
122
|
-
PyGpuNdArray_CopyFromArray(PyGpuNdArrayObject * self, PyArrayObject*obj)
|
|
123
|
-
{
|
|
124
|
-
DPRINTF("PyGpuNdArray_CopyFromArray: start descr=%p\n", self->descr);
|
|
125
|
-
//modif done to the new array won't be updated!
|
|
126
|
-
assert(!PyGpuNdArray_CHKFLAGS(self, NPY_UPDATEIFCOPY));
|
|
127
|
-
//Aligned are not tested, so don't allow it for now
|
|
128
|
-
assert(PyGpuNdArray_CHKFLAGS(self, NPY_ALIGNED));
|
|
129
|
-
|
|
130
|
-
int typenum = PyArray_TYPE(obj);
|
|
131
|
-
PyObject * py_src = NULL;
|
|
132
|
-
if (PyArray_ISONESEGMENT(obj)) {
|
|
133
|
-
Py_INCREF(obj);
|
|
134
|
-
py_src = (PyObject *) obj;
|
|
135
|
-
}else{
|
|
136
|
-
py_src = PyArray_ContiguousFromAny((PyObject*)obj, typenum,
|
|
137
|
-
PyArray_NDIM(obj),
|
|
138
|
-
PyArray_NDIM(obj));
|
|
139
|
-
}
|
|
140
|
-
DPRINTF("PyGpuNdArray_CopyFromArray: contiguous!\n");
|
|
141
|
-
if (!py_src) {
|
|
142
|
-
return -1;
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
int err;
|
|
146
|
-
if(PyArray_ISFORTRAN(obj) && ! PyArray_ISCONTIGUOUS(obj)){
|
|
147
|
-
DPRINTF("PyGpuNdArray_CopyFromArray: fortran!\n");
|
|
148
|
-
err = PyGpuNdArray_alloc_contiguous(self, obj->nd, obj->dimensions,
|
|
149
|
-
NPY_FORTRANORDER);
|
|
150
|
-
}else{
|
|
151
|
-
err = PyGpuNdArray_alloc_contiguous(self, obj->nd, obj->dimensions);
|
|
152
|
-
}
|
|
153
|
-
if (err) {
|
|
154
|
-
return err;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
//check that the flag are the same
|
|
158
|
-
if (PyArray_ISCONTIGUOUS(py_src) != PyGpuNdArray_ISCONTIGUOUS(self) &&
|
|
159
|
-
PyArray_ISFORTRAN(obj) && 0) {
|
|
160
|
-
PyErr_Format(PyExc_RuntimeError, "ISCONTIGUOUS %d %d\n", PyArray_ISCONTIGUOUS(py_src), PyGpuNdArray_ISCONTIGUOUS(self));
|
|
161
|
-
return -1;
|
|
162
|
-
}
|
|
163
|
-
assert(PyArray_ISCONTIGUOUS(py_src) == PyGpuNdArray_ISCONTIGUOUS(self) ||
|
|
164
|
-
PyArray_ISFORTRAN(obj));
|
|
165
|
-
assert(PyArray_ISFORTRAN(py_src) == PyGpuNdArray_ISFORTRAN(self));
|
|
166
|
-
assert(PyArray_ISALIGNED(py_src) == PyGpuNdArray_ISALIGNED(self));
|
|
167
|
-
|
|
168
|
-
// New memory, so we should own it.
|
|
169
|
-
assert(PyGpuNdArray_CHKFLAGS(self, NPY_OWNDATA));
|
|
170
|
-
// New memory, so it should be writable
|
|
171
|
-
assert(PyGpuNdArray_ISWRITEABLE(self));
|
|
172
|
-
|
|
173
|
-
err = PyGpuMemcpy(PyGpuNdArray_DATA(self),
|
|
174
|
-
PyArray_DATA(py_src),
|
|
175
|
-
PyGpuNdArray_OFFSET(self),
|
|
176
|
-
PyArray_SIZE(py_src) * PyArray_ITEMSIZE(py_src),
|
|
177
|
-
PyGpuHostToDevice);
|
|
178
|
-
if (err) {
|
|
179
|
-
Py_DECREF(py_src);
|
|
180
|
-
return -1;
|
|
181
|
-
}
|
|
182
|
-
Py_DECREF(py_src);
|
|
183
|
-
DPRINTF("PyGpuNdArray_CopyFromArray: end\n");
|
|
184
|
-
return 0;
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
static PyObject * PyGpuNdArray_copy(PyObject * self, PyObject *args,
|
|
188
|
-
PyObject *kargs)
|
|
189
|
-
{
|
|
190
|
-
DPRINTF("PyGpuNdArray_copy start\n");
|
|
191
|
-
static const char *kwlist[] = {"order", NULL};
|
|
192
|
-
NPY_ORDER order = PyArray_CORDER;
|
|
193
|
-
|
|
194
|
-
if(!PyGpuNdArray_Check(self)){
|
|
195
|
-
PyErr_SetString(PyExc_ValueError, "PyGpuNdArray_copy: expected a PyGpuNdArrayObject.");
|
|
196
|
-
return NULL;
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
DPRINTF("PyGpuNdArray_copy before parse inputs\n");
|
|
200
|
-
if (!PyArg_ParseTupleAndKeywords(args, kargs, "|O&",
|
|
201
|
-
(char**)kwlist,
|
|
202
|
-
PyArray_OrderConverter,
|
|
203
|
-
&order)) {
|
|
204
|
-
DPRINTF("PyGpuNdArray_copy start1.2\n");
|
|
205
|
-
return NULL;
|
|
206
|
-
}
|
|
207
|
-
DPRINTF("PyGpuNdArray_copy after parse inputs\n");
|
|
208
|
-
|
|
209
|
-
DPRINTF("PyGpuNdArray_copy before copy\n");
|
|
210
|
-
PyObject *ret = PyGpuNdArray_Copy((PyGpuNdArrayObject*)self, order);
|
|
211
|
-
DPRINTF("PyGpuNdArray_copy end\n");
|
|
212
|
-
return ret;
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
static PyObject * PyGpuNdArray_Copy(PyGpuNdArrayObject * self, NPY_ORDER order)
|
|
216
|
-
{
|
|
217
|
-
DPRINTF("PyGpuNdArray_Copy start\n");
|
|
218
|
-
PyObject * rval = PyGpuNdArray_New();
|
|
219
|
-
//TODO find how to refcount descr.
|
|
220
|
-
PyGpuNdArray_DESCR(rval) = PyGpuNdArray_DESCR(self);
|
|
221
|
-
if ((!rval) || (-1 == PyGpuNdArray_NDIM(self))) {
|
|
222
|
-
return rval;
|
|
223
|
-
}
|
|
224
|
-
if (PyGpuNdArray_alloc_contiguous((PyGpuNdArrayObject*)rval,
|
|
225
|
-
PyGpuNdArray_NDIM(self),
|
|
226
|
-
PyGpuNdArray_DIMS(self),
|
|
227
|
-
order)) {
|
|
228
|
-
Py_DECREF(rval);
|
|
229
|
-
return NULL;
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
if (PyGpuNdArray_CopyFromPyGpuNdArray((PyGpuNdArrayObject*)rval, self)) {
|
|
233
|
-
Py_DECREF(rval);
|
|
234
|
-
return NULL;
|
|
235
|
-
}
|
|
236
|
-
if (order == NPY_F_CONTIGUOUS)
|
|
237
|
-
PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
|
|
238
|
-
|
|
239
|
-
#ifdef DEBUG
|
|
240
|
-
PyGpuNdArray_fprint(stderr, self);
|
|
241
|
-
PyGpuNdArray_fprint(stderr, (PyGpuNdArrayObject *)rval);
|
|
242
|
-
#endif
|
|
243
|
-
DPRINTF("PyGpuNdArray_Copy end\n");
|
|
244
|
-
return rval;
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
PyObject * PyGpuNdArray_DeepCopy(PyGpuNdArrayObject * self, PyObject * memo)
|
|
248
|
-
{
|
|
249
|
-
assert(PyDict_Check(memo));
|
|
250
|
-
PyObject * selfkey = PyInt_FromLong((long)self);
|
|
251
|
-
assert(selfkey);
|
|
252
|
-
|
|
253
|
-
if (PyDict_Contains(memo, selfkey)) {
|
|
254
|
-
PyObject * rval = PyDict_GetItem(memo, selfkey);
|
|
255
|
-
Py_DECREF(selfkey);
|
|
256
|
-
Py_XINCREF(rval);
|
|
257
|
-
return rval;
|
|
258
|
-
} else {
|
|
259
|
-
DPRINTF("PyGpuNdArray_DeepCopy: startd deepcopy\n");
|
|
260
|
-
PyObject * rval = PyGpuNdArray_Copy(self);
|
|
261
|
-
if (NULL == rval) {
|
|
262
|
-
Py_DECREF(selfkey);
|
|
263
|
-
return NULL;
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
DPRINTF("DeepCopy created %p\n", rval);
|
|
267
|
-
DPRINTF("DeepCopy created %p %p\n", PyGpuNdArray_DESCR(rval), PyGpuNdArray_DATA(rval));
|
|
268
|
-
if (PyDict_SetItem(memo, selfkey, rval)) {
|
|
269
|
-
Py_DECREF(rval);
|
|
270
|
-
Py_DECREF(selfkey);
|
|
271
|
-
return NULL;
|
|
272
|
-
}
|
|
273
|
-
Py_DECREF(selfkey);
|
|
274
|
-
DPRINTF("PyGpuNdArray_DeepCopy: startd end\n");
|
|
275
|
-
return rval;
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
PyObject * PyGpuNdArray_View(PyGpuNdArrayObject * self)
|
|
280
|
-
{
|
|
281
|
-
PyGpuNdArrayObject * rval = (PyGpuNdArrayObject*)PyGpuNdArray_New(PyGpuNdArray_NDIM(self));
|
|
282
|
-
if (!rval || PyGpuNdArray_set_data(rval, PyGpuNdArray_DATA(self),
|
|
283
|
-
(PyObject *)self, PyGpuNdArray_OFFSET(self))) {
|
|
284
|
-
Py_XDECREF(rval);
|
|
285
|
-
DPRINTF("PyGpuNdArray_View: no rval or PyGpuNdArray_set_data "
|
|
286
|
-
"failed: self=%p, rval=%p rval_base=%p\n",
|
|
287
|
-
self, rval, rval->base);
|
|
288
|
-
return NULL;
|
|
289
|
-
} else {
|
|
290
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i) {
|
|
291
|
-
PyGpuNdArray_DIM(rval, i) = PyGpuNdArray_DIMS(self)[i];
|
|
292
|
-
PyGpuNdArray_STRIDE(rval, i) = PyGpuNdArray_STRIDES(self)[i];
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
DPRINTF("PyGpuNdArray_View: self=%p, self->base=%p"
|
|
296
|
-
" rval=%p rval->base=%p\n",
|
|
297
|
-
self, self->base, rval, rval->base);
|
|
298
|
-
//TODO: find how to refcount on the descr!
|
|
299
|
-
//Py_INCREF(PyGpuNdArray_DESCR(self));
|
|
300
|
-
PyGpuNdArray_DESCR(rval) = PyGpuNdArray_DESCR(self);
|
|
301
|
-
PyGpuNdArray_FLAGS(rval) = PyGpuNdArray_FLAGS(self);
|
|
302
|
-
PyGpuNdArray_FLAGS(rval) &= ~NPY_OWNDATA;
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
return (PyObject*)rval;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
//updated for offset
|
|
309
|
-
PyObject * PyGpuNdArray_CreateArrayObj(PyGpuNdArrayObject * self)
|
|
310
|
-
{
|
|
311
|
-
DPRINTF("PyGpuNdArray_CreateArrayObj\n");
|
|
312
|
-
|
|
313
|
-
if(PyGpuNdArray_NDIM(self)>=0 && PyGpuNdArray_SIZE(self)==0){
|
|
314
|
-
npy_intp * npydims = (npy_intp*)malloc(PyGpuNdArray_NDIM(self) * sizeof(npy_intp));
|
|
315
|
-
assert (npydims);
|
|
316
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
|
|
317
|
-
npydims[i] = (npy_intp)(PyGpuNdArray_DIMS(self)[i]);
|
|
318
|
-
|
|
319
|
-
// Numpy will do a decref on the description.
|
|
320
|
-
Py_INCREF(PyGpuNdArray_DESCR(self));
|
|
321
|
-
|
|
322
|
-
// We can't use PyArray_{Empty,EMPTY} as they segfault when size == 0
|
|
323
|
-
PyObject * rval = PyArray_NewFromDescr(&PyArray_Type,
|
|
324
|
-
PyGpuNdArray_DESCR(self),
|
|
325
|
-
PyGpuNdArray_NDIM(self),
|
|
326
|
-
npydims,
|
|
327
|
-
NULL,
|
|
328
|
-
NULL,
|
|
329
|
-
0,
|
|
330
|
-
NULL);
|
|
331
|
-
|
|
332
|
-
free(npydims);
|
|
333
|
-
if (!rval){
|
|
334
|
-
return NULL;
|
|
335
|
-
}
|
|
336
|
-
assert (PyArray_ITEMSIZE(rval) == PyGpuNdArray_ITEMSIZE(self));
|
|
337
|
-
return rval;
|
|
338
|
-
}
|
|
339
|
-
if ((PyGpuNdArray_NDIM(self) < 0) || (PyGpuNdArray_DATA(self) == 0)) {
|
|
340
|
-
PyErr_SetString(PyExc_ValueError, "can't copy from un-initialized PyGpuNdArray");
|
|
341
|
-
return NULL;
|
|
342
|
-
}
|
|
343
|
-
PyGpuNdArrayObject * contiguous_self = NULL;
|
|
344
|
-
bool pos_stride = true;
|
|
345
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
|
|
346
|
-
if (PyGpuNdArray_STRIDE(self,i)<0)
|
|
347
|
-
pos_stride = false;
|
|
348
|
-
if (PyGpuNdArray_ISONESEGMENT(self) && pos_stride) {
|
|
349
|
-
contiguous_self = self;
|
|
350
|
-
Py_INCREF(contiguous_self);
|
|
351
|
-
DPRINTF("PyGpuNdArray_CreateArrayObj: gpu array already contiguous %p\n", contiguous_self);
|
|
352
|
-
//}else if(PyGpuNdArray_ISONESEGMENT(self)){
|
|
353
|
-
//TODO implement special object handling to speed up transfer
|
|
354
|
-
// DPRINTF("CreateArrayObj one segment, with special handling %p\n", contiguous_self);
|
|
355
|
-
//PyErr_SetString(PyExc_ValueError, "PyGpuNdArray_CreateArrayObj: Need PyGpuNdArray_Copy or some other nd array mandling to transfer contiguous bloc with negative stride.");
|
|
356
|
-
//return NULL;
|
|
357
|
-
} else {
|
|
358
|
-
contiguous_self = (PyGpuNdArrayObject*)PyGpuNdArray_Copy(self);
|
|
359
|
-
DPRINTF("CreateArrayObj created contiguous %p\n", contiguous_self);
|
|
360
|
-
}
|
|
361
|
-
if (!contiguous_self) {
|
|
362
|
-
return NULL;
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
npy_intp * npydims = (npy_intp*)malloc(PyGpuNdArray_NDIM(self) * sizeof(npy_intp));
|
|
366
|
-
assert (npydims);
|
|
367
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i) npydims[i] = (npy_intp)(PyGpuNdArray_DIMS(self)[i]);
|
|
368
|
-
Py_INCREF(PyGpuNdArray_DESCR(self));
|
|
369
|
-
PyObject * rval = PyArray_Empty(PyGpuNdArray_NDIM(self),
|
|
370
|
-
npydims,
|
|
371
|
-
PyGpuNdArray_DESCR(self),
|
|
372
|
-
PyGpuNdArray_ISFORTRAN(self));
|
|
373
|
-
free(npydims);
|
|
374
|
-
if (!rval) {
|
|
375
|
-
Py_DECREF(contiguous_self);
|
|
376
|
-
return NULL;
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
int err = PyGpuMemcpy(PyArray_DATA(rval),
|
|
380
|
-
PyGpuNdArray_DATA(contiguous_self),
|
|
381
|
-
PyGpuNdArray_OFFSET(contiguous_self),
|
|
382
|
-
PyArray_SIZE(rval) * PyArray_ITEMSIZE(rval),
|
|
383
|
-
PyGpuDeviceToHost);
|
|
384
|
-
if (err) {
|
|
385
|
-
Py_DECREF(contiguous_self);
|
|
386
|
-
Py_DECREF(rval);
|
|
387
|
-
rval = NULL;
|
|
388
|
-
}
|
|
389
|
-
Py_DECREF(contiguous_self);
|
|
390
|
-
return rval;
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
static PyObject *
|
|
394
|
-
PyGpuNdArray_Empty(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran)
|
|
395
|
-
{
|
|
396
|
-
DPRINTF("PyGpuNdArray_Empty: start!\n");
|
|
397
|
-
PyGpuNdArrayObject* rval = (PyGpuNdArrayObject*)PyGpuNdArray_New();
|
|
398
|
-
PyGpuNdArray_DESCR(rval) = dtype;
|
|
399
|
-
if (!rval) {
|
|
400
|
-
DPRINTF("PyGpuNdArray_Empty: fail!\n");
|
|
401
|
-
return NULL;
|
|
402
|
-
}
|
|
403
|
-
NPY_ORDER order = NPY_CORDER;
|
|
404
|
-
if (fortran!=0)
|
|
405
|
-
order = NPY_FORTRANORDER;
|
|
406
|
-
|
|
407
|
-
if (PyGpuNdArray_alloc_contiguous(rval, nd, dims, order)) {
|
|
408
|
-
Py_DECREF(rval);
|
|
409
|
-
return NULL;
|
|
410
|
-
}
|
|
411
|
-
|
|
412
|
-
DPRINTF("PyGpuNdArray_Empty: end!\n");
|
|
413
|
-
return (PyObject*) rval;
|
|
414
|
-
}
|
|
415
|
-
|
|
416
|
-
//DONE: dtype, offset not needed, flags
|
|
417
|
-
static PyObject *
|
|
418
|
-
PyGpuNdArray_Zeros(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran)
|
|
419
|
-
{
|
|
420
|
-
DPRINTF("PyGpuNdArray_Zeros: start!\n");
|
|
421
|
-
PyObject * rval = PyGpuNdArray_Empty(nd, dims, dtype, fortran);
|
|
422
|
-
if (!rval) {
|
|
423
|
-
return rval;
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
int total_elements = 1;
|
|
427
|
-
for(int i=0;i<nd;i++)
|
|
428
|
-
total_elements*=dims[i];
|
|
429
|
-
|
|
430
|
-
// total_elements now contains the size of the array, in reals
|
|
431
|
-
int total_size = total_elements * dtype->elsize;
|
|
432
|
-
|
|
433
|
-
// Fill with zeros
|
|
434
|
-
int err = PyGpuMemset(PyGpuNdArray_DATA(rval), 0, total_size);
|
|
435
|
-
if (err) {
|
|
436
|
-
Py_DECREF(rval);
|
|
437
|
-
return NULL;
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
DPRINTF("PyGpuNdArray_Zeros: end!\n");
|
|
441
|
-
return (PyObject*) rval;
|
|
442
|
-
}
|
|
443
|
-
|
|
444
|
-
// declared as a static method (hence "dummy" is not used)
|
|
445
|
-
// numpy.zeros(shape, dtype=float, order='C')
|
|
446
|
-
static PyObject *
|
|
447
|
-
PyGpuNdArray_zeros(PyObject* dummy, PyObject* args, PyObject *kargs)
|
|
448
|
-
{
|
|
449
|
-
static const char *kwlist[] = {"shape","dtype","order",NULL}; /* XXX ? */
|
|
450
|
-
PyArray_Descr *typecode = NULL;
|
|
451
|
-
PyObject * shape = NULL;
|
|
452
|
-
NPY_ORDER order = PyArray_CORDER;
|
|
453
|
-
bool fortran = false;
|
|
454
|
-
PyObject *ret = NULL;
|
|
455
|
-
|
|
456
|
-
if (!PyArg_ParseTupleAndKeywords(args, kargs, "O|O&O&",
|
|
457
|
-
(char**)kwlist,
|
|
458
|
-
&shape,
|
|
459
|
-
PyArray_DescrConverter,
|
|
460
|
-
&typecode,
|
|
461
|
-
PyArray_OrderConverter,
|
|
462
|
-
&order)) {
|
|
463
|
-
Py_XDECREF(typecode);
|
|
464
|
-
Py_XDECREF(shape);
|
|
465
|
-
return ret;
|
|
466
|
-
}
|
|
467
|
-
if (order == PyArray_FORTRANORDER) {
|
|
468
|
-
fortran = true;
|
|
469
|
-
}
|
|
470
|
-
else {
|
|
471
|
-
fortran = false;
|
|
472
|
-
}
|
|
473
|
-
|
|
474
|
-
if(!PySequence_Check(shape))
|
|
475
|
-
{
|
|
476
|
-
PyErr_SetString(PyExc_TypeError, "shape argument must be a sequence");
|
|
477
|
-
return NULL;
|
|
478
|
-
}
|
|
479
|
-
|
|
480
|
-
if (!typecode)
|
|
481
|
-
typecode = PyArray_DescrFromType(NPY_FLOAT64);
|
|
482
|
-
|
|
483
|
-
int shplen = PySequence_Length(shape);
|
|
484
|
-
|
|
485
|
-
if (shplen == 0)
|
|
486
|
-
{
|
|
487
|
-
return PyGpuNdArray_Zeros(0, NULL, typecode, fortran);
|
|
488
|
-
}
|
|
489
|
-
|
|
490
|
-
npy_intp* newdims = (npy_intp *)malloc(sizeof(npy_intp) * shplen);
|
|
491
|
-
|
|
492
|
-
if (!newdims)
|
|
493
|
-
{
|
|
494
|
-
PyErr_SetString(PyExc_MemoryError,
|
|
495
|
-
"PyGpuNdArray_Zeros: Failed to allocate temporary space");
|
|
496
|
-
return NULL;
|
|
497
|
-
}
|
|
498
|
-
|
|
499
|
-
// start from the end to compute strides
|
|
500
|
-
for (int i = shplen-1; i >= 0; --i)
|
|
501
|
-
{
|
|
502
|
-
PyObject* shp_el_obj = PySequence_GetItem(shape, i);
|
|
503
|
-
if(shp_el_obj == NULL)
|
|
504
|
-
{
|
|
505
|
-
// shouldn't happen since we checked length before...
|
|
506
|
-
PyErr_SetString(PyExc_RuntimeError, "PyGpuNdArray_Zeros: Index out of bound in sequence");
|
|
507
|
-
free(newdims);
|
|
508
|
-
return NULL;
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
int shp_el = PyInt_AsLong(shp_el_obj);
|
|
512
|
-
Py_DECREF(shp_el_obj);
|
|
513
|
-
|
|
514
|
-
newdims[i] = shp_el;
|
|
515
|
-
}
|
|
516
|
-
|
|
517
|
-
PyObject* rval = PyGpuNdArray_Zeros(shplen, newdims, typecode, fortran);
|
|
518
|
-
|
|
519
|
-
free(newdims);
|
|
520
|
-
|
|
521
|
-
return (PyObject*)rval;
|
|
522
|
-
}
|
|
523
|
-
|
|
524
|
-
// declared as a static method (hence "dummy" is not used)
|
|
525
|
-
// numpy.empty(shape, dtype=float, order='C')
|
|
526
|
-
static PyObject *
|
|
527
|
-
PyGpuNdArray_empty(PyObject* dummy, PyObject* args, PyObject *kargs)
|
|
528
|
-
{
|
|
529
|
-
static const char *kwlist[] = {"shape","dtype","order",NULL}; /* XXX ? */
|
|
530
|
-
PyArray_Descr *typecode = NULL;
|
|
531
|
-
PyObject * shape = NULL;
|
|
532
|
-
NPY_ORDER order = PyArray_CORDER;
|
|
533
|
-
bool fortran = false;
|
|
534
|
-
PyObject *ret = NULL;
|
|
535
|
-
|
|
536
|
-
if (!PyArg_ParseTupleAndKeywords(args, kargs, "O|O&O&",
|
|
537
|
-
(char **)kwlist,
|
|
538
|
-
&shape,
|
|
539
|
-
PyArray_DescrConverter,
|
|
540
|
-
&typecode,
|
|
541
|
-
PyArray_OrderConverter,
|
|
542
|
-
&order)) {
|
|
543
|
-
Py_XDECREF(typecode);
|
|
544
|
-
Py_XDECREF(shape);
|
|
545
|
-
return ret;
|
|
546
|
-
}
|
|
547
|
-
if (order == PyArray_FORTRANORDER) {
|
|
548
|
-
fortran = true;
|
|
549
|
-
}
|
|
550
|
-
else {
|
|
551
|
-
fortran = false;
|
|
552
|
-
}
|
|
553
|
-
|
|
554
|
-
if(!PySequence_Check(shape))
|
|
555
|
-
{
|
|
556
|
-
PyErr_SetString(PyExc_TypeError, "shape argument must be a sequence");
|
|
557
|
-
return NULL;
|
|
558
|
-
}
|
|
559
|
-
|
|
560
|
-
if (!typecode)
|
|
561
|
-
typecode = PyArray_DescrFromType(NPY_FLOAT64);
|
|
562
|
-
|
|
563
|
-
int shplen = PySequence_Length(shape);
|
|
564
|
-
|
|
565
|
-
if (shplen == 0)
|
|
566
|
-
{
|
|
567
|
-
return PyGpuNdArray_Empty(0, NULL, typecode, fortran);
|
|
568
|
-
}
|
|
569
|
-
|
|
570
|
-
npy_intp* newdims = (npy_intp *)malloc(sizeof(npy_intp) * shplen);
|
|
571
|
-
|
|
572
|
-
if (!newdims)
|
|
573
|
-
{
|
|
574
|
-
PyErr_SetString(PyExc_MemoryError,
|
|
575
|
-
"PyGpuNdArray_empty: Failed to allocate temporary space");
|
|
576
|
-
return NULL;
|
|
577
|
-
}
|
|
578
|
-
|
|
579
|
-
// start from the end to compute strides
|
|
580
|
-
for (int i = shplen-1; i >= 0; --i)
|
|
581
|
-
{
|
|
582
|
-
PyObject* shp_el_obj = PySequence_GetItem(shape, i);
|
|
583
|
-
if(shp_el_obj == NULL)
|
|
584
|
-
{
|
|
585
|
-
// shouldn't happen since we checked length before...
|
|
586
|
-
PyErr_SetString(PyExc_RuntimeError, "PyGpuNdArray_empty: Index out of bound in sequence");
|
|
587
|
-
free(newdims);
|
|
588
|
-
return NULL;
|
|
589
|
-
}
|
|
590
|
-
|
|
591
|
-
int shp_el = PyInt_AsLong(shp_el_obj);
|
|
592
|
-
Py_DECREF(shp_el_obj);
|
|
593
|
-
|
|
594
|
-
newdims[i] = shp_el;
|
|
595
|
-
}
|
|
596
|
-
|
|
597
|
-
PyObject* rval = PyGpuNdArray_Empty(shplen, newdims, typecode, fortran);
|
|
598
|
-
|
|
599
|
-
free(newdims);
|
|
600
|
-
|
|
601
|
-
return (PyObject*)rval;
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
static PyMethodDef PyGpuNdArray_methods[] =
|
|
605
|
-
{
|
|
606
|
-
{"__array__",
|
|
607
|
-
(PyCFunction)PyGpuNdArray_CreateArrayObj, METH_NOARGS,
|
|
608
|
-
"Copy from the device to a numpy ndarray"},
|
|
609
|
-
{"copy",
|
|
610
|
-
(PyCFunction)PyGpuNdArray_copy, METH_VARARGS|METH_KEYWORDS,
|
|
611
|
-
"Create a deep copy of this object."},
|
|
612
|
-
{"view",
|
|
613
|
-
(PyCFunction)PyGpuNdArray_View, METH_NOARGS,
|
|
614
|
-
"Create a view of this object."},
|
|
615
|
-
{"__copy__",
|
|
616
|
-
(PyCFunction)PyGpuNdArray_Copy, METH_NOARGS,
|
|
617
|
-
"Create a copy of this object as numpy does. Why numpy do a copy of the data when the object is a view?"},
|
|
618
|
-
{"__deepcopy__",
|
|
619
|
-
(PyCFunction)PyGpuNdArray_DeepCopy, METH_O,
|
|
620
|
-
"Create a copy of this object"},
|
|
621
|
-
/*
|
|
622
|
-
{"reduce_sum",
|
|
623
|
-
(PyCFunction)PyGpuNdArray_ReduceSum, METH_O,
|
|
624
|
-
"Reduce over the given dimensions by summation"},
|
|
625
|
-
{"exp",
|
|
626
|
-
(PyCFunction)PyGpuNdArray_Exp, METH_NOARGS,
|
|
627
|
-
"Return the exponential of all elements"},
|
|
628
|
-
{"reshape",
|
|
629
|
-
(PyCFunction)PyGpuNdArray_Reshape, METH_O,
|
|
630
|
-
"Return a reshaped view (or copy) of this ndarray\n\
|
|
631
|
-
The required argument is a tuple of integers specifying the shape of the new ndarray."},
|
|
632
|
-
{"_set_stride",
|
|
633
|
-
(PyCFunction)PyGpuNdArray_SetStride, METH_VARARGS,
|
|
634
|
-
"For integer arguments (i, s), set the 'i'th stride to 's'"},
|
|
635
|
-
{"_set_shape_i",
|
|
636
|
-
(PyCFunction)PyGpuNdArray_SetShapeI, METH_VARARGS,
|
|
637
|
-
"For integer arguments (i, s), set the 'i'th shape to 's'"},
|
|
638
|
-
*/
|
|
639
|
-
{NULL, NULL, NULL, NULL} /* Sentinel */
|
|
640
|
-
};
|
|
641
|
-
|
|
642
|
-
//PyArray_CopyInto(PyArrayObject* dest, PyArrayObject* src)¶
|
|
643
|
-
//PyObject* PyArray_NewCopy(PyArrayObject* old, NPY_ORDER order)¶
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
static PyObject *
|
|
647
|
-
PyGpuNdArray_get_shape(PyGpuNdArrayObject *self, void *closure)
|
|
648
|
-
{
|
|
649
|
-
DPRINTF("PyGpuNdArray_get_shape\n");
|
|
650
|
-
|
|
651
|
-
if (PyGpuNdArray_NDIM(self) < 0)
|
|
652
|
-
{
|
|
653
|
-
PyErr_SetString(PyExc_ValueError, "PyGpuNdArray not initialized");
|
|
654
|
-
return NULL;
|
|
655
|
-
}
|
|
656
|
-
PyObject * rval = PyTuple_New(PyGpuNdArray_NDIM(self));
|
|
657
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
|
|
658
|
-
{
|
|
659
|
-
if (!rval || PyTuple_SetItem(rval, i, PyInt_FromLong(PyGpuNdArray_DIMS(self)[i])))
|
|
660
|
-
{
|
|
661
|
-
Py_XDECREF(rval);
|
|
662
|
-
return NULL;
|
|
663
|
-
}
|
|
664
|
-
|
|
665
|
-
}
|
|
666
|
-
return rval;
|
|
667
|
-
}
|
|
668
|
-
|
|
669
|
-
static int
|
|
670
|
-
PyGpuNdArray_set_shape(PyGpuNdArrayObject *self, PyObject *value, void *closure)
|
|
671
|
-
{
|
|
672
|
-
PyErr_SetString(PyExc_NotImplementedError, "TODO: call reshape");
|
|
673
|
-
return -1;
|
|
674
|
-
}
|
|
675
|
-
|
|
676
|
-
static PyObject *
|
|
677
|
-
PyGpuNdArray_get_strides(PyGpuNdArrayObject *self, void *closure)
|
|
678
|
-
{
|
|
679
|
-
if ( PyGpuNdArray_NDIM(self) < 0){
|
|
680
|
-
PyErr_SetString(PyExc_ValueError, "PyGpuNdArrayObject not initialized");
|
|
681
|
-
return NULL;
|
|
682
|
-
}
|
|
683
|
-
PyObject * rval = PyTuple_New( PyGpuNdArray_NDIM(self));
|
|
684
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i){
|
|
685
|
-
if (!rval || PyTuple_SetItem(rval, i, PyInt_FromLong(PyGpuNdArray_STRIDES(self)[i]))){
|
|
686
|
-
Py_XDECREF(rval);
|
|
687
|
-
return NULL;
|
|
688
|
-
}
|
|
689
|
-
}
|
|
690
|
-
return rval;
|
|
691
|
-
}
|
|
692
|
-
|
|
693
|
-
static PyObject *
|
|
694
|
-
PyGpuNdArray_get_data(PyGpuNdArrayObject *self, void *closure)
|
|
695
|
-
{
|
|
696
|
-
return PyInt_FromLong((long int) PyGpuNdArray_DATA(self));
|
|
697
|
-
}
|
|
698
|
-
|
|
699
|
-
static PyObject *
|
|
700
|
-
PyGpuNdArray_get_flags(PyGpuNdArrayObject *self, void *closure)
|
|
701
|
-
{
|
|
702
|
-
PyObject * dict = PyDict_New();
|
|
703
|
-
|
|
704
|
-
PyObject * str= PyString_FromString("C_CONTIGUOUS");
|
|
705
|
-
PyObject * i = PyBool_FromLong(PyGpuNdArray_ISCONTIGUOUS(self));
|
|
706
|
-
PyDict_SetItem(dict, str, i);
|
|
707
|
-
Py_DECREF(str);
|
|
708
|
-
Py_DECREF(i);
|
|
709
|
-
|
|
710
|
-
str= PyString_FromString("F_CONTIGUOUS");
|
|
711
|
-
i = PyBool_FromLong(PyGpuNdArray_CHKFLAGS(self, NPY_F_CONTIGUOUS));
|
|
712
|
-
PyDict_SetItem(dict, str, i);
|
|
713
|
-
Py_DECREF(str);
|
|
714
|
-
Py_DECREF(i);
|
|
715
|
-
|
|
716
|
-
str= PyString_FromString("WRITEABLE");
|
|
717
|
-
i = PyBool_FromLong(PyGpuNdArray_ISWRITEABLE(self));
|
|
718
|
-
PyDict_SetItem(dict, str, i);
|
|
719
|
-
Py_DECREF(str);
|
|
720
|
-
Py_DECREF(i);
|
|
721
|
-
|
|
722
|
-
str= PyString_FromString("ALIGNED");
|
|
723
|
-
i = PyBool_FromLong(PyGpuNdArray_ISALIGNED(self));
|
|
724
|
-
PyDict_SetItem(dict, str, i);
|
|
725
|
-
Py_DECREF(str);
|
|
726
|
-
Py_DECREF(i);
|
|
727
|
-
|
|
728
|
-
str= PyString_FromString("UPDATEIFCOPY");
|
|
729
|
-
i = PyBool_FromLong(PyGpuNdArray_CHKFLAGS(self, NPY_UPDATEIFCOPY));
|
|
730
|
-
PyDict_SetItem(dict, str, i);
|
|
731
|
-
Py_DECREF(str);
|
|
732
|
-
Py_DECREF(i);
|
|
733
|
-
|
|
734
|
-
str= PyString_FromString("OWNDATA");
|
|
735
|
-
i = PyBool_FromLong(PyGpuNdArray_CHKFLAGS(self, NPY_OWNDATA));
|
|
736
|
-
PyDict_SetItem(dict, str, i);
|
|
737
|
-
Py_DECREF(str);
|
|
738
|
-
Py_DECREF(i);
|
|
739
|
-
|
|
740
|
-
return dict;
|
|
741
|
-
}
|
|
742
|
-
static PyObject *
|
|
743
|
-
PyGpuNdArray_get_ndim(PyGpuNdArrayObject *self, void *closure)
|
|
744
|
-
{
|
|
745
|
-
return PyInt_FromLong((long int) PyGpuNdArray_NDIM(self));
|
|
746
|
-
}
|
|
747
|
-
static PyObject *
|
|
748
|
-
PyGpuNdArray_get_offset(PyGpuNdArrayObject *self, void *closure)
|
|
749
|
-
{
|
|
750
|
-
return PyInt_FromLong((long int) PyGpuNdArray_OFFSET(self));
|
|
751
|
-
}
|
|
752
|
-
static PyObject *
|
|
753
|
-
PyGpuNdArray_get_data_allocated(PyGpuNdArrayObject *self, void *closure)
|
|
754
|
-
{
|
|
755
|
-
return PyInt_FromLong((long int) self->data_allocated);
|
|
756
|
-
}
|
|
757
|
-
static PyObject *
|
|
758
|
-
PyGpuNdArray_get_size(PyGpuNdArrayObject *self, void *closure)
|
|
759
|
-
{
|
|
760
|
-
return PyInt_FromLong((long int) PyGpuNdArray_SIZE(self));
|
|
761
|
-
}
|
|
762
|
-
|
|
763
|
-
static PyObject *
|
|
764
|
-
PyGpuNdArray_get_base(PyGpuNdArrayObject *self, void *closure)
|
|
765
|
-
{
|
|
766
|
-
if (!PyGpuNdArray_BASE(self)){
|
|
767
|
-
Py_INCREF(Py_None);
|
|
768
|
-
return Py_None;
|
|
769
|
-
}
|
|
770
|
-
PyObject * ret = PyGpuNdArray_BASE(self);
|
|
771
|
-
Py_INCREF(ret);
|
|
772
|
-
return ret;
|
|
773
|
-
}
|
|
774
|
-
|
|
775
|
-
static PyObject *
|
|
776
|
-
PyGpuNdArray_get_dtype(PyArrayObject *self)
|
|
777
|
-
{
|
|
778
|
-
Py_INCREF(PyGpuNdArray_DESCR(self));
|
|
779
|
-
PyObject * ret = (PyObject *)PyGpuNdArray_DESCR(self);
|
|
780
|
-
return ret;
|
|
781
|
-
}
|
|
782
|
-
|
|
783
|
-
// Getter for the "itemsize" attribute: PyGpuNdArray_ITEMSIZE(self) as a
// Python int.
static PyObject *
PyGpuNdArray_get_itemsize(PyArrayObject *self)
{
    PyObject *itemsize = PyInt_FromLong(PyGpuNdArray_ITEMSIZE(self));
    return itemsize;
}
|
|
788
|
-
|
|
789
|
-
// Attribute table (tp_getset) of the GpuNdArray type.
// Each entry is {name, getter, setter, docstring, closure}; a NULL setter
// makes the attribute read-only from Python.
static PyGetSetDef PyGpuNdArray_getset[] = {
    {(char*)"base",
     (getter)PyGpuNdArray_get_base,
     NULL,
     (char*)"Return the object stored in the base attribute",
     NULL},
    {(char*)"bytes",
     (getter)PyGpuNdArray_get_data,
     NULL,
     (char*)"device data pointer",
     NULL},
    {(char*)"shape",
     (getter)PyGpuNdArray_get_shape,
     (setter)PyGpuNdArray_set_shape,
     (char*)"shape of this ndarray (tuple)",
     NULL},
    {(char*)"strides",
     (getter)PyGpuNdArray_get_strides,
     NULL,//(setter)PyGpuNdArray_set_strides,
     // Strides are byte counts: they are added directly to the char* data
     // pointer and a contiguous 1-d stride is set to the item size.
     (char*)"data pointer strides (in bytes)",
     NULL},
    {(char*)"ndim",
     (getter)PyGpuNdArray_get_ndim,
     NULL,
     (char*)"The number of dimensions in this object",
     NULL},
    {(char*)"offset",
     (getter)PyGpuNdArray_get_offset,
     NULL,
     (char*)"Return the offset value",
     NULL},
    {(char*)"size",
     (getter)PyGpuNdArray_get_size,
     NULL,
     (char*)"The number of elements in this object.",
     NULL},
    {(char*)"data_allocated",
     (getter)PyGpuNdArray_get_data_allocated,
     NULL,
     (char*)"The size of the allocated memory on the device.",
     NULL},
    {(char*)"itemsize",
     (getter)PyGpuNdArray_get_itemsize,
     NULL,
     (char*)"The size of the base element.",
     NULL},
    {(char*)"dtype",
     (getter)PyGpuNdArray_get_dtype,
     NULL,
     (char*)"The dtype of the element",
     NULL},
    {(char*)"flags",
     (getter)PyGpuNdArray_get_flags,
     NULL,
     (char*)"Return the flags as a dictionary",
     NULL},
    {NULL, NULL, NULL, NULL, NULL}  /* Sentinel */
};
|
|
847
|
-
|
|
848
|
-
// Will by called by __len__ in Python
|
|
849
|
-
static Py_ssize_t
|
|
850
|
-
PyGpuNdArray_len(PyObject * py_self)
|
|
851
|
-
{
|
|
852
|
-
PyGpuNdArrayObject * self = (PyGpuNdArrayObject*) py_self;
|
|
853
|
-
if (PyGpuNdArray_NDIM(self) <= 0)
|
|
854
|
-
{
|
|
855
|
-
return (Py_ssize_t) 0;
|
|
856
|
-
}
|
|
857
|
-
else
|
|
858
|
-
{
|
|
859
|
-
return (Py_ssize_t) PyGpuNdArray_DIMS(self)[0];
|
|
860
|
-
}
|
|
861
|
-
}
|
|
862
|
-
|
|
863
|
-
static int
|
|
864
|
-
PyGpuNdArray_add_offset(PyGpuNdArrayObject * self, int offset)
|
|
865
|
-
{
|
|
866
|
-
DPRINTF("PyGpuNdArray_add_offset: %p %d\n", self, offset);
|
|
867
|
-
|
|
868
|
-
#if OFFSET
|
|
869
|
-
PyGpuNdArray_OFFSET(self) += offset;
|
|
870
|
-
#else
|
|
871
|
-
PyGpuNdArray_DATA(self) += offset;
|
|
872
|
-
#endif
|
|
873
|
-
return 0;
|
|
874
|
-
}
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
static int
|
|
878
|
-
PyGpuNdArray_set_data(PyGpuNdArrayObject * self, char * data, PyObject * base, int offset)
|
|
879
|
-
{
|
|
880
|
-
DPRINTF("PyGpuNdArray_set_data: %p %p %p %d\n", self, data, base, offset);
|
|
881
|
-
if (self->data_allocated)
|
|
882
|
-
{
|
|
883
|
-
assert(PyGpuNdArray_DATA(self));
|
|
884
|
-
if (device_free(PyGpuNdArray_DATA(self)))
|
|
885
|
-
{
|
|
886
|
-
PyGpuNdArray_DATA(self) = NULL;
|
|
887
|
-
self->data_allocated = 0;
|
|
888
|
-
DPRINTF("PyGpuNdArray_set_data: device_free failed!\n");
|
|
889
|
-
PyErr_SetString(PyExc_ValueError, "PyGpuNdArray_set_data: device_free failed");
|
|
890
|
-
return -1;
|
|
891
|
-
}
|
|
892
|
-
}
|
|
893
|
-
|
|
894
|
-
// Get the original base object (base.base.base...)
|
|
895
|
-
// TODO: check that base is indeed a CudaNdarray?
|
|
896
|
-
PyObject * orig_base = base;
|
|
897
|
-
// base is not always a PyGpuNdArrayObject. It can be a GpuArray from pycuda, ...
|
|
898
|
-
while (orig_base && PyGpuNdArray_Check(orig_base) && ((PyGpuNdArrayObject*) orig_base)->base)
|
|
899
|
-
{
|
|
900
|
-
// base_base is itself a view
|
|
901
|
-
orig_base = ((PyGpuNdArrayObject*) orig_base)->base;
|
|
902
|
-
}
|
|
903
|
-
|
|
904
|
-
//N.B. XDECREF and XINCREF are no-ops for NULL pointers
|
|
905
|
-
if (PyGpuNdArray_BASE(self) != orig_base)
|
|
906
|
-
{
|
|
907
|
-
Py_XDECREF(PyGpuNdArray_BASE(self));
|
|
908
|
-
PyGpuNdArray_BASE(self) = orig_base;
|
|
909
|
-
Py_XINCREF(PyGpuNdArray_BASE(self));
|
|
910
|
-
}
|
|
911
|
-
self->data_allocated = 0;
|
|
912
|
-
#if OFFSET
|
|
913
|
-
PyGpuNdArray_DATA(self) = data;
|
|
914
|
-
PyGpuNdArray_OFFSET(self) = offset;
|
|
915
|
-
#else
|
|
916
|
-
PyGpuNdArray_DATA(self) = data + offset;
|
|
917
|
-
#endif
|
|
918
|
-
|
|
919
|
-
return 0;
|
|
920
|
-
}
|
|
921
|
-
|
|
922
|
-
// Will by called by __getitem__ in Python
|
|
923
|
-
static PyObject *
|
|
924
|
-
PyGpuNdArray_Subscript(PyObject * py_self, PyObject * key)
|
|
925
|
-
{
|
|
926
|
-
DPRINTF("Subscript start\n");
|
|
927
|
-
PyGpuNdArrayObject * self = (PyGpuNdArrayObject*) py_self;
|
|
928
|
-
PyObject * py_rval = NULL;
|
|
929
|
-
PyGpuNdArrayObject * rval = NULL;
|
|
930
|
-
PyObject * intobj = NULL;
|
|
931
|
-
|
|
932
|
-
//PyObject_Print(key, stderr, 0);
|
|
933
|
-
|
|
934
|
-
if (key == Py_Ellipsis)
|
|
935
|
-
{
|
|
936
|
-
DPRINTF("Subscript with ellipse \n");
|
|
937
|
-
Py_INCREF(py_self);
|
|
938
|
-
DPRINTF("Subscript with ellipse end\n");
|
|
939
|
-
return py_self;
|
|
940
|
-
}
|
|
941
|
-
if ((intobj=PyNumber_Int(key))) //INDEXING BY INTEGER
|
|
942
|
-
{
|
|
943
|
-
#ifdef DEBUG
|
|
944
|
-
PyGpuNdArray_fprint(stderr, self);
|
|
945
|
-
#endif
|
|
946
|
-
DPRINTF("Subscript with int \n");
|
|
947
|
-
|
|
948
|
-
int d_idx = PyInt_AsLong(intobj);
|
|
949
|
-
Py_DECREF(intobj); intobj=NULL;
|
|
950
|
-
|
|
951
|
-
DPRINTF("Subscript with int 1\n");
|
|
952
|
-
if (PyGpuNdArray_NDIM(self) == 0) {
|
|
953
|
-
PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed");
|
|
954
|
-
return NULL;
|
|
955
|
-
}else if (PyGpuNdArray_NDIM(self)< 0){
|
|
956
|
-
PyErr_SetString(PyExc_IndexError, "nd arrays must have a number of dim > 0!");
|
|
957
|
-
return NULL;
|
|
958
|
-
}
|
|
959
|
-
int d_dim = PyGpuNdArray_DIMS(self)[0];
|
|
960
|
-
int offset = 0;
|
|
961
|
-
DPRINTF("Subscript with int 2\n");
|
|
962
|
-
|
|
963
|
-
if ((d_idx >= 0) && (d_idx < d_dim)) {
|
|
964
|
-
//normal indexing
|
|
965
|
-
offset += d_idx * PyGpuNdArray_STRIDES(self)[0];
|
|
966
|
-
}
|
|
967
|
-
else if ((d_idx < 0) && (d_idx >= -d_dim)) {
|
|
968
|
-
//end-based indexing
|
|
969
|
-
// d_idx is negative
|
|
970
|
-
offset += (d_dim + d_idx) * PyGpuNdArray_STRIDES(self)[0];
|
|
971
|
-
} else {
|
|
972
|
-
PyErr_SetString(PyExc_IndexError, "index out of bounds");
|
|
973
|
-
return NULL;
|
|
974
|
-
}
|
|
975
|
-
DPRINTF("Subscript with int 3\n");
|
|
976
|
-
|
|
977
|
-
//Add the original offset
|
|
978
|
-
offset += PyGpuNdArray_OFFSET(self);
|
|
979
|
-
|
|
980
|
-
//allocate our subtensor view
|
|
981
|
-
py_rval = PyGpuNdArray_New(PyGpuNdArray_NDIM(self) - 1);
|
|
982
|
-
rval = (PyGpuNdArrayObject*) py_rval;
|
|
983
|
-
if (!rval) return NULL;
|
|
984
|
-
|
|
985
|
-
//TODO: find how to refcount on the descr!
|
|
986
|
-
PyGpuNdArray_DESCR(py_rval) = PyGpuNdArray_DESCR(self);
|
|
987
|
-
|
|
988
|
-
DPRINTF("Subscript with int 4\n");
|
|
989
|
-
//initialize the view's data pointer to our own.
|
|
990
|
-
assert (0 == rval->data_allocated);
|
|
991
|
-
if (PyGpuNdArray_set_data(rval, PyGpuNdArray_DATA(self), (PyObject *) self, offset)){
|
|
992
|
-
Py_DECREF(rval);
|
|
993
|
-
return NULL;
|
|
994
|
-
}
|
|
995
|
-
DPRINTF("Subscript with int 5\n");
|
|
996
|
-
|
|
997
|
-
for (int d = 1; d < PyGpuNdArray_NDIM(self); ++d) {
|
|
998
|
-
PyGpuNdArray_STRIDE(rval, d-1) = PyGpuNdArray_STRIDES(self)[d];
|
|
999
|
-
PyGpuNdArray_DIM(rval, d-1) = PyGpuNdArray_DIMS(self)[d];
|
|
1000
|
-
}
|
|
1001
|
-
}
|
|
1002
|
-
else {
|
|
1003
|
-
PyErr_Clear();
|
|
1004
|
-
}
|
|
1005
|
-
if (PySlice_Check(key)) //INDEXING BY SLICE
|
|
1006
|
-
{
|
|
1007
|
-
DPRINTF("Subscript with slice \n");
|
|
1008
|
-
if (PyGpuNdArray_NDIM(self) == 0)
|
|
1009
|
-
{
|
|
1010
|
-
PyErr_SetString(PyExc_ValueError, "cannot slice a 0-d array");
|
|
1011
|
-
return NULL;
|
|
1012
|
-
}
|
|
1013
|
-
|
|
1014
|
-
int d_dim = PyGpuNdArray_DIMS(self)[0];
|
|
1015
|
-
Py_ssize_t start, stop, step, slen;
|
|
1016
|
-
if (PySlice_GetIndicesEx((PySliceObject*)key, d_dim, &start, &stop, &step, &slen)) {
|
|
1017
|
-
return NULL;
|
|
1018
|
-
}
|
|
1019
|
-
|
|
1020
|
-
DPRINTF("start %zd\nstop %zd\n step %zd\n slen %zd\n",
|
|
1021
|
-
start, stop, step, slen);
|
|
1022
|
-
|
|
1023
|
-
//allocate our subtensor view
|
|
1024
|
-
py_rval = PyGpuNdArray_New(PyGpuNdArray_NDIM(self));
|
|
1025
|
-
rval = (PyGpuNdArrayObject*) py_rval;
|
|
1026
|
-
if (!rval) return NULL;
|
|
1027
|
-
|
|
1028
|
-
//TODO: find how to refcount on the descr!
|
|
1029
|
-
PyGpuNdArray_DESCR(py_rval) = PyGpuNdArray_DESCR(self);
|
|
1030
|
-
assert (0 == rval->data_allocated);
|
|
1031
|
-
if (PyGpuNdArray_set_data(rval,
|
|
1032
|
-
PyGpuNdArray_DATA(self),
|
|
1033
|
-
py_self,
|
|
1034
|
-
start * PyGpuNdArray_STRIDE(self, 0)
|
|
1035
|
-
+ PyGpuNdArray_OFFSET(self))) {
|
|
1036
|
-
Py_DECREF(rval);
|
|
1037
|
-
return NULL;
|
|
1038
|
-
}
|
|
1039
|
-
|
|
1040
|
-
//initialize dimension 0 of rval
|
|
1041
|
-
PyGpuNdArray_STRIDE(rval, 0) = step * PyGpuNdArray_STRIDES(self)[0];
|
|
1042
|
-
PyGpuNdArray_DIM(rval, 0) = slen;
|
|
1043
|
-
DPRINTF("rval stride %zd\n", PyGpuNdArray_STRIDES(rval)[0]);
|
|
1044
|
-
// initialize dimensions > 0 of rval
|
|
1045
|
-
for (int d = 1; d < PyGpuNdArray_NDIM(self); ++d) {
|
|
1046
|
-
PyGpuNdArray_STRIDE(rval, d) = PyGpuNdArray_STRIDES(self)[d];
|
|
1047
|
-
PyGpuNdArray_DIM(rval, d) = PyGpuNdArray_DIMS(self)[d];
|
|
1048
|
-
}
|
|
1049
|
-
}
|
|
1050
|
-
if (PyTuple_Check(key)) //INDEXING BY TUPLE
|
|
1051
|
-
{
|
|
1052
|
-
DPRINTF("Subscript with tuple \n");
|
|
1053
|
-
//elements of the tuple can be either integers or slices
|
|
1054
|
-
//the dimensionality of the view we will return is diminished for each slice in the tuple
|
|
1055
|
-
int tuple_start_index = 0;
|
|
1056
|
-
if (PyTuple_Size(key) > PyGpuNdArray_NDIM(self))
|
|
1057
|
-
{
|
|
1058
|
-
if (PyTuple_GetItem(key, 0) == Py_Ellipsis &&
|
|
1059
|
-
PyTuple_Size(key) == PyGpuNdArray_NDIM(self) + 1)
|
|
1060
|
-
{
|
|
1061
|
-
tuple_start_index = 1;
|
|
1062
|
-
DPRINTF("Subscript with tuple staring with an extra ellipse"
|
|
1063
|
-
" at the start.\n");
|
|
1064
|
-
}
|
|
1065
|
-
else{
|
|
1066
|
-
PyErr_SetString(PyExc_IndexError,
|
|
1067
|
-
"index error, specified more dimensions then"
|
|
1068
|
-
" the number of existing dimensions");
|
|
1069
|
-
return NULL;
|
|
1070
|
-
}
|
|
1071
|
-
}
|
|
1072
|
-
|
|
1073
|
-
//calculate the number of dimensions in the return value
|
|
1074
|
-
int rval_nd = PyGpuNdArray_NDIM(self);
|
|
1075
|
-
for (int tuple_d = tuple_start_index; tuple_d < PyTuple_Size(key);
|
|
1076
|
-
++tuple_d)
|
|
1077
|
-
{
|
|
1078
|
-
//On some paltform PyInt_Check(<type 'numpy.int64'>) return true, other it return false.
|
|
1079
|
-
//So we use PyArray_IsAnyScalar that should covert everything.
|
|
1080
|
-
rval_nd -= PyArray_IsAnyScalar(PyTuple_GetItem(key, tuple_d));
|
|
1081
|
-
}
|
|
1082
|
-
|
|
1083
|
-
//allocate our subtensor view
|
|
1084
|
-
py_rval = PyGpuNdArray_New(rval_nd);
|
|
1085
|
-
rval = (PyGpuNdArrayObject*) py_rval;
|
|
1086
|
-
if (!rval) return NULL;
|
|
1087
|
-
assert (0 == rval->data_allocated);
|
|
1088
|
-
|
|
1089
|
-
//TODO: find how to refcount on the descr!
|
|
1090
|
-
PyGpuNdArray_DESCR(py_rval) = PyGpuNdArray_DESCR(self);
|
|
1091
|
-
|
|
1092
|
-
//initialize the view's data pointer to our own.
|
|
1093
|
-
if (PyGpuNdArray_set_data(rval, PyGpuNdArray_DATA(self),
|
|
1094
|
-
py_self, PyGpuNdArray_OFFSET(self)))
|
|
1095
|
-
{
|
|
1096
|
-
Py_DECREF(rval);
|
|
1097
|
-
return NULL;
|
|
1098
|
-
}
|
|
1099
|
-
|
|
1100
|
-
// rval_d will refer to the current dimension in the rval.
|
|
1101
|
-
// It will not be incremented for integer keys, but will be incremented for slice
|
|
1102
|
-
// keys
|
|
1103
|
-
int rval_d = 0;
|
|
1104
|
-
|
|
1105
|
-
for (int self_d = 0, tuple_d = tuple_start_index;
|
|
1106
|
-
self_d < PyGpuNdArray_NDIM(self); ++self_d, ++tuple_d)
|
|
1107
|
-
{
|
|
1108
|
-
// keys can be shorter than PyGpuNdArray_NDIM(self).
|
|
1109
|
-
// when that happens, it means that the remaining dimensions are "full slices"
|
|
1110
|
-
if (tuple_d >= PyTuple_Size(key))
|
|
1111
|
-
{
|
|
1112
|
-
PyGpuNdArray_STRIDE(rval, rval_d) =
|
|
1113
|
-
PyGpuNdArray_STRIDES(self)[tuple_d];
|
|
1114
|
-
PyGpuNdArray_DIM(rval, rval_d) =
|
|
1115
|
-
PyGpuNdArray_DIMS(self)[tuple_d];
|
|
1116
|
-
++rval_d;
|
|
1117
|
-
DPRINTF("Subscript extra dims to append %zd %zd\n",
|
|
1118
|
-
PyGpuNdArray_STRIDE(rval, rval_d),
|
|
1119
|
-
PyGpuNdArray_DIM(rval, rval_d));
|
|
1120
|
-
}
|
|
1121
|
-
else
|
|
1122
|
-
{
|
|
1123
|
-
PyObject * key_d = PyTuple_GetItem(key, tuple_d);
|
|
1124
|
-
|
|
1125
|
-
if (PySlice_Check(key_d))
|
|
1126
|
-
{
|
|
1127
|
-
Py_ssize_t start, stop, step, slen;
|
|
1128
|
-
if (PySlice_GetIndicesEx((PySliceObject*)key_d,
|
|
1129
|
-
PyGpuNdArray_DIMS(self)[self_d],
|
|
1130
|
-
&start, &stop, &step, &slen))
|
|
1131
|
-
{
|
|
1132
|
-
Py_DECREF(rval);
|
|
1133
|
-
return NULL;
|
|
1134
|
-
}
|
|
1135
|
-
PyGpuNdArray_add_offset(rval, start * PyGpuNdArray_STRIDES(self)[self_d]);
|
|
1136
|
-
PyGpuNdArray_STRIDE(rval, rval_d) = step * PyGpuNdArray_STRIDES(self)[self_d];
|
|
1137
|
-
PyGpuNdArray_DIM(rval, rval_d) = slen;
|
|
1138
|
-
|
|
1139
|
-
DPRINTF("rval_d %d self_d %d\n start %zd\nstop %zd\n step %zd\n slen %zd\n",
|
|
1140
|
-
rval_d, self_d, start, stop, step, slen);
|
|
1141
|
-
++rval_d;
|
|
1142
|
-
}
|
|
1143
|
-
else if ((intobj=PyNumber_Int(key_d)))
|
|
1144
|
-
{
|
|
1145
|
-
assert(PyArray_IsAnyScalar(key_d));
|
|
1146
|
-
int d_idx = PyInt_AsLong(intobj);
|
|
1147
|
-
Py_DECREF(intobj);
|
|
1148
|
-
intobj = NULL;
|
|
1149
|
-
int d_dim = PyGpuNdArray_DIMS(self)[self_d];
|
|
1150
|
-
|
|
1151
|
-
if ((d_idx >= 0) && (d_idx < d_dim))
|
|
1152
|
-
{
|
|
1153
|
-
//normal indexing
|
|
1154
|
-
PyGpuNdArray_add_offset(rval, d_idx * PyGpuNdArray_STRIDES(self)[self_d]);
|
|
1155
|
-
}
|
|
1156
|
-
else if ((d_idx < 0) && (d_idx >= -d_dim))
|
|
1157
|
-
{
|
|
1158
|
-
//end-based indexing
|
|
1159
|
-
PyGpuNdArray_add_offset(rval, (d_dim + d_idx) * PyGpuNdArray_STRIDES(self)[self_d]);
|
|
1160
|
-
}
|
|
1161
|
-
else
|
|
1162
|
-
{
|
|
1163
|
-
PyErr_SetString(PyExc_IndexError, "index out of bounds");
|
|
1164
|
-
Py_DECREF(rval);
|
|
1165
|
-
return NULL;
|
|
1166
|
-
}
|
|
1167
|
-
}
|
|
1168
|
-
else if (key_d == Py_Ellipsis)
|
|
1169
|
-
{
|
|
1170
|
-
if (self_d != 0){
|
|
1171
|
-
PyErr_Format(PyExc_IndexError,
|
|
1172
|
-
"Ellipsis supported only at the start of"
|
|
1173
|
-
" the tuple");
|
|
1174
|
-
Py_DECREF(rval);
|
|
1175
|
-
return NULL;
|
|
1176
|
-
}
|
|
1177
|
-
DPRINTF("Substript with tuple with the first element an ellipse\n");
|
|
1178
|
-
for( ; self_d < (rval_nd - PyTuple_Size(key) + 1); self_d++)
|
|
1179
|
-
{
|
|
1180
|
-
PyGpuNdArray_STRIDE(rval, rval_d) =
|
|
1181
|
-
PyGpuNdArray_STRIDES(self)[self_d];
|
|
1182
|
-
PyGpuNdArray_DIM(rval, rval_d) =
|
|
1183
|
-
PyGpuNdArray_DIMS(self)[self_d];
|
|
1184
|
-
DPRINTF("Ellipse append dimensions self_%d with %zd %zd\n",
|
|
1185
|
-
self_d,
|
|
1186
|
-
PyGpuNdArray_STRIDE(rval, rval_d),
|
|
1187
|
-
PyGpuNdArray_DIM(rval, rval_d));
|
|
1188
|
-
++rval_d;
|
|
1189
|
-
}
|
|
1190
|
-
tuple_start_index = 1;
|
|
1191
|
-
self_d--;
|
|
1192
|
-
}
|
|
1193
|
-
else
|
|
1194
|
-
{
|
|
1195
|
-
PyErr_Clear(); // clear the error set by PyNumber_Int
|
|
1196
|
-
PyErr_Format(PyExc_IndexError,
|
|
1197
|
-
"index must be either int or slice. Got %s",
|
|
1198
|
-
PyString_AsString(PyObject_Str(key_d)));
|
|
1199
|
-
Py_DECREF(rval);
|
|
1200
|
-
return NULL;
|
|
1201
|
-
}
|
|
1202
|
-
}
|
|
1203
|
-
}
|
|
1204
|
-
}
|
|
1205
|
-
if (py_rval)
|
|
1206
|
-
{
|
|
1207
|
-
#ifdef DEBUG
|
|
1208
|
-
PyGpuNdArray_fprint(stderr, self);
|
|
1209
|
-
PyGpuNdArray_fprint(stderr, rval);
|
|
1210
|
-
#endif
|
|
1211
|
-
}
|
|
1212
|
-
else
|
|
1213
|
-
{
|
|
1214
|
-
PyErr_SetString(PyExc_NotImplementedError, "Unknown key type");
|
|
1215
|
-
return NULL;
|
|
1216
|
-
}
|
|
1217
|
-
|
|
1218
|
-
// Set flags
|
|
1219
|
-
if (PyGpuNdArray_ISWRITEABLE(self)) {
|
|
1220
|
-
PyGpuNdArray_FLAGS(rval) |= NPY_WRITEABLE;
|
|
1221
|
-
} else {
|
|
1222
|
-
PyGpuNdArray_FLAGS(rval) &= ~NPY_WRITEABLE;
|
|
1223
|
-
}
|
|
1224
|
-
PyGpuNdArray_FLAGS(rval) &= ~NPY_OWNDATA;
|
|
1225
|
-
if (PyGpuNdArray_ISALIGNED(self)) {
|
|
1226
|
-
PyGpuNdArray_FLAGS(rval) |= NPY_ALIGNED;
|
|
1227
|
-
} else {
|
|
1228
|
-
PyGpuNdArray_FLAGS(rval) &= ~NPY_ALIGNED;
|
|
1229
|
-
}
|
|
1230
|
-
PyGpuNdArray_FLAGS(rval) &= ~NPY_UPDATEIFCOPY;
|
|
1231
|
-
|
|
1232
|
-
if (false && PyGpuNdArray_NDIM(rval) == 0) {
|
|
1233
|
-
//Numpy is not consistent here
|
|
1234
|
-
//When we create a new numpy ndarray of 0 dim, it is not f contiguous
|
|
1235
|
-
//But when we take a subtensor that is of 0 dim, it is f contiguous!
|
|
1236
|
-
//We make as them for now...
|
|
1237
|
-
PyGpuNdArray_FLAGS(rval) &= ~NPY_F_CONTIGUOUS;
|
|
1238
|
-
PyGpuNdArray_FLAGS(rval) |= NPY_C_CONTIGUOUS;
|
|
1239
|
-
} else {
|
|
1240
|
-
if (PyGpuNdArray_is_c_contiguous(rval)) {
|
|
1241
|
-
PyGpuNdArray_FLAGS(rval) |= NPY_C_CONTIGUOUS;
|
|
1242
|
-
} else {
|
|
1243
|
-
PyGpuNdArray_FLAGS(rval) &= ~NPY_C_CONTIGUOUS;
|
|
1244
|
-
}
|
|
1245
|
-
if (PyGpuNdArray_is_f_contiguous(rval)) {
|
|
1246
|
-
PyGpuNdArray_FLAGS(rval) |= NPY_F_CONTIGUOUS;
|
|
1247
|
-
} else {
|
|
1248
|
-
PyGpuNdArray_FLAGS(rval) &= ~NPY_F_CONTIGUOUS;
|
|
1249
|
-
}
|
|
1250
|
-
}
|
|
1251
|
-
|
|
1252
|
-
DPRINTF("Subscript end\n");
|
|
1253
|
-
return py_rval;
|
|
1254
|
-
}
|
|
1255
|
-
|
|
1256
|
-
// Mapping protocol of the GpuNdArray type: len() and read-only indexing.
PyMappingMethods PyGpuNdArrayMappingMethods = {
    /* mp_length        : __len__     */ PyGpuNdArray_len,
    /* mp_subscript     : __getitem__ */ PyGpuNdArray_Subscript,
    /* mp_ass_subscript : __setitem__ */ 0  // PyGpuNdArray_setitem is not hooked up
};
|
|
1261
|
-
|
|
1262
|
-
// Type object of GpuNdArray (Python 2 style static initializer).
// Slots set to 0 are left unimplemented.
static PyTypeObject PyGpuNdArrayType =
{
    PyObject_HEAD_INIT(NULL)
    0,                                       /* ob_size */
    "GpuNdArray",                            /* tp_name */
    sizeof(PyGpuNdArrayObject),              /* tp_basicsize */
    0,                                       /* tp_itemsize */
    (destructor)PyGpuNdArrayObject_dealloc,  /* tp_dealloc */
    0,                                       /* tp_print */
    0,                                       /* tp_getattr */
    0,                                       /* tp_setattr */
    0,                                       /* tp_compare */
    0,                                       /* tp_repr */
    0,                                       /* tp_as_number (would be &PyGpuNdArrayObjectNumberMethods) */
    0,                                       /* tp_as_sequence */
    &PyGpuNdArrayMappingMethods,             /* tp_as_mapping */
    0,                                       /* tp_hash */
    0,                                       /* tp_call */
    0,                                       /* tp_str */
    0,                                       /* tp_getattro */
    0,                                       /* tp_setattro */
    0,                                       /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /* tp_flags */
    "PyGpuNdArrayObject objects",            /* tp_doc */
    0,                                       /* tp_traverse */
    0,                                       /* tp_clear */
    0,                                       /* tp_richcompare */
    0,                                       /* tp_weaklistoffset */
    0,                                       /* tp_iter */
    0,                                       /* tp_iternext */
    PyGpuNdArray_methods,                    /* tp_methods */
    0,                                       /* tp_members (TODO: PyGpuNdArray_members) */
    PyGpuNdArray_getset,                     /* tp_getset */
    0,                                       /* tp_base */
    0,                                       /* tp_dict */
    0,                                       /* tp_descr_get */
    0,                                       /* tp_descr_set */
    0,                                       /* tp_dictoffset */
    (initproc)PyGpuNdArray_init,             /* tp_init */
    0,                                       /* tp_alloc */
    PyGpuNdArray_new,                        /* tp_new */
};
|
|
1304
|
-
|
|
1305
|
-
//////////////////////////////////////
|
|
1306
|
-
//
|
|
1307
|
-
// C API FOR PyGpuNdArrayObject
|
|
1308
|
-
//
|
|
1309
|
-
//////////////////////////////////////
|
|
1310
|
-
// Allocate a new, null-initialized GpuNdArray with `nd` dimensions.
// For nd > 0 the dimension storage is set up through PyGpuNdArray_set_nd;
// a negative nd leaves the object as null-initialized.
// Returns a new reference, or NULL with an exception set.
PyObject *
PyGpuNdArray_New(int nd)
{
    DPRINTF("PyGpuNdArray_New start\n");
    PyObject *obj = PyGpuNdArrayType.tp_alloc(&PyGpuNdArrayType, 0);
    PyGpuNdArrayObject *arr = (PyGpuNdArrayObject *)obj;
    if (!arr) {
        PyErr_SetString(PyExc_RuntimeError, "PyGpuNdArray_New failed to allocate self");
        return NULL;
    }
    PyGpuNdArray_null_init(arr);

    if (nd > 0) {
        if (PyGpuNdArray_set_nd(arr, nd)) {
            Py_DECREF(arr);
            return NULL;
        }
    }
    else if (nd == 0) {
        PyGpuNdArray_NDIM(arr) = 0;
    }

    // bookkeeping of live objects
    ++_outstanding_mallocs[1];
    DPRINTF("PyGpuNdArray_New end\n");
    return (PyObject *)arr;
}
|
|
1334
|
-
|
|
1335
|
-
int
|
|
1336
|
-
PyGpuNdArray_Check(const PyObject * ob)
|
|
1337
|
-
{
|
|
1338
|
-
DPRINTF("PyGpuNdArray_Check\n");
|
|
1339
|
-
//TODO: doesn't work with inheritance
|
|
1340
|
-
return PyGpuNdArray_CheckExact(ob);
|
|
1341
|
-
}
|
|
1342
|
-
int
|
|
1343
|
-
PyGpuNdArray_CheckExact(const PyObject * ob)
|
|
1344
|
-
{
|
|
1345
|
-
DPRINTF("PyGpuNdArray_CheckExact\n");
|
|
1346
|
-
return ((ob->ob_type == &PyGpuNdArrayType) ? 1 : 0);
|
|
1347
|
-
}
|
|
1348
|
-
|
|
1349
|
-
// Module-level ascontiguousarray(a, dtype=None).
//
// If `a` is already C contiguous it is returned as is (a 0-d array is
// returned as a 1-d view of shape (1,)); otherwise a copy made by
// PyGpuNdArray_Copy is returned. The dtype argument is parsed but not
// supported.
//
// Returns a new reference, or NULL with an exception set (TypeError when
// `a` is not a PyGpuNdArray).
static PyObject *
PyGpuNdArray_as_c_contiguous(PyObject* dummy, PyObject* args, PyObject *kargs)
{
    DPRINTF("PyGpuNdArray_as_c_contiguous:start\n");
    static const char *kwlist[] = {"a", "dtype", NULL};
    PyArray_Descr *typecode = NULL;
    PyObject *self_ = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kargs, "O|O&",
                                     (char **)kwlist,
                                     &self_,
                                     PyArray_DescrConverter,
                                     &typecode)) {
        // self_ comes from the "O" format and is a borrowed reference:
        // it must NOT be released here.
        Py_XDECREF(typecode);
        return NULL;
    }
    assert(typecode == NULL);
    // dtype is not used; when asserts are compiled out, at least do not leak
    // the descriptor created by the converter.
    Py_XDECREF(typecode);
    typecode = NULL;

    if (!PyGpuNdArray_Check(self_)){
        PyErr_SetString(PyExc_TypeError,
                        "PyGpuNdArray_as_c_contiguous:"
                        " PyGpuNdArrayObject required");
        return NULL;
    }

    PyGpuNdArrayObject *self = (PyGpuNdArrayObject*)self_;
    if (PyGpuNdArray_is_c_contiguous(self)){
        if (PyGpuNdArray_NDIM(self) == 0){
            //numpy.ascontiguous() always return object with 1d.
            DPRINTF("PyGpuNdArray_as_c_contiguous: upcast to 1d tensor end\n");
            PyObject * rval = PyGpuNdArray_View(self);
            if (!rval)
                return NULL;
            if (PyGpuNdArray_set_nd((PyGpuNdArrayObject*)rval, 1)) {
                // could not allocate the dimension storage
                Py_DECREF(rval);
                return NULL;
            }
            PyGpuNdArray_DIM(rval, 0) = 1;
            PyGpuNdArray_STRIDE(rval, 0) = PyGpuNdArray_ITEMSIZE(rval);
            return rval;
        }
        DPRINTF("PyGpuNdArray_as_c_contiguous: no copy end\n");
        // Only this path hands `self` back to the caller, so only this path
        // takes a new reference to it.
        Py_INCREF(self);
        return (PyObject*)self;
    }

    PyObject * ret = PyGpuNdArray_Copy(self);
    DPRINTF("PyGpuNdArray_as_c_contiguous: copy end\n");
    return ret;
}
|
|
1396
|
-
// Module-level asfortranarray(a, dtype=None).
//
// If `a` is already Fortran contiguous it is returned as is (a 0-d array is
// returned as a 1-d view of shape (1,)); otherwise a copy made by
// PyGpuNdArray_Copy with NPY_FORTRANORDER is returned. The dtype argument is
// parsed but not supported.
//
// Returns a new reference, or NULL with an exception set (TypeError when
// `a` is not a PyGpuNdArray).
static PyObject *
PyGpuNdArray_as_f_contiguous(PyObject* dummy, PyObject* args, PyObject *kargs)
{
    DPRINTF("PyGpuNdArray_as_f_contiguous:start\n");
    static const char *kwlist[] = {"a", "dtype", NULL};
    PyArray_Descr *typecode = NULL;
    PyObject *self_ = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kargs, "O|O&",
                                     (char **)kwlist,
                                     &self_,
                                     PyArray_DescrConverter,
                                     &typecode)) {
        // self_ comes from the "O" format and is a borrowed reference:
        // it must NOT be released here.
        Py_XDECREF(typecode);
        return NULL;
    }
    assert(typecode == NULL);
    // dtype is not used; when asserts are compiled out, at least do not leak
    // the descriptor created by the converter.
    Py_XDECREF(typecode);
    typecode = NULL;

    if (!PyGpuNdArray_Check(self_)){
        PyErr_SetString(PyExc_TypeError,
                        "PyGpuNdArray_as_f_contiguous:"
                        " PyGpuNdArrayObject required");
        return NULL;
    }

    PyGpuNdArrayObject *self = (PyGpuNdArrayObject*)self_;
    if (PyGpuNdArray_is_f_contiguous(self)){
        if (PyGpuNdArray_NDIM(self) == 0){
            //numpy.ascontiguous() always return object with 1d.
            PyObject * rval = PyGpuNdArray_View(self);
            if (!rval)
                return NULL;
            if (PyGpuNdArray_set_nd((PyGpuNdArrayObject*)rval, 1)) {
                // could not allocate the dimension storage
                Py_DECREF(rval);
                return NULL;
            }
            PyGpuNdArray_DIM(rval, 0) = 1;
            PyGpuNdArray_STRIDE(rval, 0) = PyGpuNdArray_ITEMSIZE(rval);
            DPRINTF("PyGpuNdArray_as_f_contiguous: upcast to 1d tensor end\n");
            return rval;
        }
        DPRINTF("PyGpuNdArray_as_f_contiguous: no copy end\n");
        // Only this path hands `self` back to the caller, so only this path
        // takes a new reference to it.
        Py_INCREF(self);
        return (PyObject*)self;
    }

    PyObject * ret = PyGpuNdArray_Copy(self, NPY_FORTRANORDER);
    DPRINTF("PyGpuNdArray_as_f_contiguous: copy end\n");
    return ret;
}
|
|
1443
|
-
|
|
1444
|
-
#ifdef WITH_OPENCL
|
|
1445
|
-
#ifdef __APPLE__
|
|
1446
|
-
#include <OpenCL/opencl.h>
|
|
1447
|
-
#else
|
|
1448
|
-
#include <CL/opencl.h>
|
|
1449
|
-
#endif
|
|
1450
|
-
extern void setup_context(cl_context c);
|
|
1451
|
-
|
|
1452
|
-
// set_opencl_context(ctx): `ctx` is an integer that is reinterpreted as a
// cl_context handle and passed to setup_context().
// Returns None, or NULL if `ctx` cannot be converted to an integer.
PyObject *
PyGpuNdArray_set_opencl_context(PyObject *mod, PyObject *ctx) {
    const Py_ssize_t handle = PyInt_AsSsize_t(ctx);
    if (handle == -1 && PyErr_Occurred()) {
        return NULL;
    }

    setup_context((cl_context)handle);

    Py_RETURN_NONE;
}
|
|
1465
|
-
#endif
|
|
1466
|
-
|
|
1467
|
-
// Functions exported at module level by pygpu_ndarray.
// Each entry is {name, C function, calling convention flags, docstring}.
static PyMethodDef module_methods[] = {
    //{"dimshuffle", PyGpuNdArray_Dimshuffle, METH_VARARGS, "Returns the dimshuffle of a PyGpuNdArray."},
    {"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"},
    {"zeros",
     (PyCFunction)PyGpuNdArray_zeros, METH_VARARGS|METH_KEYWORDS,
     "Create a new PyGpuNdArray with specified shape, filled with zeros."},
    // NOTE(review): this docstring is identical to the one of "zeros";
    // confirm whether empty() really zero-fills before rewording it.
    {"empty",
     (PyCFunction)PyGpuNdArray_empty, METH_VARARGS|METH_KEYWORDS,
     "Create a new PyGpuNdArray with specified shape, filled with zeros."},
    {"ascontiguousarray",
     (PyCFunction)PyGpuNdArray_as_c_contiguous, METH_VARARGS|METH_KEYWORDS,
     "If the array is not c contiguous, copy it to a new c contiguous region."},
    // The copy is made in Fortran order, so the new region is f contiguous.
    {"asfortranarray",
     (PyCFunction)PyGpuNdArray_as_f_contiguous, METH_VARARGS|METH_KEYWORDS,
     "If the array is not f contiguous, copy it to a new f contiguous region."},
#ifdef WITH_OPENCL
    {"set_opencl_context",
     PyGpuNdArray_set_opencl_context, METH_O,
     "Set the OpenCL context to use for allocations and work."},
#endif
    // ml_flags is an int, so the sentinel uses 0 rather than NULL for it
    {NULL, NULL, 0, NULL}  /* Sentinel */
};
|
|
1489
|
-
|
|
1490
|
-
#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
|
|
1491
|
-
#define PyMODINIT_FUNC void
|
|
1492
|
-
#endif
|
|
1493
|
-
// Python 2 module initialization for "pygpu_ndarray".
// Imports the NumPy C API, finalizes the GpuNdArray type, creates the module
// and publishes the type as "GpuNdArrayObject". On failure it returns early
// and leaves the exception set by the failing call.
PyMODINIT_FUNC
initpygpu_ndarray(void)
{
    import_array();

    PyObject* m;

    if (PyType_Ready(&PyGpuNdArrayType) < 0)
        return;

    m = Py_InitModule3("pygpu_ndarray", module_methods,
                       "Example module that creates an extension type.");

    if (m == NULL)
        return;

    Py_INCREF(&PyGpuNdArrayType);
    if (PyModule_AddObject(m, "GpuNdArrayObject", (PyObject *)&PyGpuNdArrayType) < 0) {
        // PyModule_AddObject only steals the reference on success:
        // give back the one taken just above.
        Py_DECREF(&PyGpuNdArrayType);
    }
#if COMPUTE_GPU_MEM_USED
    // start with an empty allocation table
    for(int i=0;i<TABLE_SIZE;i++){
        _alloc_size_table[i].ptr=NULL;
        _alloc_size_table[i].size=0;
    }
#endif
    //    cublasInit();
    //if (0&&CUBLAS_STATUS_SUCCESS != cublasGetError())
    //{
        //std::cerr << "WARNING: initcuda_ndarray: error initializing device\n";
    //}
/*
    if (0) //TODO: is this necessary?
    {
        int deviceId = 0; // TODO: what number goes here?
        cudaSetDevice(deviceId);
        cudaError_t err = cudaGetLastError();
        if( cudaSuccess != err)
        {
            std::cerr << "Error in SetDevice:" << cudaGetErrorString(err) << "\n";
        }
    }
*/
}
|
|
1535
|
-
|
|
1536
|
-
/*
|
|
1537
|
-
Local Variables:
|
|
1538
|
-
mode:c++
|
|
1539
|
-
c-basic-offset:4
|
|
1540
|
-
c-file-style:"stroustrup"
|
|
1541
|
-
c-file-offsets:((innamespace . 0)(inline-open . 0))
|
|
1542
|
-
indent-tabs-mode:nil
|
|
1543
|
-
fill-column:79
|
|
1544
|
-
End:
|
|
1545
|
-
*/
|
|
1546
|
-
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
|