pyopencl 2024.2.7__cp38-cp38-win_amd64.whl → 2024.3__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic.
- pyopencl/__init__.py +127 -122
- pyopencl/_cl.cp38-win_amd64.pyd +0 -0
- pyopencl/_mymako.py +3 -3
- pyopencl/algorithm.py +10 -7
- pyopencl/array.py +50 -40
- pyopencl/bitonic_sort.py +3 -1
- pyopencl/bitonic_sort_templates.py +1 -1
- pyopencl/cache.py +23 -22
- pyopencl/capture_call.py +5 -4
- pyopencl/clrandom.py +1 -0
- pyopencl/compyte/dtypes.py +4 -4
- pyopencl/compyte/pyproject.toml +54 -0
- pyopencl/elementwise.py +9 -2
- pyopencl/invoker.py +11 -9
- pyopencl/ipython_ext.py +1 -1
- pyopencl/reduction.py +16 -10
- pyopencl/scan.py +38 -22
- pyopencl/tools.py +23 -13
- {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/METADATA +11 -8
- pyopencl-2024.3.dist-info/RECORD +42 -0
- {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/WHEEL +1 -1
- pyopencl/compyte/.git +0 -1
- pyopencl/compyte/ndarray/Makefile +0 -31
- pyopencl/compyte/ndarray/__init__.py +0 -0
- pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
- pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
- pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
- pyopencl/compyte/ndarray/pygpu_language.h +0 -207
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
- pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
- pyopencl/compyte/ndarray/setup_opencl.py +0 -101
- pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
- pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
- pyopencl-2024.2.7.dist-info/RECORD +0 -56
- {pyopencl-2024.2.7.dist-info → pyopencl-2024.3.dist-info}/licenses/LICENSE +0 -0
pyopencl/compyte/ndarray/gpu_ndarray.h
@@ -1,35 +0,0 @@
-#ifndef _GPU_NDARRAY_H
-#define _GPU_NDARRAY_H
-
-
-typedef struct GpuNdArray{
-    char* data; //pointer to data element [0,..,0].
-    int offset;
-    int nd; //the number of dimensions of the tensor
-
-    /**
-     * base:
-     * either NULL or a pointer to a fellow CudaNdarray into which this one is viewing.
-     * This pointer is never followed, except during Py_DECREF when we do not need it any longer.
-     */
-    void * base;
-    ssize_t * dimensions; //dim0, dim1, ... dim nd
-    ssize_t * strides; //stride0, stride1, ... stride nd
-    int flags; // Flags, see numpy flags
-    //DTYPE dtype; // fine for numeric types
-    //DtypeMeta * dtype_meta; // reserved for future use.
-    //PyArray_Descr *descr; /* Pointer to type structure */
-} GpuNdArray;
-
-#endif
-/*
-  Local Variables:
-  mode:c++
-  c-basic-offset:4
-  c-file-style:"stroustrup"
-  c-file-offsets:((innamespace . 0)(inline-open . 0))
-  indent-tabs-mode:nil
-  fill-column:79
-  End:
-*/
-// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
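
For orientation, the removed GpuNdArray struct above describes a strided nd-array: strides are byte strides (the allocation code in pygpu_language.h below sets them to multiples of the element size), so an element's address is the base pointer plus the index/stride dot product. A minimal sketch of that addressing rule, assuming offset is a byte offset; the element_ptr helper is hypothetical and not part of compyte:

    #include <sys/types.h>   // ssize_t, as used in the original header

    // Illustrative stand-in for the removed GpuNdArray layout.
    struct GpuNdArray {
        char *data;          // pointer to data element [0,...,0]
        int offset;          // assumed here to be a byte offset
        int nd;              // number of dimensions
        ssize_t *dimensions; // dim0, dim1, ... dim(nd-1)
        ssize_t *strides;    // byte strides: stride0, ... stride(nd-1)
    };

    // Hypothetical helper: byte address of element (idx[0], ..., idx[nd-1]).
    static char *element_ptr(const GpuNdArray &a, const ssize_t *idx) {
        char *p = a.data + a.offset;
        for (int i = 0; i < a.nd; ++i)
            p += idx[i] * a.strides[i];  // per-axis byte offset
        return p;
    }
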
pyopencl/compyte/ndarray/pygpu_language.h
@@ -1,207 +0,0 @@
-/**
- * This file contain the header for ALL code that depend on cuda or opencl.
- */
-#ifndef _PYGPU_LANGUAGE_H
-#define _PYGPU_LANGUAGE_H
-#include <Python.h>
-//#include <iostream>
-
-#include "pygpu_ndarray_object.h"
-
-/////////////////////////
-// Alloc and Free
-/////////////////////////
-//If true, when there is a gpu malloc or free error, we print the size of allocated memory on the device.
-#define COMPUTE_GPU_MEM_USED 0
-#define VERBOSE_ALLOC_FREE 0
-//If true, we fill with NAN allocated device memory.
-#define ALLOC_MEMSET 0
-
-static int _outstanding_mallocs[] = {0,0};
-
-#ifdef DEBUG
-#define DPRINTF(args...) fprintf(stderr, args)
-#else
-#define DPRINTF(...)
-#endif
-
-#if COMPUTE_GPU_MEM_USED
-int _allocated_size = 0;
-const int TABLE_SIZE = 10000;
-struct table_struct{
-    void* ptr;
-    int size;
-};
-table_struct _alloc_size_table[TABLE_SIZE];
-#endif
-
-/**
- * Allocation and freeing of device memory should go through these functions so that the lib can track memory usage.
- *
- * device_malloc will set the Python error message before returning None.
- * device_free will return nonzero on failure (after setting the python error message)
- */
-void * device_malloc(size_t size);
-int device_free(void * ptr);
-static PyObject *
-outstanding_mallocs(PyObject* self, PyObject * args)
-{
-    return PyInt_FromLong(_outstanding_mallocs[0]);
-}
-
-int PyGpuNdArray_CopyFromPyGpuNdArray(PyGpuNdArrayObject * self, PyGpuNdArrayObject * other, bool unbroadcast = false);
-
-/**
- * PyGpuNdArray_alloc_contiguous
- *
- * Allocate storage space for a tensor of rank 'nd' and given dimensions.
- *
- * Note: PyGpuNdArray_alloc_contiguous is templated to work for both int dimensions and npy_intp dimensions
- */
-template<typename inttype>
-int PyGpuNdArray_alloc_contiguous(PyGpuNdArrayObject *self, const int nd, const inttype * dim, NPY_ORDER order=NPY_CORDER)
-{
-    DPRINTF("PyGpuNdArray_alloc_contiguous: start nd=%i descr=%p\n", nd, self);
-
-    if (!PyGpuNdArray_DESCR(self)){
-        PyErr_SetString(PyExc_ValueError,
-                        "PyGpuNdArray_alloc_contiguous: The array don't have a type! We can't allocate it!\n");
-        return -1;
-    }
-
-    // allocate an empty ndarray with c_contiguous access
-    // return 0 on success
-    int size = 1; //set up the strides for contiguous tensor
-    assert (nd >= 0);
-    if (PyGpuNdArray_set_nd(self, nd))
-    {
-        return -1;
-    }
-    //TODO: check if by any chance our current dims are correct,
-    //      and strides already contiguous
-    //      in that case we can return right here.
-    DPRINTF("PyGpuNdArray_alloc_contiguous: before itemsize descr=%p elsize=%i\n", self->descr, self->descr->elsize);
-    int elsize = PyGpuNdArray_ITEMSIZE((PyObject*)self);
-    DPRINTF("PyGpuNdArray_alloc_contiguous: set_nd %d! elsize=%i\n", nd, elsize);
-    if(order != NPY_FORTRANORDER){
-        DPRINTF("PyGpuNdArray_alloc_contiguous: NPY_CORDER\n");
-        for (int i = nd-1; i >= 0; --i){
-            if (size == 0)
-                PyGpuNdArray_STRIDE(self, i) = elsize;
-            else
-                PyGpuNdArray_STRIDE(self,i) = size * elsize;
-            PyGpuNdArray_DIM(self,i) = dim[i];
-            size = size * dim[i];
-        }
-    }else if (nd>0){
-        DPRINTF("PyGpuNdArray_alloc_contiguous: NPY_FORTRANORDER\n");
-        size = dim[0];
-        PyGpuNdArray_STRIDE(self, 0) = elsize;
-        PyGpuNdArray_DIM(self, nd-1) = dim[nd-1];
-        for (int i = 1; i < nd; ++i){
-            if (size == 0)
-                PyGpuNdArray_STRIDE(self, i) = elsize;
-            else
-                PyGpuNdArray_STRIDE(self, i) = PyGpuNdArray_STRIDE(self, i-1) * dim[i-1];
-            PyGpuNdArray_DIM(self, nd-i-1) = dim[nd-i-1];
-            size = size * dim[i];
-        }
-    }
-
-    if (self->data_allocated != size)
-    {
-        // If self is a view, do not try to free its memory
-        if (self->data_allocated && device_free(PyGpuNdArray_DATA(self))) {
-            // Does this ever happen?? Do we need to set data_allocated or devdata to 0?
-            PyGpuNdArray_DATA(self) = NULL;
-            self->data_allocated = 0;
-            return -1;
-        }
-
-        assert(size>0);
-        DPRINTF("PyGpuNdArray_alloc_contiguous: will allocate for size=%d elements\n", size);
-
-        PyGpuNdArray_DATA(self) = (char*)device_malloc(size * PyGpuNdArray_ITEMSIZE((PyObject *)self));
-        if (!PyGpuNdArray_DATA(self))
-        {
-            PyGpuNdArray_set_nd(self,-1);
-            self->data_allocated = 0;
-            PyGpuNdArray_DATA(self) = 0;
-            return -1;
-        }
-
-        // The structure of self will be reused with newly allocated memory.
-        // If self was a view, we should remove the reference to its base.
-        // (If base was already NULL, the following has no effect.)
-        Py_XDECREF(self->base);
-        self->base = NULL;
-
-        self->data_allocated = size;
-        self->gpu_ndarray.flags = NPY_DEFAULT;
-        PyGpuNdArray_FLAGS(self) |= NPY_WRITEABLE;
-        PyGpuNdArray_FLAGS(self) |= NPY_OWNDATA;
-        if (nd == 0) {
-            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
-            if (order != NPY_FORTRANORDER) {
-                PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
-            } else {
-                PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
-            }
-
-        }else if(nd == 1){//set c and f contiguous
-            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
-            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
-        }else if(order != NPY_FORTRANORDER){//set c contiguous
-            PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
-            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
-        }else{//set f contiguous
-            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
-            PyGpuNdArray_FLAGS(self) &= ~NPY_C_CONTIGUOUS;
-        }
-        PyGpuNdArray_FLAGS(self) &= ~NPY_UPDATEIFCOPY;
-    }else if(size == 0){
-        PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
-        PyGpuNdArray_FLAGS(self) |= NPY_OWNDATA;
-        if (nd == 0) {
-            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
-            if (order != NPY_FORTRANORDER) {
-                PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
-            } else {
-                PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
-            }
-
-        }else if(nd == 1){//set c and f contiguous
-            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
-            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
-        }else if(order != NPY_FORTRANORDER){//set c contiguous
-            PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
-            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
-        }else{//set f contiguous
-            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
-            PyGpuNdArray_FLAGS(self) &= ~NPY_C_CONTIGUOUS;
-        }
-        PyGpuNdArray_FLAGS(self) &= ~NPY_UPDATEIFCOPY;
-        return 0;
-    }else{
-        // How to check for the flags? Need to check if already contiguous.
-        PyErr_Format(PyExc_RuntimeError,
-                     "PyGpuNdArray_alloc_contiguous: self->data_allocated=%d, size=%d, cmp=%d",
-                     self->data_allocated, size, self->data_allocated != size
-        );
-        return -1;
-    }
-
-    if (order != NPY_FORTRANORDER) {
-        assert(PyGpuNdArray_is_c_contiguous(self));
-    } else {
-        assert(PyGpuNdArray_is_f_contiguous(self));
-    }
-    DPRINTF("PyGpuNdArray_alloc_contiguous: end\n");
-    return 0;
-}
-
-enum PyGpuTransfert { PyGpuHostToDevice, PyGpuDeviceToHost };
-int PyGpuMemcpy(void * dst, const void * src, int dev_offset, size_t bytes, PyGpuTransfert direction);
-
-int PyGpuMemset(void * dst, int data, size_t bytes);
-#endif
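
The heart of the removed PyGpuNdArray_alloc_contiguous above is its contiguous-stride computation: C order fills byte strides from the last axis inward (innermost stride equals the element size), Fortran order from the first axis outward, and a zero-sized dimension forces the remaining strides to the element size. A standalone sketch of the same rule (illustrative only; contiguous_strides is not a compyte function):

    #include <cstddef>
    #include <vector>

    // Byte strides for a contiguous tensor, mirroring the two stride loops
    // in the removed PyGpuNdArray_alloc_contiguous.
    static std::vector<ptrdiff_t>
    contiguous_strides(const std::vector<ptrdiff_t> &dim, ptrdiff_t elsize,
                       bool fortran_order)
    {
        const int nd = (int)dim.size();
        std::vector<ptrdiff_t> stride(nd);
        ptrdiff_t size = 1;
        if (!fortran_order) {            // C order: last axis varies fastest
            for (int i = nd - 1; i >= 0; --i) {
                stride[i] = (size == 0) ? elsize : size * elsize;
                size *= dim[i];
            }
        } else {                         // Fortran order: first axis varies fastest
            for (int i = 0; i < nd; ++i) {
                if (i == 0)
                    stride[i] = elsize;
                else
                    stride[i] = (size == 0) ? elsize : stride[i - 1] * dim[i - 1];
                size = (i == 0) ? dim[0] : size * dim[i];
            }
        }
        return stride;
    }

For dim = (2, 3, 4) with elsize = 4, C order yields strides (48, 16, 4) and Fortran order yields (4, 8, 24), matching what the removed template would write into PyGpuNdArray_STRIDE.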