pyopencl 2024.2.6__cp38-cp38-macosx_11_0_arm64.whl → 2024.3__cp38-cp38-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +127 -122
- pyopencl/_cl.cpython-38-darwin.so +0 -0
- pyopencl/_mymako.py +3 -3
- pyopencl/algorithm.py +10 -7
- pyopencl/array.py +50 -40
- pyopencl/bitonic_sort.py +3 -1
- pyopencl/bitonic_sort_templates.py +1 -1
- pyopencl/cache.py +23 -22
- pyopencl/capture_call.py +5 -4
- pyopencl/clrandom.py +1 -0
- pyopencl/compyte/dtypes.py +4 -4
- pyopencl/compyte/pyproject.toml +54 -0
- pyopencl/elementwise.py +9 -2
- pyopencl/invoker.py +11 -9
- pyopencl/ipython_ext.py +1 -1
- pyopencl/reduction.py +16 -10
- pyopencl/scan.py +38 -22
- pyopencl/tools.py +23 -13
- {pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/METADATA +11 -8
- pyopencl-2024.3.dist-info/RECORD +42 -0
- {pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/WHEEL +1 -1
- pyopencl/compyte/.git +0 -1
- pyopencl/compyte/ndarray/Makefile +0 -31
- pyopencl/compyte/ndarray/__init__.py +0 -0
- pyopencl/compyte/ndarray/gen_elemwise.py +0 -1907
- pyopencl/compyte/ndarray/gen_reduction.py +0 -1511
- pyopencl/compyte/ndarray/gpu_ndarray.h +0 -35
- pyopencl/compyte/ndarray/pygpu_language.h +0 -207
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +0 -622
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +0 -317
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +0 -1546
- pyopencl/compyte/ndarray/pygpu_ndarray.h +0 -71
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +0 -232
- pyopencl/compyte/ndarray/setup_opencl.py +0 -101
- pyopencl/compyte/ndarray/test_gpu_elemwise.py +0 -411
- pyopencl/compyte/ndarray/test_gpu_ndarray.py +0 -487
- pyopencl-2024.2.6.dist-info/RECORD +0 -56
- {pyopencl-2024.2.6.dist-info → pyopencl-2024.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
#ifndef _PYGPU_NDARRAY_H
|
|
2
|
-
#define _PYGPU_NDARRAY_H
|
|
3
|
-
#ifndef OFFSET
|
|
4
|
-
#define OFFSET 0
|
|
5
|
-
#endif
|
|
6
|
-
|
|
7
|
-
//#include <Python.h>
|
|
8
|
-
//#include <structmember.h>
|
|
9
|
-
#include <stdio.h>
|
|
10
|
-
#include <numpy/arrayobject.h>
|
|
11
|
-
|
|
12
|
-
#include "pygpu_ndarray_object.h"
|
|
13
|
-
#include "gpu_ndarray.h"
|
|
14
|
-
#include "pygpu_language.h"
|
|
15
|
-
|
|
16
|
-
/*
|
|
17
|
-
* Return a PyGpuNdArray whose 'nd' dimensions are all 0.
|
|
18
|
-
* if nd==-1, it is not initialized.
|
|
19
|
-
*/
|
|
20
|
-
PyObject * PyGpuNdArray_New(int nd=-1);
|
|
21
|
-
|
|
22
|
-
/**
|
|
23
|
-
* Return 1 for a PyGpuNdArrayObject otw 0
|
|
24
|
-
*/
|
|
25
|
-
int
|
|
26
|
-
PyGpuNdArray_Check(const PyObject * ob);
|
|
27
|
-
|
|
28
|
-
/**
|
|
29
|
-
* Return 1 for a PyGpuNdArrayObject otw 0
|
|
30
|
-
*/
|
|
31
|
-
int
|
|
32
|
-
PyGpuNdArray_CheckExact(const PyObject * ob);
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* Transfer the contents of numpy array `obj` to `self`.
|
|
36
|
-
*
|
|
37
|
-
* self is reallocated to have the correct dimensions if necessary.
|
|
38
|
-
*/
|
|
39
|
-
int PyGpuNdArray_CopyFromArray(PyGpuNdArrayObject * self, PyArrayObject*obj);
|
|
40
|
-
|
|
41
|
-
static int
|
|
42
|
-
PyGpuNdArray_add_offset(PyGpuNdArrayObject * self, int offset);
|
|
43
|
-
|
|
44
|
-
static int
|
|
45
|
-
PyGpuNdArray_set_data(PyGpuNdArrayObject * self, char * data, PyObject * base, int offset=0);
|
|
46
|
-
|
|
47
|
-
static PyObject *
|
|
48
|
-
PyGpuNdArray_Subscript(PyObject * py_self, PyObject * key);
|
|
49
|
-
|
|
50
|
-
static PyObject *
|
|
51
|
-
PyGpuNdArray_Copy(PyGpuNdArrayObject * self, NPY_ORDER order=NPY_CORDER);
|
|
52
|
-
|
|
53
|
-
static PyObject *
|
|
54
|
-
PyGpuNdArray_Zeros(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran);
|
|
55
|
-
|
|
56
|
-
static PyObject *
|
|
57
|
-
PyGpuNdArray_Empty(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran);
|
|
58
|
-
|
|
59
|
-
#endif
|
|
60
|
-
|
|
61
|
-
/*
|
|
62
|
-
Local Variables:
|
|
63
|
-
mode:c++
|
|
64
|
-
c-basic-offset:4
|
|
65
|
-
c-file-style:"stroustrup"
|
|
66
|
-
c-file-offsets:((innamespace . 0)(inline-open . 0))
|
|
67
|
-
indent-tabs-mode:nil
|
|
68
|
-
fill-column:79
|
|
69
|
-
End:
|
|
70
|
-
*/
|
|
71
|
-
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
|
|
@@ -1,232 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* struct PyGPUArrayObject
|
|
3
|
-
*
|
|
4
|
-
* This is a Python type.
|
|
5
|
-
*
|
|
6
|
-
*/
|
|
7
|
-
#ifndef _PYGPU_NDARRAY_OBJECT_H
|
|
8
|
-
#define _PYGPU_NDARRAY_OBJECT_H
|
|
9
|
-
|
|
10
|
-
#include <Python.h>
|
|
11
|
-
#include <numpy/arrayobject.h>
|
|
12
|
-
#include "gpu_ndarray.h"
|
|
13
|
-
|
|
14
|
-
typedef struct PyGpuNdArrayObject{
|
|
15
|
-
PyObject_HEAD
|
|
16
|
-
|
|
17
|
-
GpuNdArray gpu_ndarray; //no pointer, just inlined.
|
|
18
|
-
PyObject * base;
|
|
19
|
-
PyArray_Descr * descr; // for numpy-like desc
|
|
20
|
-
int data_allocated; //the number of bytes allocated for devdata
|
|
21
|
-
} PyGpuNdArrayObject;
|
|
22
|
-
|
|
23
|
-
#define PyGpuNdArray_NDIM(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.nd)
|
|
24
|
-
#define PyGpuNdArray_DATA(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.data)
|
|
25
|
-
#define PyGpuNdArray_BYTES(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.data)
|
|
26
|
-
#define PyGpuNdArray_OFFSET(obj) (((PyGpuNdArrayObject *)(obj))->gpu_ndarray.offset)
|
|
27
|
-
#define PyGpuNdArray_DIMS(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.dimensions)
|
|
28
|
-
#define PyGpuNdArray_STRIDES(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.strides)
|
|
29
|
-
#define PyGpuNdArray_DIM(obj,n) (PyGpuNdArray_DIMS(obj)[n])
|
|
30
|
-
#define PyGpuNdArray_STRIDE(obj,n) (PyGpuNdArray_STRIDES(obj)[n])
|
|
31
|
-
#define PyGpuNdArray_BASE(obj) (((PyGpuNdArrayObject *)obj)->base)
|
|
32
|
-
#define PyGpuNdArray_DESCR(obj) (((PyGpuNdArrayObject *)obj)->descr)
|
|
33
|
-
#define PyGpuNdArray_FLAGS(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.flags)
|
|
34
|
-
#define PyGpuNdArray_ITEMSIZE(obj) (((PyGpuNdArrayObject *)obj)->descr->elsize)
|
|
35
|
-
#define PyGpuNdArray_TYPE(obj) (((PyGpuNdArrayObject *)(obj))->descr->type_num)
|
|
36
|
-
|
|
37
|
-
#define PyGpuNdArray_SIZE(obj) PyArray_MultiplyList(PyGpuNdArray_DIMS(obj),PyGpuNdArray_NDIM(obj))
|
|
38
|
-
//npy_intp PyGpuNdArray_Size(PyObject* obj);
|
|
39
|
-
//npy_intp PyGpuNdArray_NBYTES(PyObject* arr);
|
|
40
|
-
|
|
41
|
-
/*
|
|
42
|
-
Flags accessor
|
|
43
|
-
*/
|
|
44
|
-
#define PyGpuNdArray_CHKFLAGS(m, FLAGS) \
|
|
45
|
-
((((PyGpuNdArrayObject *)(m))->gpu_ndarray.flags & (FLAGS)) == (FLAGS))
|
|
46
|
-
|
|
47
|
-
#define PyGpuNdArray_ISCONTIGUOUS(m) PyGpuNdArray_CHKFLAGS(m, NPY_CONTIGUOUS)
|
|
48
|
-
#define PyGpuNdArray_ISFORTRAN(m) (PyGpuNdArray_CHKFLAGS(m, NPY_F_CONTIGUOUS) && \
|
|
49
|
-
PyGpuNdArray_NDIM(m) > 1)
|
|
50
|
-
#define PyGpuNdArray_FORTRAN_IF(m) (PyGpuNdArray_CHKFLAGS(m, NPY_F_CONTIGUOUS)? \
|
|
51
|
-
NPY_F_CONTIGUOUS : 0)
|
|
52
|
-
#define PyGpuNdArray_ISONESEGMENT(m) (PyGpuNdArray_NDIM(m) == 0 || \
|
|
53
|
-
PyGpuNdArray_ISCONTIGUOUS(m) || \
|
|
54
|
-
PyGpuNdArray_ISFORTRAN(m))
|
|
55
|
-
#define PyGpuNdArray_ISWRITEABLE(m) PyGpuNdArray_CHKFLAGS(m, NPY_WRITEABLE)
|
|
56
|
-
#define PyGpuNdArray_ISALIGNED(m) PyGpuNdArray_CHKFLAGS(m, NPY_ALIGNED)
|
|
57
|
-
|
|
58
|
-
#define PyGpuNdArray_ISNBO(arg) ((arg) != NPY_OPPBYTE)
|
|
59
|
-
// THE NEXT ONE SEEM BAD...
|
|
60
|
-
#define PyGpuNdArray_IsNativeByteOrder PyArray_ISNBO
|
|
61
|
-
#define PyGpuNdArray_ISNOTSWAPPED(m) PyArray_ISNBO(PyArray_DESCR(m)->byteorder)
|
|
62
|
-
#define PyGpuNdArray_FLAGSWAP(m, flags) (PyGpuNdArray_CHKFLAGS(m, flags) && PyGpuNdArray_ISNOTSWAPPED(m))
|
|
63
|
-
|
|
64
|
-
#define PyGpuNdArray_ISCARRAY(m) PyGpuNdArray_FLAGSWAP(m, NPY_CARRAY)
|
|
65
|
-
#define PyGpuNdArray_ISCARRAY_RO(m) PyGpuNdArray_FLAGSWAP(m, NPY_CARRAY_RO)
|
|
66
|
-
#define PyGpuNdArray_ISFARRAY(m) PyGpuNdArray_FLAGSWAP(m, NPY_FARRAY)
|
|
67
|
-
#define PyGpuNdArray_ISFARRAY_RO(m) PyGpuNdArray_FLAGSWAP(m, NPY_FARRAY_RO)
|
|
68
|
-
#define PyGpuNdArray_ISBEHAVED(m) PyGpuNdArray_FLAGSWAP(m, NPY_BEHAVED)
|
|
69
|
-
#define PyGpuNdArray_ISBEHAVED_RO(m) PyGpuNdArray_FLAGSWAP(m, NPY_ALIGNED)
|
|
70
|
-
|
|
71
|
-
static
|
|
72
|
-
void PyGpuNdArray_fprint(FILE * fd, const PyGpuNdArrayObject *self)
|
|
73
|
-
{
|
|
74
|
-
fprintf(fd, "PyGpuNdArrayObject <%p, %p> nd=%i data_allocated=%d\n",
|
|
75
|
-
self, PyGpuNdArray_DATA(self), PyGpuNdArray_NDIM(self), self->data_allocated);
|
|
76
|
-
fprintf(fd, "\tITEMSIZE: %d\n", PyGpuNdArray_ITEMSIZE(self));
|
|
77
|
-
fprintf(fd, "\tTYPENUM: %d\n", PyGpuNdArray_TYPE(self));
|
|
78
|
-
fprintf(fd, "\tRefcount: %ld\n", (long int)self->ob_refcnt);
|
|
79
|
-
fprintf(fd, "\tBASE: %p\n", PyGpuNdArray_BASE(self));
|
|
80
|
-
fprintf(fd, "\tHOST_DIMS: ");
|
|
81
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
|
|
82
|
-
{
|
|
83
|
-
fprintf(fd, "%ld\t", PyGpuNdArray_DIMS(self)[i]);
|
|
84
|
-
}
|
|
85
|
-
fprintf(fd, "\n\tHOST_STRIDES: ");
|
|
86
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
|
|
87
|
-
{
|
|
88
|
-
fprintf(fd, "%ld\t", PyGpuNdArray_STRIDES(self)[i]);
|
|
89
|
-
}
|
|
90
|
-
fprintf(fd, "\n\tFLAGS: ");
|
|
91
|
-
fprintf(fd, "\n\t\tC_CONTIGUOUS: %d", PyGpuNdArray_ISCONTIGUOUS(self));
|
|
92
|
-
fprintf(fd, "\n\t\tPyGpuNdArray_ISFORTRAN: %d PyGpuNdArray_FORTRAN_IF:%d F_CONTIGUOUS: %d",
|
|
93
|
-
PyGpuNdArray_ISFORTRAN(self), PyGpuNdArray_FORTRAN_IF(self), PyGpuNdArray_CHKFLAGS(self, NPY_FORTRAN));
|
|
94
|
-
fprintf(fd, "\n\t\tOWNDATA: %d", PyGpuNdArray_CHKFLAGS(self, NPY_OWNDATA));
|
|
95
|
-
fprintf(fd, "\n\t\tWRITEABLE: %d", PyGpuNdArray_ISWRITEABLE(self));
|
|
96
|
-
fprintf(fd, "\n\t\tALIGNED: %d", PyGpuNdArray_ISALIGNED(self));
|
|
97
|
-
fprintf(fd, "\n\t\tUPDATEIFCOPY: %d", PyGpuNdArray_CHKFLAGS(self, NPY_UPDATEIFCOPY));
|
|
98
|
-
fprintf(fd, "\n");
|
|
99
|
-
|
|
100
|
-
}
|
|
101
|
-
static
|
|
102
|
-
void PyArray_fprint(FILE * fd, const PyArrayObject *self)
|
|
103
|
-
{
|
|
104
|
-
fprintf(fd, "PyArrayObject <%p, %p> nd=%i\n",
|
|
105
|
-
self, PyArray_DATA(self), PyArray_NDIM(self));
|
|
106
|
-
fprintf(fd, "\tITEMSIZE: %d\n", PyArray_ITEMSIZE(self));
|
|
107
|
-
fprintf(fd, "\tTYPENUM: %d\n", PyArray_TYPE(self));
|
|
108
|
-
fprintf(fd, "\tHOST_DIMS: ");
|
|
109
|
-
for (int i = 0; i < PyArray_NDIM(self); ++i)
|
|
110
|
-
{
|
|
111
|
-
fprintf(fd, "%ld\t", PyArray_DIMS(self)[i]);
|
|
112
|
-
}
|
|
113
|
-
fprintf(fd, "\n\tHOST_STRIDES: ");
|
|
114
|
-
for (int i = 0; i < PyArray_NDIM(self); ++i)
|
|
115
|
-
{
|
|
116
|
-
fprintf(fd, "%ld\t", PyArray_STRIDES(self)[i]);
|
|
117
|
-
}
|
|
118
|
-
fprintf(fd, "\n\tFLAGS: ");
|
|
119
|
-
fprintf(fd, "\n\t\tC_CONTIGUOUS: %d", PyArray_ISCONTIGUOUS(self));
|
|
120
|
-
fprintf(fd, "\n\t\tPyArray_ISFORTRAN: %d PyArray_FORTRAN_IF:%d F_CONTIGUOUS: %d",
|
|
121
|
-
PyArray_ISFORTRAN(self), PyArray_FORTRAN_IF(self), PyArray_CHKFLAGS(self, NPY_FORTRAN));
|
|
122
|
-
fprintf(fd, "\n\t\tOWNDATA: %d", PyArray_CHKFLAGS(self, NPY_OWNDATA));
|
|
123
|
-
fprintf(fd, "\n\t\tWRITEABLE: %d", PyArray_ISWRITEABLE(self));
|
|
124
|
-
fprintf(fd, "\n\t\tALIGNED: %d", PyArray_ISALIGNED(self));
|
|
125
|
-
fprintf(fd, "\n\t\tUPDATEIFCOPY: %d", PyArray_CHKFLAGS(self, NPY_UPDATEIFCOPY));
|
|
126
|
-
fprintf(fd, "\n");
|
|
127
|
-
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
template <typename T>
|
|
131
|
-
static T ceil_intdiv(T a, T b)
|
|
132
|
-
{
|
|
133
|
-
return (a/b) + ((a % b) ? 1: 0);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
//Compute if the resulting array is c contiguous
|
|
138
|
-
static bool
|
|
139
|
-
PyGpuNdArray_is_c_contiguous(const PyGpuNdArrayObject * self)
|
|
140
|
-
{
|
|
141
|
-
bool c_contiguous = true;
|
|
142
|
-
int size = PyGpuNdArray_ITEMSIZE(self);
|
|
143
|
-
for (int i = PyGpuNdArray_NDIM(self)-1; (i >= 0) && c_contiguous; --i) {
|
|
144
|
-
if (PyGpuNdArray_STRIDE(self, i) != size) {
|
|
145
|
-
c_contiguous = false;
|
|
146
|
-
}
|
|
147
|
-
size = size * PyGpuNdArray_DIM(self, i);
|
|
148
|
-
}
|
|
149
|
-
return c_contiguous;
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
//Compute if the resulting array is f contiguous
|
|
153
|
-
static bool
|
|
154
|
-
PyGpuNdArray_is_f_contiguous(const PyGpuNdArrayObject * self)
|
|
155
|
-
{
|
|
156
|
-
bool f_contiguous = true;
|
|
157
|
-
int size = PyGpuNdArray_ITEMSIZE(self);
|
|
158
|
-
for (int i = 0; i < PyGpuNdArray_NDIM(self) && f_contiguous; ++i) {
|
|
159
|
-
if (PyGpuNdArray_STRIDE(self, i) != size) {
|
|
160
|
-
f_contiguous = false;
|
|
161
|
-
}
|
|
162
|
-
size = size * PyGpuNdArray_DIM(self, i);
|
|
163
|
-
}
|
|
164
|
-
return f_contiguous;
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
static PyObject *
|
|
168
|
-
PyGpuNdArray_as_c_contiguous(PyObject* dummy, PyObject* args, PyObject *kargs);
|
|
169
|
-
static PyObject *
|
|
170
|
-
PyGpuNdArray_as_f_contiguous(PyObject* dummy, PyObject* args, PyObject *kargs);
|
|
171
|
-
|
|
172
|
-
/**
|
|
173
|
-
* [Re]allocate a PyGpuNdArrayObject with access to 'nd' dimensions.
|
|
174
|
-
*
|
|
175
|
-
* Note: This does not allocate storage for data.
|
|
176
|
-
*/
|
|
177
|
-
static
|
|
178
|
-
int PyGpuNdArray_set_nd(PyGpuNdArrayObject * self, const int nd)
|
|
179
|
-
{
|
|
180
|
-
if (nd != PyGpuNdArray_NDIM(self))
|
|
181
|
-
{
|
|
182
|
-
if(0) fprintf(stderr, "PyGpuNdArray_set_nd: modif nd=%i to nd=%i\n", PyGpuNdArray_NDIM(self), nd);
|
|
183
|
-
|
|
184
|
-
if (PyGpuNdArray_DIMS(self)){
|
|
185
|
-
free(PyGpuNdArray_DIMS(self));
|
|
186
|
-
PyGpuNdArray_DIMS(self) = NULL;
|
|
187
|
-
PyGpuNdArray_NDIM(self) = -1;
|
|
188
|
-
}
|
|
189
|
-
if (PyGpuNdArray_STRIDES(self)){
|
|
190
|
-
free(PyGpuNdArray_STRIDES(self));
|
|
191
|
-
PyGpuNdArray_STRIDES(self) = NULL;
|
|
192
|
-
PyGpuNdArray_NDIM(self) = -1;
|
|
193
|
-
}
|
|
194
|
-
if (nd == -1) return 0;
|
|
195
|
-
|
|
196
|
-
PyGpuNdArray_DIMS(self) = (npy_intp*)malloc(nd*sizeof(npy_intp));
|
|
197
|
-
if (NULL == PyGpuNdArray_DIMS(self))
|
|
198
|
-
{
|
|
199
|
-
PyErr_SetString(PyExc_MemoryError, "PyGpuNdArray_set_nd: Failed to allocate dimensions");
|
|
200
|
-
return -1;
|
|
201
|
-
}
|
|
202
|
-
PyGpuNdArray_STRIDES(self) = (npy_intp*)malloc(nd*sizeof(npy_intp));
|
|
203
|
-
if (NULL == PyGpuNdArray_STRIDES(self))
|
|
204
|
-
{
|
|
205
|
-
PyErr_SetString(PyExc_MemoryError, "PyGpuNdArray_set_nd: Failed to allocate str");
|
|
206
|
-
return -1;
|
|
207
|
-
}
|
|
208
|
-
//initialize all dimensions and strides to 0
|
|
209
|
-
for (int i = 0; i < nd; ++i)
|
|
210
|
-
{
|
|
211
|
-
PyGpuNdArray_DIM(self, i) = 0;
|
|
212
|
-
PyGpuNdArray_STRIDES(self)[i] = 0;
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
PyGpuNdArray_NDIM(self) = nd;
|
|
216
|
-
if(0) fprintf(stderr, "PyGpuNdArray_set_nd: end\n");
|
|
217
|
-
}
|
|
218
|
-
return 0;
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
#endif
|
|
222
|
-
/*
|
|
223
|
-
Local Variables:
|
|
224
|
-
mode:c++
|
|
225
|
-
c-basic-offset:4
|
|
226
|
-
c-file-style:"stroustrup"
|
|
227
|
-
c-file-offsets:((innamespace . 0)(inline-open . 0))
|
|
228
|
-
indent-tabs-mode:nil
|
|
229
|
-
fill-column:79
|
|
230
|
-
End:
|
|
231
|
-
*/
|
|
232
|
-
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
|
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from distutils.command.build_ext import build_ext
|
|
3
|
-
from distutils.core import Extension, setup
|
|
4
|
-
from distutils.dep_util import newer
|
|
5
|
-
|
|
6
|
-
import numpy as np
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class build_ext_nvcc(build_ext):
|
|
10
|
-
user_options = build_ext.user_options
|
|
11
|
-
user_options.extend([
|
|
12
|
-
('cuda-root=', None, "The cuda root directory")])
|
|
13
|
-
|
|
14
|
-
def initialize_options(self):
|
|
15
|
-
build_ext.initialize_options(self)
|
|
16
|
-
self.cuda_root = None
|
|
17
|
-
|
|
18
|
-
def finalize_options(self):
|
|
19
|
-
build_ext.finalize_options(self)
|
|
20
|
-
if self.cuda_root is None:
|
|
21
|
-
self.cuda_root = os.getenv('CUDA_ROOT', None)
|
|
22
|
-
if self.cuda_root is not None:
|
|
23
|
-
self._nvcc_bin = os.path.join(self.cuda_root, 'bin', 'nvcc')
|
|
24
|
-
else:
|
|
25
|
-
self._nvcc_bin = 'nvcc'
|
|
26
|
-
|
|
27
|
-
def cuda_process(self, source, include_args):
|
|
28
|
-
target = source + '.cpp'
|
|
29
|
-
if newer(source, target):
|
|
30
|
-
self.spawn([self._nvcc_bin, '--cuda', source, '-o', target] + \
|
|
31
|
-
include_args)
|
|
32
|
-
return target
|
|
33
|
-
|
|
34
|
-
def cuda_extension(self, ext):
|
|
35
|
-
includes = self.distribution.include_dirs + ext.include_dirs
|
|
36
|
-
include_args = ['-I' + i for i in includes]
|
|
37
|
-
new_sources = []
|
|
38
|
-
anycuda = False
|
|
39
|
-
for src in ext.sources:
|
|
40
|
-
if src.endswith('.cu'):
|
|
41
|
-
new_sources.append(self.cuda_process(src, include_args))
|
|
42
|
-
anycuda = True
|
|
43
|
-
else:
|
|
44
|
-
new_sources.append(src)
|
|
45
|
-
if anycuda:
|
|
46
|
-
ext.sources = new_sources
|
|
47
|
-
if self.cuda_root is not None:
|
|
48
|
-
lib = os.path.join(self.cuda_root, 'lib')
|
|
49
|
-
lib64 = os.path.join(self.cuda_root, 'lib64')
|
|
50
|
-
if os.path.isdir(lib):
|
|
51
|
-
ext.library_dirs.append(lib)
|
|
52
|
-
ext.extra_link_args.append('-Xlinker')
|
|
53
|
-
ext.extra_link_args.append('-rpath')
|
|
54
|
-
ext.extra_link_args.append('-Xlinker')
|
|
55
|
-
ext.extra_link_args.append(lib)
|
|
56
|
-
if os.path.isdir(lib64):
|
|
57
|
-
ext.library_dirs.append(lib64)
|
|
58
|
-
# ext.extra_link_args.append('-rpath')
|
|
59
|
-
# ext.extra_link_args.append(lib64)
|
|
60
|
-
if 'cudart' not in ext.libraries:
|
|
61
|
-
ext.libraries.append('cudart')
|
|
62
|
-
|
|
63
|
-
if self.cuda_root:
|
|
64
|
-
include = os.path.join(self.cuda_root, 'include')
|
|
65
|
-
if os.path.isdir(include):
|
|
66
|
-
ext.extra_compile_args.append('-I' + include)
|
|
67
|
-
if os.path.isfile('/usr/lib/nvidia-current/libOpenCL.so'):
|
|
68
|
-
ext.extra_link_args.append('-L/usr/lib/nvidia-current')
|
|
69
|
-
ext.extra_link_args.append('-Xlinker')
|
|
70
|
-
ext.extra_link_args.append('-rpath')
|
|
71
|
-
ext.extra_link_args.append('-Xlinker')
|
|
72
|
-
ext.extra_link_args.append('/usr/lib/nvidia-current')
|
|
73
|
-
|
|
74
|
-
def build_extensions(self):
|
|
75
|
-
self.check_extensions_list(self.extensions)
|
|
76
|
-
|
|
77
|
-
for ext in self.extensions:
|
|
78
|
-
self.cuda_extension(ext)
|
|
79
|
-
# uncomment this + inherit from the cython version of build_ext
|
|
80
|
-
# work with cuda and cython sources
|
|
81
|
-
#ext.sources = self.cython_sources(ext.sources, ext)
|
|
82
|
-
self.build_extension(ext)
|
|
83
|
-
|
|
84
|
-
import sys
|
|
85
|
-
|
|
86
|
-
if sys.platform == 'darwin':
|
|
87
|
-
libcl_args = {'extra_link_args': ['-framework', 'OpenCL']}
|
|
88
|
-
else:
|
|
89
|
-
libcl_args = {'libraries': ['OpenCL']}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
setup(name='compyte',
|
|
93
|
-
cmdclass={'build_ext': build_ext_nvcc},
|
|
94
|
-
include_dirs=[np.get_include(), '.'],
|
|
95
|
-
ext_modules=[Extension('pygpu_ndarray',
|
|
96
|
-
define_macros=[('OFFSET', '1'), ('WITH_OPENCL', '')],
|
|
97
|
-
sources=['pygpu_language_opencl.cpp',
|
|
98
|
-
'pygpu_ndarray.cpp'],
|
|
99
|
-
**libcl_args)
|
|
100
|
-
]
|
|
101
|
-
)
|