pyopencl 2024.2.2__cp39-cp39-win_amd64.whl → 2024.2.5__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyopencl might be problematic. Click here for more details.
- pyopencl/__init__.py +16 -4
- pyopencl/_cl.cp39-win_amd64.pyd +0 -0
- pyopencl/algorithm.py +3 -1
- pyopencl/bitonic_sort.py +2 -0
- pyopencl/characterize/__init__.py +23 -0
- pyopencl/compyte/.git +1 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/ndarray/Makefile +31 -0
- pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
- pyopencl/compyte/ndarray/pygpu_language.h +207 -0
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
- pyopencl/tools.py +60 -56
- pyopencl/version.py +9 -3
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/METADATA +105 -105
- pyopencl-2024.2.5.dist-info/RECORD +56 -0
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/WHEEL +1 -1
- pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
- pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
- pyopencl-2024.2.2.data/data/Makefile.in +0 -21
- pyopencl-2024.2.2.data/data/README.rst +0 -70
- pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
- pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
- pyopencl-2024.2.2.data/data/configure.py +0 -6
- pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
- pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
- pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
- pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
- pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
- pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
- pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
- pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
- pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
- pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
- pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
- pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
- pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
- pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
- pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
- pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
- pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
- pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
- pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
- pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
- pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
- pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
- pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
- pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
- pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
- pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
- pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
- pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
- pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
- pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
- pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
- pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
- pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
- pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
- pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
- pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
- pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
- pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
- pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
- pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
- pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
- pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
- pyopencl-2024.2.2.data/data/pytest.ini +0 -3
- pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
- pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
- pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
- pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
- pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
- pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
- pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
- pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
- pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
- pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
- pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
- pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
- pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
- pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
- pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
- pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
- pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
- pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
- pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
- pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
- pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
- pyopencl-2024.2.2.dist-info/LICENSE +0 -282
- pyopencl-2024.2.2.dist-info/RECORD +0 -123
- pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
- {pyopencl-2024.2.2.data/data → pyopencl-2024.2.5.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#ifndef _PYGPU_NDARRAY_H
|
|
2
|
+
#define _PYGPU_NDARRAY_H
|
|
3
|
+
#ifndef OFFSET
|
|
4
|
+
#define OFFSET 0
|
|
5
|
+
#endif
|
|
6
|
+
|
|
7
|
+
//#include <Python.h>
|
|
8
|
+
//#include <structmember.h>
|
|
9
|
+
#include <stdio.h>
|
|
10
|
+
#include <numpy/arrayobject.h>
|
|
11
|
+
|
|
12
|
+
#include "pygpu_ndarray_object.h"
|
|
13
|
+
#include "gpu_ndarray.h"
|
|
14
|
+
#include "pygpu_language.h"
|
|
15
|
+
|
|
16
|
+
/*
|
|
17
|
+
* Return a PyGpuNdArray whose 'nd' dimensions are all 0.
|
|
18
|
+
* if nd==-1, it is not initialized.
|
|
19
|
+
*/
|
|
20
|
+
PyObject * PyGpuNdArray_New(int nd=-1);
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Return 1 for a PyGpuNdArrayObject, otherwise 0.
|
|
24
|
+
*/
|
|
25
|
+
int
|
|
26
|
+
PyGpuNdArray_Check(const PyObject * ob);
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Return 1 for a PyGpuNdArrayObject, otherwise 0.
|
|
30
|
+
*/
|
|
31
|
+
int
|
|
32
|
+
PyGpuNdArray_CheckExact(const PyObject * ob);
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Transfer the contents of numpy array `obj` to `self`.
|
|
36
|
+
*
|
|
37
|
+
* self is reallocated to have the correct dimensions if necessary.
|
|
38
|
+
*/
|
|
39
|
+
int PyGpuNdArray_CopyFromArray(PyGpuNdArrayObject * self, PyArrayObject*obj);
|
|
40
|
+
|
|
41
|
+
static int
|
|
42
|
+
PyGpuNdArray_add_offset(PyGpuNdArrayObject * self, int offset);
|
|
43
|
+
|
|
44
|
+
static int
|
|
45
|
+
PyGpuNdArray_set_data(PyGpuNdArrayObject * self, char * data, PyObject * base, int offset=0);
|
|
46
|
+
|
|
47
|
+
static PyObject *
|
|
48
|
+
PyGpuNdArray_Subscript(PyObject * py_self, PyObject * key);
|
|
49
|
+
|
|
50
|
+
static PyObject *
|
|
51
|
+
PyGpuNdArray_Copy(PyGpuNdArrayObject * self, NPY_ORDER order=NPY_CORDER);
|
|
52
|
+
|
|
53
|
+
static PyObject *
|
|
54
|
+
PyGpuNdArray_Zeros(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran);
|
|
55
|
+
|
|
56
|
+
static PyObject *
|
|
57
|
+
PyGpuNdArray_Empty(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran);
|
|
58
|
+
|
|
59
|
+
#endif
|
|
60
|
+
|
|
61
|
+
/*
|
|
62
|
+
Local Variables:
|
|
63
|
+
mode:c++
|
|
64
|
+
c-basic-offset:4
|
|
65
|
+
c-file-style:"stroustrup"
|
|
66
|
+
c-file-offsets:((innamespace . 0)(inline-open . 0))
|
|
67
|
+
indent-tabs-mode:nil
|
|
68
|
+
fill-column:79
|
|
69
|
+
End:
|
|
70
|
+
*/
|
|
71
|
+
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* struct PyGpuNdArrayObject
|
|
3
|
+
*
|
|
4
|
+
* This is a Python type.
|
|
5
|
+
*
|
|
6
|
+
*/
|
|
7
|
+
#ifndef _PYGPU_NDARRAY_OBJECT_H
|
|
8
|
+
#define _PYGPU_NDARRAY_OBJECT_H
|
|
9
|
+
|
|
10
|
+
#include <Python.h>
|
|
11
|
+
#include <numpy/arrayobject.h>
|
|
12
|
+
#include "gpu_ndarray.h"
|
|
13
|
+
|
|
14
|
+
typedef struct PyGpuNdArrayObject{
|
|
15
|
+
PyObject_HEAD
|
|
16
|
+
|
|
17
|
+
GpuNdArray gpu_ndarray; //no pointer, just inlined.
|
|
18
|
+
PyObject * base;
|
|
19
|
+
PyArray_Descr * descr; // for numpy-like desc
|
|
20
|
+
int data_allocated; //the number of bytes allocated for devdata
|
|
21
|
+
} PyGpuNdArrayObject;
|
|
22
|
+
|
|
23
|
+
#define PyGpuNdArray_NDIM(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.nd)
|
|
24
|
+
#define PyGpuNdArray_DATA(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.data)
|
|
25
|
+
#define PyGpuNdArray_BYTES(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.data)
|
|
26
|
+
#define PyGpuNdArray_OFFSET(obj) (((PyGpuNdArrayObject *)(obj))->gpu_ndarray.offset)
|
|
27
|
+
#define PyGpuNdArray_DIMS(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.dimensions)
|
|
28
|
+
#define PyGpuNdArray_STRIDES(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.strides)
|
|
29
|
+
#define PyGpuNdArray_DIM(obj,n) (PyGpuNdArray_DIMS(obj)[n])
|
|
30
|
+
#define PyGpuNdArray_STRIDE(obj,n) (PyGpuNdArray_STRIDES(obj)[n])
|
|
31
|
+
#define PyGpuNdArray_BASE(obj) (((PyGpuNdArrayObject *)obj)->base)
|
|
32
|
+
#define PyGpuNdArray_DESCR(obj) (((PyGpuNdArrayObject *)obj)->descr)
|
|
33
|
+
#define PyGpuNdArray_FLAGS(obj) (((PyGpuNdArrayObject *)obj)->gpu_ndarray.flags)
|
|
34
|
+
#define PyGpuNdArray_ITEMSIZE(obj) (((PyGpuNdArrayObject *)obj)->descr->elsize)
|
|
35
|
+
#define PyGpuNdArray_TYPE(obj) (((PyGpuNdArrayObject *)(obj))->descr->type_num)
|
|
36
|
+
|
|
37
|
+
#define PyGpuNdArray_SIZE(obj) PyArray_MultiplyList(PyGpuNdArray_DIMS(obj),PyGpuNdArray_NDIM(obj))
|
|
38
|
+
//npy_intp PyGpuNdArray_Size(PyObject* obj);
|
|
39
|
+
//npy_intp PyGpuNdArray_NBYTES(PyObject* arr);
|
|
40
|
+
|
|
41
|
+
/*
|
|
42
|
+
Flags accessor
|
|
43
|
+
*/
|
|
44
|
+
#define PyGpuNdArray_CHKFLAGS(m, FLAGS) \
|
|
45
|
+
((((PyGpuNdArrayObject *)(m))->gpu_ndarray.flags & (FLAGS)) == (FLAGS))
|
|
46
|
+
|
|
47
|
+
#define PyGpuNdArray_ISCONTIGUOUS(m) PyGpuNdArray_CHKFLAGS(m, NPY_CONTIGUOUS)
|
|
48
|
+
#define PyGpuNdArray_ISFORTRAN(m) (PyGpuNdArray_CHKFLAGS(m, NPY_F_CONTIGUOUS) && \
|
|
49
|
+
PyGpuNdArray_NDIM(m) > 1)
|
|
50
|
+
#define PyGpuNdArray_FORTRAN_IF(m) (PyGpuNdArray_CHKFLAGS(m, NPY_F_CONTIGUOUS)? \
|
|
51
|
+
NPY_F_CONTIGUOUS : 0)
|
|
52
|
+
#define PyGpuNdArray_ISONESEGMENT(m) (PyGpuNdArray_NDIM(m) == 0 || \
|
|
53
|
+
PyGpuNdArray_ISCONTIGUOUS(m) || \
|
|
54
|
+
PyGpuNdArray_ISFORTRAN(m))
|
|
55
|
+
#define PyGpuNdArray_ISWRITEABLE(m) PyGpuNdArray_CHKFLAGS(m, NPY_WRITEABLE)
|
|
56
|
+
#define PyGpuNdArray_ISALIGNED(m) PyGpuNdArray_CHKFLAGS(m, NPY_ALIGNED)
|
|
57
|
+
|
|
58
|
+
#define PyGpuNdArray_ISNBO(arg) ((arg) != NPY_OPPBYTE)
|
|
59
|
+
// THE NEXT ONE SEEMS BAD...
|
|
60
|
+
#define PyGpuNdArray_IsNativeByteOrder PyArray_ISNBO
|
|
61
|
+
#define PyGpuNdArray_ISNOTSWAPPED(m) PyArray_ISNBO(PyArray_DESCR(m)->byteorder)
|
|
62
|
+
#define PyGpuNdArray_FLAGSWAP(m, flags) (PyGpuNdArray_CHKFLAGS(m, flags) && PyGpuNdArray_ISNOTSWAPPED(m))
|
|
63
|
+
|
|
64
|
+
#define PyGpuNdArray_ISCARRAY(m) PyGpuNdArray_FLAGSWAP(m, NPY_CARRAY)
|
|
65
|
+
#define PyGpuNdArray_ISCARRAY_RO(m) PyGpuNdArray_FLAGSWAP(m, NPY_CARRAY_RO)
|
|
66
|
+
#define PyGpuNdArray_ISFARRAY(m) PyGpuNdArray_FLAGSWAP(m, NPY_FARRAY)
|
|
67
|
+
#define PyGpuNdArray_ISFARRAY_RO(m) PyGpuNdArray_FLAGSWAP(m, NPY_FARRAY_RO)
|
|
68
|
+
#define PyGpuNdArray_ISBEHAVED(m) PyGpuNdArray_FLAGSWAP(m, NPY_BEHAVED)
|
|
69
|
+
#define PyGpuNdArray_ISBEHAVED_RO(m) PyGpuNdArray_FLAGSWAP(m, NPY_ALIGNED)
|
|
70
|
+
|
|
71
|
+
static
|
|
72
|
+
void PyGpuNdArray_fprint(FILE * fd, const PyGpuNdArrayObject *self)
|
|
73
|
+
{
|
|
74
|
+
fprintf(fd, "PyGpuNdArrayObject <%p, %p> nd=%i data_allocated=%d\n",
|
|
75
|
+
self, PyGpuNdArray_DATA(self), PyGpuNdArray_NDIM(self), self->data_allocated);
|
|
76
|
+
fprintf(fd, "\tITEMSIZE: %d\n", PyGpuNdArray_ITEMSIZE(self));
|
|
77
|
+
fprintf(fd, "\tTYPENUM: %d\n", PyGpuNdArray_TYPE(self));
|
|
78
|
+
fprintf(fd, "\tRefcount: %ld\n", (long int)self->ob_refcnt);
|
|
79
|
+
fprintf(fd, "\tBASE: %p\n", PyGpuNdArray_BASE(self));
|
|
80
|
+
fprintf(fd, "\tHOST_DIMS: ");
|
|
81
|
+
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
|
|
82
|
+
{
|
|
83
|
+
fprintf(fd, "%ld\t", PyGpuNdArray_DIMS(self)[i]);
|
|
84
|
+
}
|
|
85
|
+
fprintf(fd, "\n\tHOST_STRIDES: ");
|
|
86
|
+
for (int i = 0; i < PyGpuNdArray_NDIM(self); ++i)
|
|
87
|
+
{
|
|
88
|
+
fprintf(fd, "%ld\t", PyGpuNdArray_STRIDES(self)[i]);
|
|
89
|
+
}
|
|
90
|
+
fprintf(fd, "\n\tFLAGS: ");
|
|
91
|
+
fprintf(fd, "\n\t\tC_CONTIGUOUS: %d", PyGpuNdArray_ISCONTIGUOUS(self));
|
|
92
|
+
fprintf(fd, "\n\t\tPyGpuNdArray_ISFORTRAN: %d PyGpuNdArray_FORTRAN_IF:%d F_CONTIGUOUS: %d",
|
|
93
|
+
PyGpuNdArray_ISFORTRAN(self), PyGpuNdArray_FORTRAN_IF(self), PyGpuNdArray_CHKFLAGS(self, NPY_FORTRAN));
|
|
94
|
+
fprintf(fd, "\n\t\tOWNDATA: %d", PyGpuNdArray_CHKFLAGS(self, NPY_OWNDATA));
|
|
95
|
+
fprintf(fd, "\n\t\tWRITEABLE: %d", PyGpuNdArray_ISWRITEABLE(self));
|
|
96
|
+
fprintf(fd, "\n\t\tALIGNED: %d", PyGpuNdArray_ISALIGNED(self));
|
|
97
|
+
fprintf(fd, "\n\t\tUPDATEIFCOPY: %d", PyGpuNdArray_CHKFLAGS(self, NPY_UPDATEIFCOPY));
|
|
98
|
+
fprintf(fd, "\n");
|
|
99
|
+
|
|
100
|
+
}
|
|
101
|
+
static
|
|
102
|
+
void PyArray_fprint(FILE * fd, const PyArrayObject *self)
|
|
103
|
+
{
|
|
104
|
+
fprintf(fd, "PyArrayObject <%p, %p> nd=%i\n",
|
|
105
|
+
self, PyArray_DATA(self), PyArray_NDIM(self));
|
|
106
|
+
fprintf(fd, "\tITEMSIZE: %d\n", PyArray_ITEMSIZE(self));
|
|
107
|
+
fprintf(fd, "\tTYPENUM: %d\n", PyArray_TYPE(self));
|
|
108
|
+
fprintf(fd, "\tHOST_DIMS: ");
|
|
109
|
+
for (int i = 0; i < PyArray_NDIM(self); ++i)
|
|
110
|
+
{
|
|
111
|
+
fprintf(fd, "%ld\t", PyArray_DIMS(self)[i]);
|
|
112
|
+
}
|
|
113
|
+
fprintf(fd, "\n\tHOST_STRIDES: ");
|
|
114
|
+
for (int i = 0; i < PyArray_NDIM(self); ++i)
|
|
115
|
+
{
|
|
116
|
+
fprintf(fd, "%ld\t", PyArray_STRIDES(self)[i]);
|
|
117
|
+
}
|
|
118
|
+
fprintf(fd, "\n\tFLAGS: ");
|
|
119
|
+
fprintf(fd, "\n\t\tC_CONTIGUOUS: %d", PyArray_ISCONTIGUOUS(self));
|
|
120
|
+
fprintf(fd, "\n\t\tPyArray_ISFORTRAN: %d PyArray_FORTRAN_IF:%d F_CONTIGUOUS: %d",
|
|
121
|
+
PyArray_ISFORTRAN(self), PyArray_FORTRAN_IF(self), PyArray_CHKFLAGS(self, NPY_FORTRAN));
|
|
122
|
+
fprintf(fd, "\n\t\tOWNDATA: %d", PyArray_CHKFLAGS(self, NPY_OWNDATA));
|
|
123
|
+
fprintf(fd, "\n\t\tWRITEABLE: %d", PyArray_ISWRITEABLE(self));
|
|
124
|
+
fprintf(fd, "\n\t\tALIGNED: %d", PyArray_ISALIGNED(self));
|
|
125
|
+
fprintf(fd, "\n\t\tUPDATEIFCOPY: %d", PyArray_CHKFLAGS(self, NPY_UPDATEIFCOPY));
|
|
126
|
+
fprintf(fd, "\n");
|
|
127
|
+
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
template <typename T>
|
|
131
|
+
static T ceil_intdiv(T a, T b)
|
|
132
|
+
{
|
|
133
|
+
return (a/b) + ((a % b) ? 1: 0);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
//Compute if the resulting array is c contiguous
|
|
138
|
+
static bool
|
|
139
|
+
PyGpuNdArray_is_c_contiguous(const PyGpuNdArrayObject * self)
|
|
140
|
+
{
|
|
141
|
+
bool c_contiguous = true;
|
|
142
|
+
int size = PyGpuNdArray_ITEMSIZE(self);
|
|
143
|
+
for (int i = PyGpuNdArray_NDIM(self)-1; (i >= 0) && c_contiguous; --i) {
|
|
144
|
+
if (PyGpuNdArray_STRIDE(self, i) != size) {
|
|
145
|
+
c_contiguous = false;
|
|
146
|
+
}
|
|
147
|
+
size = size * PyGpuNdArray_DIM(self, i);
|
|
148
|
+
}
|
|
149
|
+
return c_contiguous;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
//Compute if the resulting array is f contiguous
|
|
153
|
+
static bool
|
|
154
|
+
PyGpuNdArray_is_f_contiguous(const PyGpuNdArrayObject * self)
|
|
155
|
+
{
|
|
156
|
+
bool f_contiguous = true;
|
|
157
|
+
int size = PyGpuNdArray_ITEMSIZE(self);
|
|
158
|
+
for (int i = 0; i < PyGpuNdArray_NDIM(self) && f_contiguous; ++i) {
|
|
159
|
+
if (PyGpuNdArray_STRIDE(self, i) != size) {
|
|
160
|
+
f_contiguous = false;
|
|
161
|
+
}
|
|
162
|
+
size = size * PyGpuNdArray_DIM(self, i);
|
|
163
|
+
}
|
|
164
|
+
return f_contiguous;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
static PyObject *
|
|
168
|
+
PyGpuNdArray_as_c_contiguous(PyObject* dummy, PyObject* args, PyObject *kargs);
|
|
169
|
+
static PyObject *
|
|
170
|
+
PyGpuNdArray_as_f_contiguous(PyObject* dummy, PyObject* args, PyObject *kargs);
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* [Re]allocate a PyGpuNdArrayObject with access to 'nd' dimensions.
|
|
174
|
+
*
|
|
175
|
+
* Note: This does not allocate storage for data.
|
|
176
|
+
*/
|
|
177
|
+
static
|
|
178
|
+
int PyGpuNdArray_set_nd(PyGpuNdArrayObject * self, const int nd)
|
|
179
|
+
{
|
|
180
|
+
if (nd != PyGpuNdArray_NDIM(self))
|
|
181
|
+
{
|
|
182
|
+
if(0) fprintf(stderr, "PyGpuNdArray_set_nd: modif nd=%i to nd=%i\n", PyGpuNdArray_NDIM(self), nd);
|
|
183
|
+
|
|
184
|
+
if (PyGpuNdArray_DIMS(self)){
|
|
185
|
+
free(PyGpuNdArray_DIMS(self));
|
|
186
|
+
PyGpuNdArray_DIMS(self) = NULL;
|
|
187
|
+
PyGpuNdArray_NDIM(self) = -1;
|
|
188
|
+
}
|
|
189
|
+
if (PyGpuNdArray_STRIDES(self)){
|
|
190
|
+
free(PyGpuNdArray_STRIDES(self));
|
|
191
|
+
PyGpuNdArray_STRIDES(self) = NULL;
|
|
192
|
+
PyGpuNdArray_NDIM(self) = -1;
|
|
193
|
+
}
|
|
194
|
+
if (nd == -1) return 0;
|
|
195
|
+
|
|
196
|
+
PyGpuNdArray_DIMS(self) = (npy_intp*)malloc(nd*sizeof(npy_intp));
|
|
197
|
+
if (NULL == PyGpuNdArray_DIMS(self))
|
|
198
|
+
{
|
|
199
|
+
PyErr_SetString(PyExc_MemoryError, "PyGpuNdArray_set_nd: Failed to allocate dimensions");
|
|
200
|
+
return -1;
|
|
201
|
+
}
|
|
202
|
+
PyGpuNdArray_STRIDES(self) = (npy_intp*)malloc(nd*sizeof(npy_intp));
|
|
203
|
+
if (NULL == PyGpuNdArray_STRIDES(self))
|
|
204
|
+
{
|
|
205
|
+
PyErr_SetString(PyExc_MemoryError, "PyGpuNdArray_set_nd: Failed to allocate str");
|
|
206
|
+
return -1;
|
|
207
|
+
}
|
|
208
|
+
//initialize all dimensions and strides to 0
|
|
209
|
+
for (int i = 0; i < nd; ++i)
|
|
210
|
+
{
|
|
211
|
+
PyGpuNdArray_DIM(self, i) = 0;
|
|
212
|
+
PyGpuNdArray_STRIDES(self)[i] = 0;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
PyGpuNdArray_NDIM(self) = nd;
|
|
216
|
+
if(0) fprintf(stderr, "PyGpuNdArray_set_nd: end\n");
|
|
217
|
+
}
|
|
218
|
+
return 0;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
#endif
|
|
222
|
+
/*
|
|
223
|
+
Local Variables:
|
|
224
|
+
mode:c++
|
|
225
|
+
c-basic-offset:4
|
|
226
|
+
c-file-style:"stroustrup"
|
|
227
|
+
c-file-offsets:((innamespace . 0)(inline-open . 0))
|
|
228
|
+
indent-tabs-mode:nil
|
|
229
|
+
fill-column:79
|
|
230
|
+
End:
|
|
231
|
+
*/
|
|
232
|
+
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
|
pyopencl/tools.py
CHANGED
|
@@ -348,96 +348,98 @@ def _monkeypatch_svm_docstrings():
|
|
|
348
348
|
|
|
349
349
|
# {{{ PooledSVM
|
|
350
350
|
|
|
351
|
-
PooledSVM.__doc__ =
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
351
|
+
PooledSVM.__doc__ = ( # pylint: disable=possibly-used-before-assignment
|
|
352
|
+
"""An object representing a :class:`SVMPool`-based allocation of
|
|
353
|
+
:ref:`svm`. Analogous to :class:`~pyopencl.SVMAllocation`, however once
|
|
354
|
+
this object is deleted, its associated device memory is returned to the
|
|
355
|
+
pool from which it came.
|
|
356
356
|
|
|
357
|
-
|
|
357
|
+
.. versionadded:: 2022.2
|
|
358
358
|
|
|
359
|
-
|
|
359
|
+
.. note::
|
|
360
360
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
361
|
+
If the :class:`SVMAllocator` for the :class:`SVMPool` that allocated an
|
|
362
|
+
object of this type is associated with an (in-order)
|
|
363
|
+
:class:`~pyopencl.CommandQueue`, sufficient synchronization is provided
|
|
364
|
+
to ensure operations enqueued before deallocation complete before
|
|
365
|
+
operations from a different use (possibly in a different queue) are
|
|
366
|
+
permitted to start. This applies when :meth:`release` is called and
|
|
367
|
+
also when the object is freed automatically by the garbage collector.
|
|
368
368
|
|
|
369
|
-
|
|
369
|
+
Is a :class:`pyopencl.SVMPointer`.
|
|
370
370
|
|
|
371
|
-
|
|
371
|
+
Supports structural equality and hashing.
|
|
372
372
|
|
|
373
|
-
|
|
373
|
+
.. automethod:: release
|
|
374
374
|
|
|
375
|
-
|
|
376
|
-
|
|
375
|
+
Return the held memory to the pool. See the note about synchronization
|
|
376
|
+
behavior during deallocation above.
|
|
377
377
|
|
|
378
|
-
|
|
378
|
+
.. automethod:: enqueue_release
|
|
379
379
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
380
|
+
Synonymous to :meth:`release`, for consistency with
|
|
381
|
+
:class:`~pyopencl.SVMAllocation`. Note that, unlike
|
|
382
|
+
:meth:`pyopencl.SVMAllocation.enqueue_release`, specifying a queue
|
|
383
|
+
or events to be waited for is not supported.
|
|
384
384
|
|
|
385
|
-
|
|
385
|
+
.. automethod:: bind_to_queue
|
|
386
386
|
|
|
387
|
-
|
|
387
|
+
Analogous to :meth:`pyopencl.SVMAllocation.bind_to_queue`.
|
|
388
388
|
|
|
389
|
-
|
|
389
|
+
.. automethod:: unbind_from_queue
|
|
390
390
|
|
|
391
|
-
|
|
392
|
-
|
|
391
|
+
Analogous to :meth:`pyopencl.SVMAllocation.unbind_from_queue`.
|
|
392
|
+
""")
|
|
393
393
|
|
|
394
394
|
# }}}
|
|
395
395
|
|
|
396
396
|
# {{{ SVMAllocator
|
|
397
397
|
|
|
398
|
-
SVMAllocator.__doc__ =
|
|
399
|
-
|
|
398
|
+
SVMAllocator.__doc__ = ( # pylint: disable=possibly-used-before-assignment
|
|
399
|
+
"""
|
|
400
|
+
.. versionadded:: 2022.2
|
|
400
401
|
|
|
401
|
-
|
|
402
|
+
.. automethod:: __init__
|
|
402
403
|
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
404
|
+
:arg flags: See :class:`~pyopencl.svm_mem_flags`.
|
|
405
|
+
:arg queue: If not specified, allocations will be freed
|
|
406
|
+
eagerly, irrespective of whether pending/enqueued operations
|
|
407
|
+
are still using the memory.
|
|
407
408
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
409
|
+
If specified, deallocation of memory will be enqueued
|
|
410
|
+
with the given queue, and will only be performed
|
|
411
|
+
after previously-enqueued operations in the queue have
|
|
412
|
+
completed.
|
|
412
413
|
|
|
413
|
-
|
|
414
|
+
It is an error to specify an out-of-order queue.
|
|
414
415
|
|
|
415
|
-
|
|
416
|
+
.. warning::
|
|
416
417
|
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
418
|
+
Not specifying a queue will typically lead to undesired
|
|
419
|
+
behavior, including crashes and memory corruption.
|
|
420
|
+
See the warning in :ref:`svm`.
|
|
420
421
|
|
|
421
|
-
|
|
422
|
+
.. automethod:: __call__
|
|
422
423
|
|
|
423
|
-
|
|
424
|
-
|
|
424
|
+
Return a :class:`~pyopencl.SVMAllocation` of the given *size*.
|
|
425
|
+
""")
|
|
425
426
|
|
|
426
427
|
# }}}
|
|
427
428
|
|
|
428
429
|
# {{{ SVMPool
|
|
429
430
|
|
|
430
|
-
SVMPool.__doc__ =
|
|
431
|
-
|
|
432
|
-
|
|
431
|
+
SVMPool.__doc__ = ( # pylint: disable=possibly-used-before-assignment
|
|
432
|
+
remove_common_indentation("""
|
|
433
|
+
A memory pool for OpenCL device memory in :ref:`SVM <svm>` form.
|
|
434
|
+
*allocator* must be an instance of :class:`SVMAllocator`.
|
|
433
435
|
|
|
434
|
-
|
|
436
|
+
.. versionadded:: 2022.2
|
|
435
437
|
|
|
436
|
-
|
|
437
|
-
|
|
438
|
+
.. automethod:: __init__
|
|
439
|
+
.. automethod:: __call__
|
|
438
440
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
+
Return a :class:`PooledSVM` of the given *size*.
|
|
442
|
+
""") + _MEMPOOL_IFACE_DOCS)
|
|
441
443
|
|
|
442
444
|
# }}}
|
|
443
445
|
|
|
@@ -1363,6 +1365,8 @@ class _TemplateRenderer:
|
|
|
1363
1365
|
parsed_arg = arg
|
|
1364
1366
|
elif isinstance(arg, tuple):
|
|
1365
1367
|
parsed_arg = ScalarArg(self.parse_type(arg[0]), arg[1])
|
|
1368
|
+
else:
|
|
1369
|
+
raise TypeError("unexpected argument type: %s" % type(arg))
|
|
1366
1370
|
|
|
1367
1371
|
parsed_args.append(parsed_arg)
|
|
1368
1372
|
|
pyopencl/version.py
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
import re
|
|
2
|
+
from importlib import metadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
VERSION_TEXT = metadata.version("pyopencl")
|
|
6
|
+
_match = re.match("^([0-9.]+)([a-z0-9]*?)$", VERSION_TEXT)
|
|
7
|
+
assert _match is not None
|
|
8
|
+
VERSION_STATUS = _match.group(2)
|
|
9
|
+
VERSION = tuple(int(nr) for nr in _match.group(1).split("."))
|