pyopencl-2024.2.2-cp39-cp39-macosx_10_14_x86_64.whl → pyopencl-2024.2.5-cp39-cp39-macosx_10_14_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- pyopencl/__init__.py +16 -4
- pyopencl/_cl.cpython-39-darwin.so +0 -0
- pyopencl/algorithm.py +3 -1
- pyopencl/bitonic_sort.py +2 -0
- pyopencl/characterize/__init__.py +23 -0
- pyopencl/compyte/.git +1 -0
- pyopencl/compyte/.gitignore +21 -0
- pyopencl/compyte/ndarray/Makefile +31 -0
- pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
- pyopencl/compyte/ndarray/pygpu_language.h +207 -0
- pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
- pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
- pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
- pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
- pyopencl/tools.py +60 -56
- pyopencl/version.py +9 -3
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/METADATA +14 -14
- pyopencl-2024.2.5.dist-info/RECORD +56 -0
- {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.5.dist-info}/WHEEL +1 -1
- pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
- pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
- pyopencl-2024.2.2.data/data/Makefile.in +0 -21
- pyopencl-2024.2.2.data/data/README.rst +0 -70
- pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
- pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
- pyopencl-2024.2.2.data/data/configure.py +0 -6
- pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
- pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
- pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
- pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
- pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
- pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
- pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
- pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
- pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
- pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
- pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
- pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
- pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
- pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
- pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
- pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
- pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
- pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
- pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
- pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
- pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
- pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
- pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
- pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
- pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
- pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
- pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
- pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
- pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
- pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
- pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
- pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
- pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
- pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
- pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
- pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
- pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
- pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
- pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
- pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
- pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
- pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
- pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
- pyopencl-2024.2.2.data/data/pytest.ini +0 -3
- pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
- pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
- pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
- pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
- pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
- pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
- pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
- pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
- pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
- pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
- pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
- pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
- pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
- pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
- pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
- pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
- pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
- pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
- pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
- pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
- pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
- pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
- pyopencl-2024.2.2.dist-info/LICENSE +0 -282
- pyopencl-2024.2.2.dist-info/RECORD +0 -123
- pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
- {pyopencl-2024.2.2.data/data → pyopencl-2024.2.5.dist-info/licenses}/LICENSE +0 -0
pyopencl/__init__.py
CHANGED

@@ -30,6 +30,8 @@ import pyopencl.cltypes  # noqa: F401
 from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT  # noqa: F401
 
 
+__version__ = VERSION_TEXT
+
 logger = logging.getLogger(__name__)
 
 # This supports ocl-icd find shipped OpenCL ICDs, cf.

@@ -491,8 +493,16 @@ class Program:
         cache_dir = getattr(self._context, "cache_dir", None)
 
         build_descr = None
-
-
+        from pyopencl.characterize import has_src_build_cache
+
+        if (
+                (_PYOPENCL_NO_CACHE or has_src_build_cache(self._context.devices[0]))
+                and self._prg is None):
+            if _PYOPENCL_NO_CACHE:
+                build_descr = "uncached source build (cache disabled by user)"
+            else:
+                build_descr = "uncached source build (assuming cached by ICD)"
+
         self._prg = _cl._Program(self._context, self._source)
 
         from time import time

@@ -977,7 +987,8 @@ def _add_functionality():
         else:
             raise ValueError("images cannot have more than three dimensions")
 
-        desc = ImageDescriptor()
+        desc = ImageDescriptor() \
+                # pylint: disable=possibly-used-before-assignment
 
         desc.image_type = image_type
         desc.shape = shape  # also sets desc.array_size

@@ -1352,7 +1363,8 @@ def _add_functionality():
     svm_old_init = SVM.__init__
 
     def svm_init(self, mem):
-        svm_old_init(self, mem)
+        if get_cl_header_version() >= (2, 0):
+            svm_old_init(self, mem)
 
         self.mem = mem

pyopencl/_cl.cpython-39-darwin.so
CHANGED (binary file)
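The new `Program.build` logic above only sets a build description (and thereby skips pyopencl's own source cache) when caching is disabled or the ICD is known to cache itself. A minimal sketch of that decision, using the names from the diff; `describe_build` and its parameters are our illustrative stand-ins, not pyopencl code:

```python
from typing import Optional

def describe_build(no_cache: bool, icd_caches: Optional[bool],
                   already_built: bool) -> Optional[str]:
    # Mirrors the condition added above: _PYOPENCL_NO_CACHE -> no_cache,
    # has_src_build_cache(dev) -> icd_caches, self._prg is None -> not already_built.
    if (no_cache or icd_caches) and not already_built:
        if no_cache:
            return "uncached source build (cache disabled by user)"
        return "uncached source build (assuming cached by ICD)"
    return None  # otherwise pyopencl's own kernel cache handles the build

assert describe_build(True, None, False) is not None   # cache disabled by user
assert describe_build(False, True, False) is not None  # ICD caches source builds
assert describe_build(False, None, False) is None      # unknown: keep own cache
```

Note that a `None` return from `has_src_build_cache` is falsy, so an unknown ICD falls through to pyopencl's own caching path.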
pyopencl/algorithm.py
CHANGED

@@ -1225,7 +1225,9 @@ class ListOfListsBuilder:
         info_record.compressed_indices = cl.array.empty(
                 queue, (n_objects + 1,), index_dtype, allocator=allocator)
         info_record.compressed_indices[0] = 0
-        compress_events[name] = compress_kernel(
+
+        compress_events[name] = compress_kernel( \
+                # pylint: disable=possibly-used-before-assignment
             info_record.starts,
             compressed_counts,
             info_record.nonempty_indices,
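For orientation, `compressed_indices` here is the usual CSR-style row-starts array: index 0 is seeded with 0 and the remaining entries form a running sum of the per-row counts, which the compress kernel computes on the device. A host-side NumPy sketch of the same computation, with names and data that are ours and purely illustrative:

```python
import numpy as np

counts = np.array([3, 0, 2, 5], dtype=np.uint32)   # entries per output row
compressed_indices = np.empty(len(counts) + 1, dtype=np.uint32)
compressed_indices[0] = 0                          # mirrors the line in the diff
np.cumsum(counts, out=compressed_indices[1:])      # row-starts via running sum
print(compressed_indices)                          # -> [ 0  3  3  5 10]
```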
pyopencl/bitonic_sort.py
CHANGED

pyopencl/characterize/__init__.py
CHANGED

@@ -393,6 +393,8 @@ def has_struct_arg_count_bug(dev, ctx=None):
     return False
 
 
+# {{{ SVM capabilities
+
 def _may_have_svm(dev):
     has_svm = (dev.platform._get_cl_version() >= (2, 0)
             and cl.get_cl_header_version() >= (2, 0))

@@ -431,3 +433,24 @@
 def has_fine_grain_system_svm_atomics(dev):
     return has_fine_grain_system_svm(dev) and bool(dev.svm_capabilities
             & cl.device_svm_capabilities.ATOMICS)
+
+# }}}
+
+
+def has_src_build_cache(dev: cl.Device) -> Optional[bool]:
+    """
+    Return *True* if *dev* has internal support for caching builds from source,
+    *False* if it doesn't, and *None* if unknown.
+    """
+    if dev.platform.name == "Portable Computing Language":
+        return True
+
+    if nv_compute_capability(dev) is not None:
+        return True
+
+    if dev.platform.name == "AMD Accelerated Parallel Processing":
+        return False
+
+    return None
+
+# vim: foldmethod=marker
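A hedged usage sketch for the new `has_src_build_cache` helper: the three-valued return (`True`/`False`/`None`) is from the docstring above, while the policy shown is only an example of how a caller might act on it:

```python
import pyopencl as cl
from pyopencl.characterize import has_src_build_cache

ctx = cl.create_some_context()
dev = ctx.devices[0]

icd_caches = has_src_build_cache(dev)
if icd_caches:
    print(f"{dev.name}: ICD caches source builds itself")
elif icd_caches is None:
    print(f"{dev.name}: caching behavior unknown; rely on pyopencl's cache")
else:
    print(f"{dev.name}: ICD does not cache; rely on pyopencl's cache")
```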
pyopencl/compyte/.git
ADDED

@@ -0,0 +1 @@
+gitdir: ../../.git/modules/pyopencl/compyte
pyopencl/compyte/ndarray/Makefile
ADDED

@@ -0,0 +1,31 @@
+all: pygpu_ndarray.so
+
+PYTHONVERSION ?= $(shell python -c "import sys; print '%d.%d'%(sys.version_info[0], sys.version_info[1]"))
+CUDA_ROOT ?= /opt/lisa/os/cuda
+THEANO_ROOT ?= /u/bastienf/repos/Theano
+
+
+CFLAGS=-g -DDEBUG -DOFFSET
+# By default enable the OFFSET usage. Otherwise some test fail.
+CFLAGS=-g -DOFFSET
+#BINDIR=--compiler-bindir ${HOME}/.theano.nvcc-bindir
+
+#NPY_PATH!=python -c "import numpy;print numpy.__path__"
+#NPY_INCLUDE=-I${NPY_PATH}/core/include
+CUDA_INCLUDE=-I${CUDA_ROOT}/include
+PYTHON_INCLUDE=-I$(shell python -c "import distutils.sysconfig;print distutils.sysconfig.get_python_inc()")
+INCLUDES=${CUDA_INCLUDE} ${PYTHON_INCLUDE}
+CUDA_FLAGS=-Xlinker -rpath,${CUDA_ROOT}/lib64 -Xlinker -rpath,${CUDA_ROOT}/lib
+
+pygpu_language_cuda.o: pygpu_language_cuda.cu pygpu_language.h
+	nvcc -c ${CFLAGS} -m64 -Xcompiler -fPIC,-m64 ${CUDA_FLAGS} ${INCLUDES} ${BINDIR} -o $@ $<
+
+pygpu_ndarray.so: pygpu_ndarray.cpp pygpu_ndarray.h pygpu_language_cuda.o pygpu_ndarray_object.h
+	nvcc -shared ${CFLAGS} -m64 -Xcompiler -fPIC,-m64 ${CUDA_FLAGS} ${INCLUDES} ${BINDIR} -o $@ pygpu_language_cuda.o $< -lpython${PYTHONVERSION} -lcublas -lcudart
+
+clean:
+	rm -f pygpu_ndarray.so core.* *.o *~
+	rm -rf build
+
+cleantmp:
+	rm -f core.* *.o *~
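Note that this vendored Makefile dates from the Python 2 era: the `PYTHONVERSION` and `PYTHON_INCLUDE` probes use `print` as a statement and the deprecated `distutils.sysconfig`. For reference only, the equivalent queries in modern Python:

```python
import sys
import sysconfig

print("%d.%d" % (sys.version_info[0], sys.version_info[1]))  # PYTHONVERSION
print(sysconfig.get_paths()["include"])                      # PYTHON_INCLUDE
```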
pyopencl/compyte/ndarray/gpu_ndarray.h
ADDED

@@ -0,0 +1,35 @@
+#ifndef _GPU_NDARRAY_H
+#define _GPU_NDARRAY_H
+
+
+typedef struct GpuNdArray{
+    char* data; //pointer to data element [0,..,0].
+    int offset;
+    int nd; //the number of dimensions of the tensor
+
+    /**
+     * base:
+     * either NULL or a pointer to a fellow CudaNdarray into which this one is viewing.
+     * This pointer is never followed, except during Py_DECREF when we do not need it any longer.
+     */
+    void * base;
+    ssize_t * dimensions; //dim0, dim1, ... dim nd
+    ssize_t * strides; //stride0, stride1, ... stride nd
+    int flags; // Flags, see numpy flags
+    //DTYPE dtype; // fine for numeric types
+    //DtypeMeta * dtype_meta; // reserved for future use.
+    //PyArray_Descr *descr; /* Pointer to type structure */
+} GpuNdArray;
+
+#endif
+/*
+  Local Variables:
+  mode:c++
+  c-basic-offset:4
+  c-file-style:"stroustrup"
+  c-file-offsets:((innamespace . 0)(inline-open . 0))
+  indent-tabs-mode:nil
+  fill-column:79
+  End:
+*/
+// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
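The `GpuNdArray` struct above carries the same metadata NumPy uses for strided arrays: a data pointer, a byte `offset`, and per-dimension `dimensions` and byte `strides`. A small sketch of how those fields locate an element; the helper function is ours, for illustration only:

```python
from typing import Sequence

def element_byte_offset(offset: int, strides: Sequence[int],
                        index: Sequence[int]) -> int:
    """Byte offset of element `index`, relative to the `data` pointer."""
    return offset + sum(i * s for i, s in zip(index, strides))

# A C-contiguous 3x4 array of 8-byte elements has strides (32, 8):
assert element_byte_offset(0, (32, 8), (2, 1)) == 72
```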
pyopencl/compyte/ndarray/pygpu_language.h
ADDED

@@ -0,0 +1,207 @@
+/**
+ * This file contain the header for ALL code that depend on cuda or opencl.
+ */
+#ifndef _PYGPU_LANGUAGE_H
+#define _PYGPU_LANGUAGE_H
+#include <Python.h>
+//#include <iostream>
+
+#include "pygpu_ndarray_object.h"
+
+/////////////////////////
+// Alloc and Free
+/////////////////////////
+//If true, when there is a gpu malloc or free error, we print the size of allocated memory on the device.
+#define COMPUTE_GPU_MEM_USED 0
+#define VERBOSE_ALLOC_FREE 0
+//If true, we fill with NAN allocated device memory.
+#define ALLOC_MEMSET 0
+
+static int _outstanding_mallocs[] = {0,0};
+
+#ifdef DEBUG
+#define DPRINTF(args...) fprintf(stderr, args)
+#else
+#define DPRINTF(...)
+#endif
+
+#if COMPUTE_GPU_MEM_USED
+int _allocated_size = 0;
+const int TABLE_SIZE = 10000;
+struct table_struct{
+    void* ptr;
+    int size;
+};
+table_struct _alloc_size_table[TABLE_SIZE];
+#endif
+
+/**
+ * Allocation and freeing of device memory should go through these functions so that the lib can track memory usage.
+ *
+ * device_malloc will set the Python error message before returning None.
+ * device_free will return nonzero on failure (after setting the python error message)
+ */
+void * device_malloc(size_t size);
+int device_free(void * ptr);
+static PyObject *
+outstanding_mallocs(PyObject* self, PyObject * args)
+{
+    return PyInt_FromLong(_outstanding_mallocs[0]);
+}
+
+int PyGpuNdArray_CopyFromPyGpuNdArray(PyGpuNdArrayObject * self, PyGpuNdArrayObject * other, bool unbroadcast = false);
+
+/**
+ * PyGpuNdArray_alloc_contiguous
+ *
+ * Allocate storage space for a tensor of rank 'nd' and given dimensions.
+ *
+ * Note: PyGpuNdArray_alloc_contiguous is templated to work for both int dimensions and npy_intp dimensions
+ */
+template<typename inttype>
+int PyGpuNdArray_alloc_contiguous(PyGpuNdArrayObject *self, const int nd, const inttype * dim, NPY_ORDER order=NPY_CORDER)
+{
+    DPRINTF("PyGpuNdArray_alloc_contiguous: start nd=%i descr=%p\n", nd, self);
+
+    if (!PyGpuNdArray_DESCR(self)){
+        PyErr_SetString(PyExc_ValueError,
+                        "PyGpuNdArray_alloc_contiguous: The array don't have a type! We can't allocate it!\n");
+        return -1;
+    }
+
+    // allocate an empty ndarray with c_contiguous access
+    // return 0 on success
+    int size = 1; //set up the strides for contiguous tensor
+    assert (nd >= 0);
+    if (PyGpuNdArray_set_nd(self, nd))
+    {
+        return -1;
+    }
+    //TODO: check if by any chance our current dims are correct,
+    //      and strides already contiguous
+    //      in that case we can return right here.
+    DPRINTF("PyGpuNdArray_alloc_contiguous: before itemsize descr=%p elsize=%i\n", self->descr, self->descr->elsize);
+    int elsize = PyGpuNdArray_ITEMSIZE((PyObject*)self);
+    DPRINTF("PyGpuNdArray_alloc_contiguous: set_nd %d! elsize=%i\n", nd, elsize);
+    if(order != NPY_FORTRANORDER){
+        DPRINTF("PyGpuNdArray_alloc_contiguous: NPY_CORDER\n");
+        for (int i = nd-1; i >= 0; --i){
+            if (size == 0)
+                PyGpuNdArray_STRIDE(self, i) = elsize;
+            else
+                PyGpuNdArray_STRIDE(self,i) = size * elsize;
+            PyGpuNdArray_DIM(self,i) = dim[i];
+            size = size * dim[i];
+        }
+    }else if (nd>0){
+        DPRINTF("PyGpuNdArray_alloc_contiguous: NPY_FORTRANORDER\n");
+        size = dim[0];
+        PyGpuNdArray_STRIDE(self, 0) = elsize;
+        PyGpuNdArray_DIM(self, nd-1) = dim[nd-1];
+        for (int i = 1; i < nd; ++i){
+            if (size == 0)
+                PyGpuNdArray_STRIDE(self, i) = elsize;
+            else
+                PyGpuNdArray_STRIDE(self, i) = PyGpuNdArray_STRIDE(self, i-1) * dim[i-1];
+            PyGpuNdArray_DIM(self, nd-i-1) = dim[nd-i-1];
+            size = size * dim[i];
+        }
+    }
+
+    if (self->data_allocated != size)
+    {
+        // If self is a view, do not try to free its memory
+        if (self->data_allocated && device_free(PyGpuNdArray_DATA(self))) {
+            // Does this ever happen?? Do we need to set data_allocated or devdata to 0?
+            PyGpuNdArray_DATA(self) = NULL;
+            self->data_allocated = 0;
+            return -1;
+        }
+
+        assert(size>0);
+        DPRINTF("PyGpuNdArray_alloc_contiguous: will allocate for size=%d elements\n", size);
+
+        PyGpuNdArray_DATA(self) = (char*)device_malloc(size * PyGpuNdArray_ITEMSIZE((PyObject *)self));
+        if (!PyGpuNdArray_DATA(self))
+        {
+            PyGpuNdArray_set_nd(self,-1);
+            self->data_allocated = 0;
+            PyGpuNdArray_DATA(self) = 0;
+            return -1;
+        }
+
+        // The structure of self will be reused with newly allocated memory.
+        // If self was a view, we should remove the reference to its base.
+        // (If base was already NULL, the following has no effect.)
+        Py_XDECREF(self->base);
+        self->base = NULL;
+
+        self->data_allocated = size;
+        self->gpu_ndarray.flags = NPY_DEFAULT;
+        PyGpuNdArray_FLAGS(self) |= NPY_WRITEABLE;
+        PyGpuNdArray_FLAGS(self) |= NPY_OWNDATA;
+        if (nd == 0) {
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+            if (order != NPY_FORTRANORDER) {
+                PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
+            } else {
+                PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            }
+
+        }else if(nd == 1){//set c and f contiguous
+            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+        }else if(order != NPY_FORTRANORDER){//set c contiguous
+            PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+        }else{//set f contiguous
+            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) &= ~NPY_C_CONTIGUOUS;
+        }
+        PyGpuNdArray_FLAGS(self) &= ~NPY_UPDATEIFCOPY;
+    }else if(size == 0){
+        PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+        PyGpuNdArray_FLAGS(self) |= NPY_OWNDATA;
+        if (nd == 0) {
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+            if (order != NPY_FORTRANORDER) {
+                PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
+            } else {
+                PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            }
+
+        }else if(nd == 1){//set c and f contiguous
+            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+        }else if(order != NPY_FORTRANORDER){//set c contiguous
+            PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+        }else{//set f contiguous
+            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) &= ~NPY_C_CONTIGUOUS;
+        }
+        PyGpuNdArray_FLAGS(self) &= ~NPY_UPDATEIFCOPY;
+        return 0;
+    }else{
+        // How to check for the flags? Need to check if already contiguous.
+        PyErr_Format(PyExc_RuntimeError,
+                     "PyGpuNdArray_alloc_contiguous: self->data_allocated=%d, size=%d, cmp=%d",
+                     self->data_allocated, size, self->data_allocated != size
+            );
+        return -1;
+    }
+
+    if (order != NPY_FORTRANORDER) {
+        assert(PyGpuNdArray_is_c_contiguous(self));
+    } else {
+        assert(PyGpuNdArray_is_f_contiguous(self));
+    }
+    DPRINTF("PyGpuNdArray_alloc_contiguous: end\n");
+    return 0;
+}
+
+enum PyGpuTransfert { PyGpuHostToDevice, PyGpuDeviceToHost };
+int PyGpuMemcpy(void * dst, const void * src, int dev_offset, size_t bytes, PyGpuTransfert direction);
+
+int PyGpuMemset(void * dst, int data, size_t bytes);
+#endif
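The stride setup in `PyGpuNdArray_alloc_contiguous` above is the standard contiguous-layout computation. A compact Python rendering of the two branches (the function and its names are ours; `elsize` is the item size in bytes, and a stride is pinned to `elsize` once the running size hits zero, as in the C code):

```python
def contiguous_strides(dims: list[int], elsize: int, fortran: bool) -> list[int]:
    strides = [0] * len(dims)
    size = 1
    # C order fills strides from the last axis inward; Fortran order from the first.
    order = range(len(dims)) if fortran else range(len(dims) - 1, -1, -1)
    for i in order:
        strides[i] = elsize if size == 0 else size * elsize
        size *= dims[i]
    return strides

assert contiguous_strides([3, 4], elsize=8, fortran=False) == [32, 8]
assert contiguous_strides([3, 4], elsize=8, fortran=True) == [8, 24]
```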