pyopencl 2024.2.2__cp39-cp39-win_amd64.whl → 2024.2.4__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pyopencl has been flagged as potentially problematic.

Files changed (102)
  1. pyopencl/__init__.py +16 -4
  2. pyopencl/_cl.cp39-win_amd64.pyd +0 -0
  3. pyopencl/algorithm.py +3 -1
  4. pyopencl/bitonic_sort.py +2 -0
  5. pyopencl/characterize/__init__.py +23 -0
  6. pyopencl/compyte/.git +1 -0
  7. pyopencl/compyte/.github/workflows/autopush.yml +21 -0
  8. pyopencl/compyte/.github/workflows/ci.yml +30 -0
  9. pyopencl/compyte/.gitignore +21 -0
  10. pyopencl/compyte/ndarray/Makefile +31 -0
  11. pyopencl/compyte/ndarray/gpu_ndarray.h +35 -0
  12. pyopencl/compyte/ndarray/pygpu_language.h +207 -0
  13. pyopencl/compyte/ndarray/pygpu_language_cuda.cu +622 -0
  14. pyopencl/compyte/ndarray/pygpu_language_opencl.cpp +317 -0
  15. pyopencl/compyte/ndarray/pygpu_ndarray.cpp +1546 -0
  16. pyopencl/compyte/ndarray/pygpu_ndarray.h +71 -0
  17. pyopencl/compyte/ndarray/pygpu_ndarray_object.h +232 -0
  18. pyopencl/compyte/setup.cfg +9 -0
  19. pyopencl/tools.py +60 -56
  20. pyopencl/version.py +7 -3
  21. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.4.dist-info}/METADATA +105 -105
  22. pyopencl-2024.2.4.dist-info/RECORD +59 -0
  23. {pyopencl-2024.2.2.dist-info → pyopencl-2024.2.4.dist-info}/WHEEL +1 -1
  24. pyopencl-2024.2.2.data/data/CITATION.cff +0 -74
  25. pyopencl-2024.2.2.data/data/CMakeLists.txt +0 -83
  26. pyopencl-2024.2.2.data/data/Makefile.in +0 -21
  27. pyopencl-2024.2.2.data/data/README.rst +0 -70
  28. pyopencl-2024.2.2.data/data/README_SETUP.txt +0 -34
  29. pyopencl-2024.2.2.data/data/aksetup_helper.py +0 -1013
  30. pyopencl-2024.2.2.data/data/configure.py +0 -6
  31. pyopencl-2024.2.2.data/data/contrib/cldis.py +0 -91
  32. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/README +0 -29
  33. pyopencl-2024.2.2.data/data/contrib/fortran-to-opencl/translate.py +0 -1441
  34. pyopencl-2024.2.2.data/data/contrib/pyopencl.vim +0 -84
  35. pyopencl-2024.2.2.data/data/doc/Makefile +0 -23
  36. pyopencl-2024.2.2.data/data/doc/algorithm.rst +0 -214
  37. pyopencl-2024.2.2.data/data/doc/array.rst +0 -305
  38. pyopencl-2024.2.2.data/data/doc/conf.py +0 -26
  39. pyopencl-2024.2.2.data/data/doc/howto.rst +0 -105
  40. pyopencl-2024.2.2.data/data/doc/index.rst +0 -137
  41. pyopencl-2024.2.2.data/data/doc/make_constants.py +0 -561
  42. pyopencl-2024.2.2.data/data/doc/misc.rst +0 -885
  43. pyopencl-2024.2.2.data/data/doc/runtime.rst +0 -51
  44. pyopencl-2024.2.2.data/data/doc/runtime_const.rst +0 -30
  45. pyopencl-2024.2.2.data/data/doc/runtime_gl.rst +0 -78
  46. pyopencl-2024.2.2.data/data/doc/runtime_memory.rst +0 -527
  47. pyopencl-2024.2.2.data/data/doc/runtime_platform.rst +0 -184
  48. pyopencl-2024.2.2.data/data/doc/runtime_program.rst +0 -364
  49. pyopencl-2024.2.2.data/data/doc/runtime_queue.rst +0 -182
  50. pyopencl-2024.2.2.data/data/doc/subst.rst +0 -36
  51. pyopencl-2024.2.2.data/data/doc/tools.rst +0 -4
  52. pyopencl-2024.2.2.data/data/doc/types.rst +0 -42
  53. pyopencl-2024.2.2.data/data/examples/black-hole-accretion.py +0 -2227
  54. pyopencl-2024.2.2.data/data/examples/demo-struct-reduce.py +0 -75
  55. pyopencl-2024.2.2.data/data/examples/demo.py +0 -39
  56. pyopencl-2024.2.2.data/data/examples/demo_array.py +0 -32
  57. pyopencl-2024.2.2.data/data/examples/demo_array_svm.py +0 -37
  58. pyopencl-2024.2.2.data/data/examples/demo_elementwise.py +0 -34
  59. pyopencl-2024.2.2.data/data/examples/demo_elementwise_complex.py +0 -53
  60. pyopencl-2024.2.2.data/data/examples/demo_mandelbrot.py +0 -183
  61. pyopencl-2024.2.2.data/data/examples/demo_meta_codepy.py +0 -56
  62. pyopencl-2024.2.2.data/data/examples/demo_meta_template.py +0 -55
  63. pyopencl-2024.2.2.data/data/examples/dump-performance.py +0 -38
  64. pyopencl-2024.2.2.data/data/examples/dump-properties.py +0 -86
  65. pyopencl-2024.2.2.data/data/examples/gl_interop_demo.py +0 -84
  66. pyopencl-2024.2.2.data/data/examples/gl_particle_animation.py +0 -218
  67. pyopencl-2024.2.2.data/data/examples/ipython-demo.ipynb +0 -203
  68. pyopencl-2024.2.2.data/data/examples/median-filter.py +0 -99
  69. pyopencl-2024.2.2.data/data/examples/n-body.py +0 -1070
  70. pyopencl-2024.2.2.data/data/examples/narray.py +0 -37
  71. pyopencl-2024.2.2.data/data/examples/noisyImage.jpg +0 -0
  72. pyopencl-2024.2.2.data/data/examples/pi-monte-carlo.py +0 -1166
  73. pyopencl-2024.2.2.data/data/examples/svm.py +0 -82
  74. pyopencl-2024.2.2.data/data/examples/transpose.py +0 -229
  75. pyopencl-2024.2.2.data/data/pytest.ini +0 -3
  76. pyopencl-2024.2.2.data/data/src/bitlog.cpp +0 -51
  77. pyopencl-2024.2.2.data/data/src/bitlog.hpp +0 -83
  78. pyopencl-2024.2.2.data/data/src/clinfo_ext.h +0 -134
  79. pyopencl-2024.2.2.data/data/src/mempool.hpp +0 -444
  80. pyopencl-2024.2.2.data/data/src/pyopencl_ext.h +0 -77
  81. pyopencl-2024.2.2.data/data/src/tools.hpp +0 -90
  82. pyopencl-2024.2.2.data/data/src/wrap_cl.cpp +0 -61
  83. pyopencl-2024.2.2.data/data/src/wrap_cl.hpp +0 -5853
  84. pyopencl-2024.2.2.data/data/src/wrap_cl_part_1.cpp +0 -369
  85. pyopencl-2024.2.2.data/data/src/wrap_cl_part_2.cpp +0 -702
  86. pyopencl-2024.2.2.data/data/src/wrap_constants.cpp +0 -1274
  87. pyopencl-2024.2.2.data/data/src/wrap_helpers.hpp +0 -213
  88. pyopencl-2024.2.2.data/data/src/wrap_mempool.cpp +0 -738
  89. pyopencl-2024.2.2.data/data/test/add-vectors-32.spv +0 -0
  90. pyopencl-2024.2.2.data/data/test/add-vectors-64.spv +0 -0
  91. pyopencl-2024.2.2.data/data/test/empty-header.h +0 -1
  92. pyopencl-2024.2.2.data/data/test/test_algorithm.py +0 -1180
  93. pyopencl-2024.2.2.data/data/test/test_array.py +0 -2392
  94. pyopencl-2024.2.2.data/data/test/test_arrays_in_structs.py +0 -100
  95. pyopencl-2024.2.2.data/data/test/test_clmath.py +0 -529
  96. pyopencl-2024.2.2.data/data/test/test_clrandom.py +0 -75
  97. pyopencl-2024.2.2.data/data/test/test_enqueue_copy.py +0 -271
  98. pyopencl-2024.2.2.data/data/test/test_wrapper.py +0 -1565
  99. pyopencl-2024.2.2.dist-info/LICENSE +0 -282
  100. pyopencl-2024.2.2.dist-info/RECORD +0 -123
  101. pyopencl-2024.2.2.dist-info/top_level.txt +0 -1
  102. {pyopencl-2024.2.2.data/data → pyopencl-2024.2.4.dist-info/licenses}/LICENSE +0 -0
pyopencl/__init__.py CHANGED
@@ -30,6 +30,8 @@ import pyopencl.cltypes  # noqa: F401
 from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT  # noqa: F401
 
 
+__version__ = VERSION_TEXT
+
 logger = logging.getLogger(__name__)
 
 # This supports ocl-icd find shipped OpenCL ICDs, cf.
@@ -491,8 +493,16 @@ class Program:
             cache_dir = getattr(self._context, "cache_dir", None)
 
         build_descr = None
-        if _PYOPENCL_NO_CACHE and self._prg is None:
-            build_descr = "uncached source build (cache disabled by user)"
+        from pyopencl.characterize import has_src_build_cache
+
+        if (
+                (_PYOPENCL_NO_CACHE or has_src_build_cache(self._context.devices[0]))
+                and self._prg is None):
+            if _PYOPENCL_NO_CACHE:
+                build_descr = "uncached source build (cache disabled by user)"
+            else:
+                build_descr = "uncached source build (assuming cached by ICD)"
+
             self._prg = _cl._Program(self._context, self._source)
 
             from time import time
@@ -977,7 +987,8 @@ def _add_functionality():
         else:
             raise ValueError("images cannot have more than three dimensions")
 
-        desc = ImageDescriptor()
+        desc = ImageDescriptor() \
+            # pylint: disable=possibly-used-before-assignment
 
         desc.image_type = image_type
         desc.shape = shape  # also sets desc.array_size
@@ -1352,7 +1363,8 @@ def _add_functionality():
     svm_old_init = SVM.__init__
 
     def svm_init(self, mem):
-        svm_old_init(self, mem)
+        if get_cl_header_version() >= (2, 0):
+            svm_old_init(self, mem)
 
         self.mem = mem
 
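Taken together, the __init__.py changes expose the package version as a top-level __version__ attribute and make the "uncached source build" notice ICD-aware. A minimal sketch of the newly visible surface (not part of the diff; assumes a working OpenCL runtime, and create_some_context() may prompt for a device):

    # Sketch: exercising the 2024.2.4 additions to pyopencl/__init__.py.
    import pyopencl as cl
    from pyopencl.characterize import has_src_build_cache

    print(cl.__version__)  # now set from VERSION_TEXT

    ctx = cl.create_some_context()
    dev = ctx.devices[0]
    # True: the ICD caches source builds itself; False: it does not;
    # None: unknown, so pyopencl's own kernel cache stays useful.
    print(has_src_build_cache(dev))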
pyopencl/_cl.cp39-win_amd64.pyd CHANGED
Binary file (no textual diff).
pyopencl/algorithm.py CHANGED
@@ -1225,7 +1225,9 @@ class ListOfListsBuilder:
             info_record.compressed_indices = cl.array.empty(
                     queue, (n_objects + 1,), index_dtype, allocator=allocator)
             info_record.compressed_indices[0] = 0
-            compress_events[name] = compress_kernel(
+
+            compress_events[name] = compress_kernel( \
+                    # pylint: disable=possibly-used-before-assignment
                     info_record.starts,
                     compressed_counts,
                     info_record.nonempty_indices,
pyopencl/bitonic_sort.py CHANGED
@@ -225,6 +225,8 @@ class BitonicSort:
         elif inc >= 0:
             letter = "B2"
             ninc = 1
+        else:
+            raise AssertionError("Should not happen")
 
         nthreads = size >> ninc
 
pyopencl/characterize/__init__.py CHANGED
@@ -393,6 +393,8 @@ def has_struct_arg_count_bug(dev, ctx=None):
     return False
 
 
+# {{{ SVM capabilities
+
 def _may_have_svm(dev):
     has_svm = (dev.platform._get_cl_version() >= (2, 0)
             and cl.get_cl_header_version() >= (2, 0))
@@ -431,3 +433,24 @@ def has_fine_grain_buffer_svm_atomics(dev):
 def has_fine_grain_system_svm_atomics(dev):
     return has_fine_grain_system_svm(dev) and bool(dev.svm_capabilities
             & cl.device_svm_capabilities.ATOMICS)
+
+# }}}
+
+
+def has_src_build_cache(dev: cl.Device) -> Optional[bool]:
+    """
+    Return *True* if *dev* has internal support for caching builds from source,
+    *False* if it doesn't, and *None* if unknown.
+    """
+    if dev.platform.name == "Portable Computing Language":
+        return True
+
+    if nv_compute_capability(dev) is not None:
+        return True
+
+    if dev.platform.name == "AMD Accelerated Parallel Processing":
+        return False
+
+    return None
+
+# vim: foldmethod=marker
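The new has_src_build_cache() sits alongside the existing SVM probes in pyopencl.characterize. A hedged usage sketch (results vary by platform and driver; the fine-grain helper shown is the one visible in the diff context above):

    # Sketch: probing one device's capabilities.
    import pyopencl as cl
    from pyopencl import characterize

    dev = cl.create_some_context().devices[0]
    print(characterize.has_fine_grain_system_svm_atomics(dev))
    print(characterize.has_src_build_cache(dev))  # True / False / None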
pyopencl/compyte/.git ADDED
@@ -0,0 +1 @@
+gitdir: ../../.git/modules/pyopencl/compyte
pyopencl/compyte/.github/workflows/autopush.yml ADDED
@@ -0,0 +1,21 @@
+name: Gitlab mirror
+on:
+    push:
+        branches:
+        - main
+
+jobs:
+    autopush:
+        name: Automatic push to gitlab.tiker.net
+        runs-on: ubuntu-latest
+        steps:
+        -   uses: actions/checkout@v3
+        -   run: |
+                mkdir ~/.ssh && echo -e "Host gitlab.tiker.net\n\tStrictHostKeyChecking no\n" >> ~/.ssh/config
+                eval $(ssh-agent) && echo "$GITLAB_AUTOPUSH_KEY" | ssh-add -
+                git fetch --unshallow
+                git push "git@gitlab.tiker.net:inducer/$(basename $GITHUB_REPOSITORY).git" main
+            env:
+                GITLAB_AUTOPUSH_KEY: ${{ secrets.GITLAB_AUTOPUSH_KEY }}
+
+# vim: sw=4
pyopencl/compyte/.github/workflows/ci.yml ADDED
@@ -0,0 +1,30 @@
+name: CI
+on:
+    push:
+        branches:
+        - main
+    pull_request:
+    schedule:
+    - cron: '17 3 * * 0'
+
+jobs:
+    flake8:
+        name: Flake8
+        runs-on: ubuntu-latest
+        steps:
+        -   uses: actions/checkout@v3
+        -
+            uses: actions/setup-python@v4
+            with:
+                # matches compat target in setup.py
+                python-version: '3.8'
+        -   name: "Main Script"
+            run: |
+                curl -L -O https://tiker.net/ci-support-v0
+                . ./ci-support-v0
+                print_status_message
+                clean_up_repo_and_working_env
+                create_and_set_up_virtualenv
+                cd ..
+                cp compyte/setup.cfg .
+                install_and_run_flake8 compyte/*.py
pyopencl/compyte/.gitignore ADDED
@@ -0,0 +1,21 @@
+build
+.*.sw[po]
+.sw[po]
+*~
+*.pyc
+*.pyo
+*.egg-info
+MANIFEST
+dist
+setuptools*egg
+setuptools.pth
+distribute*egg
+distribute*tar.gz
+*.so
+*.o
+*.aux
+*.bbl
+*.blg
+*.log
+
+.cache
pyopencl/compyte/ndarray/Makefile ADDED
@@ -0,0 +1,31 @@
+all: pygpu_ndarray.so
+
+PYTHONVERSION ?= $(shell python -c "import sys; print '%d.%d'%(sys.version_info[0], sys.version_info[1]"))
+CUDA_ROOT ?= /opt/lisa/os/cuda
+THEANO_ROOT ?= /u/bastienf/repos/Theano
+
+
+CFLAGS=-g -DDEBUG -DOFFSET
+# By default enable the OFFSET usage. Otherwise some test fail.
+CFLAGS=-g -DOFFSET
+#BINDIR=--compiler-bindir ${HOME}/.theano.nvcc-bindir
+
+#NPY_PATH!=python -c "import numpy;print numpy.__path__"
+#NPY_INCLUDE=-I${NPY_PATH}/core/include
+CUDA_INCLUDE=-I${CUDA_ROOT}/include
+PYTHON_INCLUDE=-I$(shell python -c "import distutils.sysconfig;print distutils.sysconfig.get_python_inc()")
+INCLUDES=${CUDA_INCLUDE} ${PYTHON_INCLUDE}
+CUDA_FLAGS=-Xlinker -rpath,${CUDA_ROOT}/lib64 -Xlinker -rpath,${CUDA_ROOT}/lib
+
+pygpu_language_cuda.o: pygpu_language_cuda.cu pygpu_language.h
+	nvcc -c ${CFLAGS} -m64 -Xcompiler -fPIC,-m64 ${CUDA_FLAGS} ${INCLUDES} ${BINDIR} -o $@ $<
+
+pygpu_ndarray.so: pygpu_ndarray.cpp pygpu_ndarray.h pygpu_language_cuda.o pygpu_ndarray_object.h
+	nvcc -shared ${CFLAGS} -m64 -Xcompiler -fPIC,-m64 ${CUDA_FLAGS} ${INCLUDES} ${BINDIR} -o $@ pygpu_language_cuda.o $< -lpython${PYTHONVERSION} -lcublas -lcudart
+
+clean:
+	rm -f pygpu_ndarray.so core.* *.o *~
+	rm -rf build
+
+cleantmp:
+	rm -f core.* *.o *~
pyopencl/compyte/ndarray/gpu_ndarray.h ADDED
@@ -0,0 +1,35 @@
+#ifndef _GPU_NDARRAY_H
+#define _GPU_NDARRAY_H
+
+
+typedef struct GpuNdArray{
+    char* data; //pointer to data element [0,..,0].
+    int offset;
+    int nd; //the number of dimensions of the tensor
+
+    /**
+     * base:
+     * either NULL or a pointer to a fellow CudaNdarray into which this one is viewing.
+     * This pointer is never followed, except during Py_DECREF when we do not need it any longer.
+     */
+    void * base;
+    ssize_t * dimensions; //dim0, dim1, ... dim nd
+    ssize_t * strides; //stride0, stride1, ... stride nd
+    int flags; // Flags, see numpy flags
+    //DTYPE dtype; // fine for numeric types
+    //DtypeMeta * dtype_meta; // reserved for future use.
+    //PyArray_Descr *descr; /* Pointer to type structure */
+} GpuNdArray;
+
+#endif
+/*
+  Local Variables:
+  mode:c++
+  c-basic-offset:4
+  c-file-style:"stroustrup"
+  c-file-offsets:((innamespace . 0)(inline-open . 0))
+  indent-tabs-mode:nil
+  fill-column:79
+  End:
+*/
+// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 :
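The GpuNdArray struct mirrors numpy's ndarray metadata: a data pointer, a rank (nd), per-axis dimensions and strides, and numpy-style flags. For comparison, the same metadata read off a host-side numpy array (illustrative only, not part of the diff):

    import numpy as np

    a = np.zeros((2, 3), dtype=np.float32)
    print(a.ndim)     # corresponds to nd
    print(a.shape)    # corresponds to dimensions
    print(a.strides)  # corresponds to strides, in bytes: (12, 4)
    print(a.flags["C_CONTIGUOUS"])  # one bit of the flags field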
pyopencl/compyte/ndarray/pygpu_language.h ADDED
@@ -0,0 +1,207 @@
+/**
+ * This file contain the header for ALL code that depend on cuda or opencl.
+ */
+#ifndef _PYGPU_LANGUAGE_H
+#define _PYGPU_LANGUAGE_H
+#include <Python.h>
+//#include <iostream>
+
+#include "pygpu_ndarray_object.h"
+
+/////////////////////////
+// Alloc and Free
+/////////////////////////
+//If true, when there is a gpu malloc or free error, we print the size of allocated memory on the device.
+#define COMPUTE_GPU_MEM_USED 0
+#define VERBOSE_ALLOC_FREE 0
+//If true, we fill with NAN allocated device memory.
+#define ALLOC_MEMSET 0
+
+static int _outstanding_mallocs[] = {0,0};
+
+#ifdef DEBUG
+#define DPRINTF(args...) fprintf(stderr, args)
+#else
+#define DPRINTF(...)
+#endif
+
+#if COMPUTE_GPU_MEM_USED
+int _allocated_size = 0;
+const int TABLE_SIZE = 10000;
+struct table_struct{
+    void* ptr;
+    int size;
+};
+table_struct _alloc_size_table[TABLE_SIZE];
+#endif
+
+/**
+ * Allocation and freeing of device memory should go through these functions so that the lib can track memory usage.
+ *
+ * device_malloc will set the Python error message before returning None.
+ * device_free will return nonzero on failure (after setting the python error message)
+ */
+void * device_malloc(size_t size);
+int device_free(void * ptr);
+static PyObject *
+outstanding_mallocs(PyObject* self, PyObject * args)
+{
+    return PyInt_FromLong(_outstanding_mallocs[0]);
+}
+
+int PyGpuNdArray_CopyFromPyGpuNdArray(PyGpuNdArrayObject * self, PyGpuNdArrayObject * other, bool unbroadcast = false);
+
+/**
+ * PyGpuNdArray_alloc_contiguous
+ *
+ * Allocate storage space for a tensor of rank 'nd' and given dimensions.
+ *
+ * Note: PyGpuNdArray_alloc_contiguous is templated to work for both int dimensions and npy_intp dimensions
+ */
+template<typename inttype>
+int PyGpuNdArray_alloc_contiguous(PyGpuNdArrayObject *self, const int nd, const inttype * dim, NPY_ORDER order=NPY_CORDER)
+{
+    DPRINTF("PyGpuNdArray_alloc_contiguous: start nd=%i descr=%p\n", nd, self);
+
+    if (!PyGpuNdArray_DESCR(self)){
+        PyErr_SetString(PyExc_ValueError,
+                        "PyGpuNdArray_alloc_contiguous: The array don't have a type! We can't allocate it!\n");
+        return -1;
+    }
+
+    // allocate an empty ndarray with c_contiguous access
+    // return 0 on success
+    int size = 1; //set up the strides for contiguous tensor
+    assert (nd >= 0);
+    if (PyGpuNdArray_set_nd(self, nd))
+    {
+        return -1;
+    }
+    //TODO: check if by any chance our current dims are correct,
+    //      and strides already contiguous
+    //      in that case we can return right here.
+    DPRINTF("PyGpuNdArray_alloc_contiguous: before itemsize descr=%p elsize=%i\n", self->descr, self->descr->elsize);
+    int elsize = PyGpuNdArray_ITEMSIZE((PyObject*)self);
+    DPRINTF("PyGpuNdArray_alloc_contiguous: set_nd %d! elsize=%i\n", nd, elsize);
+    if(order != NPY_FORTRANORDER){
+        DPRINTF("PyGpuNdArray_alloc_contiguous: NPY_CORDER\n");
+        for (int i = nd-1; i >= 0; --i){
+            if (size == 0)
+                PyGpuNdArray_STRIDE(self, i) = elsize;
+            else
+                PyGpuNdArray_STRIDE(self,i) = size * elsize;
+            PyGpuNdArray_DIM(self,i) = dim[i];
+            size = size * dim[i];
+        }
+    }else if (nd>0){
+        DPRINTF("PyGpuNdArray_alloc_contiguous: NPY_FORTRANORDER\n");
+        size = dim[0];
+        PyGpuNdArray_STRIDE(self, 0) = elsize;
+        PyGpuNdArray_DIM(self, nd-1) = dim[nd-1];
+        for (int i = 1; i < nd; ++i){
+            if (size == 0)
+                PyGpuNdArray_STRIDE(self, i) = elsize;
+            else
+                PyGpuNdArray_STRIDE(self, i) = PyGpuNdArray_STRIDE(self, i-1) * dim[i-1];
+            PyGpuNdArray_DIM(self, nd-i-1) = dim[nd-i-1];
+            size = size * dim[i];
+        }
+    }
+
+    if (self->data_allocated != size)
+    {
+        // If self is a view, do not try to free its memory
+        if (self->data_allocated && device_free(PyGpuNdArray_DATA(self))) {
+            // Does this ever happen?? Do we need to set data_allocated or devdata to 0?
+            PyGpuNdArray_DATA(self) = NULL;
+            self->data_allocated = 0;
+            return -1;
+        }
+
+        assert(size>0);
+        DPRINTF("PyGpuNdArray_alloc_contiguous: will allocate for size=%d elements\n", size);
+
+        PyGpuNdArray_DATA(self) = (char*)device_malloc(size * PyGpuNdArray_ITEMSIZE((PyObject *)self));
+        if (!PyGpuNdArray_DATA(self))
+        {
+            PyGpuNdArray_set_nd(self,-1);
+            self->data_allocated = 0;
+            PyGpuNdArray_DATA(self) = 0;
+            return -1;
+        }
+
+        // The structure of self will be reused with newly allocated memory.
+        // If self was a view, we should remove the reference to its base.
+        // (If base was already NULL, the following has no effect.)
+        Py_XDECREF(self->base);
+        self->base = NULL;
+
+        self->data_allocated = size;
+        self->gpu_ndarray.flags = NPY_DEFAULT;
+        PyGpuNdArray_FLAGS(self) |= NPY_WRITEABLE;
+        PyGpuNdArray_FLAGS(self) |= NPY_OWNDATA;
+        if (nd == 0) {
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+            if (order != NPY_FORTRANORDER) {
+                PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
+            } else {
+                PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            }
+
+        }else if(nd == 1){//set c and f contiguous
+            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+        }else if(order != NPY_FORTRANORDER){//set c contiguous
+            PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+        }else{//set f contiguous
+            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) &= ~NPY_C_CONTIGUOUS;
+        }
+        PyGpuNdArray_FLAGS(self) &= ~NPY_UPDATEIFCOPY;
+    }else if(size == 0){
+        PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+        PyGpuNdArray_FLAGS(self) |= NPY_OWNDATA;
+        if (nd == 0) {
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+            if (order != NPY_FORTRANORDER) {
+                PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
+            } else {
+                PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            }
+
+        }else if(nd == 1){//set c and f contiguous
+            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+        }else if(order != NPY_FORTRANORDER){//set c contiguous
+            PyGpuNdArray_FLAGS(self) &= ~NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) |= NPY_C_CONTIGUOUS;
+        }else{//set f contiguous
+            PyGpuNdArray_FLAGS(self) |= NPY_F_CONTIGUOUS;
+            PyGpuNdArray_FLAGS(self) &= ~NPY_C_CONTIGUOUS;
+        }
+        PyGpuNdArray_FLAGS(self) &= ~NPY_UPDATEIFCOPY;
+        return 0;
+    }else{
+        // How to check for the flags? Need to check if already contiguous.
+        PyErr_Format(PyExc_RuntimeError,
+                     "PyGpuNdArray_alloc_contiguous: self->data_allocated=%d, size=%d, cmp=%d",
+                     self->data_allocated, size, self->data_allocated != size
+                     );
+        return -1;
+    }
+
+    if (order != NPY_FORTRANORDER) {
+        assert(PyGpuNdArray_is_c_contiguous(self));
+    } else {
+        assert(PyGpuNdArray_is_f_contiguous(self));
+    }
+    DPRINTF("PyGpuNdArray_alloc_contiguous: end\n");
+    return 0;
+}
+
+enum PyGpuTransfert { PyGpuHostToDevice, PyGpuDeviceToHost };
+int PyGpuMemcpy(void * dst, const void * src, int dev_offset, size_t bytes, PyGpuTransfert direction);
+
+int PyGpuMemset(void * dst, int data, size_t bytes);
+#endif
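For reference, the C-order stride bookkeeping in PyGpuNdArray_alloc_contiguous (the innermost axis gets stride elsize; each outer axis multiplies by the extent of the axis inside it, with a fallback to elsize once a zero-sized dimension has been seen) can be restated in a few lines of Python. This is an illustrative re-expression, not code from the package:

    # C-order strides, in bytes, as computed by the loop in the header above.
    def c_contiguous_strides(dims, elsize):
        strides = [0] * len(dims)
        size = 1
        for i in range(len(dims) - 1, -1, -1):
            # The header uses elsize when size == 0 (some dimension was 0).
            strides[i] = elsize if size == 0 else size * elsize
            size *= dims[i]
        return strides

    assert c_contiguous_strides([2, 3, 4], 4) == [48, 16, 4]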