PyPI - numba-cuda - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

numba-cuda 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

numba_cuda/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.2.0
1	+ 0.3.0

numba_cuda/numba/cuda/dispatcher.py CHANGED Viewed

@@ -37,6 +37,8 @@ cuda_fp16_math_funcs = ['hsin', 'hcos',
                         'hrcp', 'hrint',
                         'htrunc', 'hdiv']
+reshape_funcs = ['nocopy_empty_reshape', 'numba_attempt_nocopy_reshape']
 class _Kernel(serialize.ReduceMixin):
     '''
@@ -117,25 +119,43 @@ class _Kernel(serialize.ReduceMixin):
         if not link:
             link = []
+        asm = lib.get_asm_str()
         # A kernel needs cooperative launch if grid_sync is being used.
-        self.cooperative = 'cudaCGGetIntrinsicHandle' in lib.get_asm_str()
+        self.cooperative = 'cudaCGGetIntrinsicHandle' in asm
         # We need to link against cudadevrt if grid sync is being used.
         if self.cooperative:
             lib.needs_cudadevrt = True
-        basedir = os.path.dirname(os.path.abspath(__file__))
-        asm = lib.get_asm_str()
+        def link_to_library_functions(library_functions, library_path,
+                                      prefix=None):
+            """
+            Dynamically links to library functions by searching for their names
+            in the specified library and linking to the corresponding source
+            file.
+            """
+            if prefix is not None:
+                library_functions = [f"{prefix}{fn}" for fn in
+                                     library_functions]
-        res = [fn for fn in cuda_fp16_math_funcs
-               if (f'__numba_wrapper_{fn}' in asm)]
+            found_functions = [fn for fn in library_functions
+                               if f'{fn}' in asm]
-        if res:
-            # Path to the source containing the foreign function
-            functions_cu_path = os.path.join(basedir,
-                                             'cpp_function_wrappers.cu')
-            link.append(functions_cu_path)
+            if found_functions:
+                basedir = os.path.dirname(os.path.abspath(__file__))
+                source_file_path = os.path.join(basedir, library_path)
+                link.append(source_file_path)
-        link = self.maybe_link_nrt(link, tgt_ctx, asm)
+            return found_functions
+        # Link to the helper library functions if needed
+        link_to_library_functions(reshape_funcs, 'reshape_funcs.cu')
+        # Link to the CUDA FP16 math library functions if needed
+        link_to_library_functions(cuda_fp16_math_funcs,
+                                  'cpp_function_wrappers.cu',
+                                  '__numba_wrapper_')
+        self.maybe_link_nrt(link, tgt_ctx, asm)
         for filepath in link:
             lib.add_linking_file(filepath)
@@ -160,7 +180,7 @@ class _Kernel(serialize.ReduceMixin):
     def maybe_link_nrt(self, link, tgt_ctx, asm):
         if not tgt_ctx.enable_nrt:
-            return link
+            return
         all_nrt = "|".join(self.NRT_functions)
         pattern = (
@@ -175,8 +195,6 @@ class _Kernel(serialize.ReduceMixin):
             nrt_path = os.path.join(basedir, 'runtime', 'nrt.cu')
             link.append(nrt_path)
-        return link
     @property
     def library(self):
         return self._codelibrary

numba_cuda/numba/cuda/reshape_funcs.cu ADDED Viewed

@@ -0,0 +1,151 @@
+/*
+ * Handle reshaping of zero-sized array.
+ * See numba_attempt_nocopy_reshape() below.
+ */
+#define NPY_MAXDIMS 32
+typedef long long int npy_intp;
+extern "C" __device__ int
+nocopy_empty_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides,
+                     npy_intp newnd, const npy_intp *newdims,
+                     npy_intp *newstrides, npy_intp itemsize,
+                     int is_f_order)
+{
+    /*
+     * Used for zero-sized reshapes: the stride values can be anything in
+     * theory, so every new axis simply receives the item size to keep the
+     * strides plausible. Only newnd, newstrides and itemsize are read;
+     * the remaining parameters are accepted but unused.
+     */
+    int axis = 0;
+    while (axis < newnd) {
+        newstrides[axis] = itemsize;
+        ++axis;
+    }
+    /* This path never fails: report a successful reshape. */
+    return 1;
+}
+/*
+ * Straight from Numpy's _attempt_nocopy_reshape()
+ * (np/core/src/multiarray/shape.c).
+ * Attempt to reshape an array without copying data
+ *
+ * This function should correctly handle all reshapes, including
+ * axes of length 1. Zero strides should work but are untested.
+ *
+ * If a copy is needed, returns 0
+ * If no copy is needed, returns 1 and fills `npy_intp *newstrides`
+ *     with appropriate strides
+ */
+extern "C" __device__ int
+numba_attempt_nocopy_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides,
+                             npy_intp newnd, const npy_intp *newdims,
+                             npy_intp *newstrides, npy_intp itemsize,
+                             int is_f_order)
+{
+    /*
+     * Parameters:
+     *   nd, dims, strides  - rank, shape and strides of the existing array
+     *   newnd, newdims     - rank and shape requested for the result
+     *   newstrides         - output; receives newnd strides on success
+     *   itemsize           - item size; used as the stride when no old
+     *                        stride applies
+     *   is_f_order         - nonzero for Fortran order, zero for C order
+     *
+     * Returns 1 when newstrides has been filled in, 0 when the requested
+     * shape cannot be produced without copying.
+     */
+    int oldnd;
+    npy_intp olddims[NPY_MAXDIMS];
+    npy_intp oldstrides[NPY_MAXDIMS];
+    npy_intp np, op, last_stride;
+    int oi, oj, ok, ni, nj, nk;
+    oldnd = 0;
+    /*
+     * Remove axes with dimension 1 from the old array. They have no effect
+     * but would need special cases since their strides do not matter.
+     */
+    for (oi = 0; oi < nd; oi++) {
+        if (dims[oi] != 1) {
+            olddims[oldnd] = dims[oi];
+            oldstrides[oldnd] = strides[oi];
+            oldnd++;
+        }
+    }
+    /* Total element counts of the new and (squeezed) old shapes. */
+    np = 1;
+    for (ni = 0; ni < newnd; ni++) {
+        np *= newdims[ni];
+    }
+    op = 1;
+    for (oi = 0; oi < oldnd; oi++) {
+        op *= olddims[oi];
+    }
+    if (np != op) {
+        /* different total sizes; no hope */
+        return 0;
+    }
+    if (np == 0) {
+        /* the Numpy code does not handle 0-sized arrays */
+        return nocopy_empty_reshape(nd, dims, strides,
+                                    newnd, newdims, newstrides,
+                                    itemsize, is_f_order);
+    }
+    /* oi to oj and ni to nj give the axis ranges currently worked with */
+    oi = 0;
+    oj = 1;
+    ni = 0;
+    nj = 1;
+    while (ni < newnd && oi < oldnd) {
+        np = newdims[ni];
+        op = olddims[oi];
+        /*
+         * Grow whichever range has the smaller element count until both
+         * ranges cover the same number of elements.
+         */
+        while (np != op) {
+            if (np < op) {
+                /* Misses trailing 1s, these are handled later */
+                np *= newdims[nj++];
+            } else {
+                op *= olddims[oj++];
+            }
+        }
+        /* Check whether the original axes can be combined */
+        for (ok = oi; ok < oj - 1; ok++) {
+            if (is_f_order) {
+                if (oldstrides[ok+1] != olddims[ok]*oldstrides[ok]) {
+                    /* not contiguous enough */
+                    return 0;
+                }
+            }
+            else {
+                /* C order */
+                if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]) {
+                    /* not contiguous enough */
+                    return 0;
+                }
+            }
+        }
+        /* Calculate new strides for all axes currently worked with */
+        if (is_f_order) {
+            newstrides[ni] = oldstrides[oi];
+            for (nk = ni + 1; nk < nj; nk++) {
+                newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1];
+            }
+        }
+        else {
+            /* C order */
+            newstrides[nj - 1] = oldstrides[oj - 1];
+            for (nk = nj - 1; nk > ni; nk--) {
+                newstrides[nk - 1] = newstrides[nk]*newdims[nk];
+            }
+        }
+        /* Advance both ranges to the next unprocessed axes. */
+        ni = nj++;
+        oi = oj++;
+    }
+    /*
+     * Set strides corresponding to trailing 1s of the new shape.
+     */
+    if (ni >= 1) {
+        last_stride = newstrides[ni - 1];
+        if (is_f_order) {
+            last_stride *= newdims[ni - 1];
+        }
+    }
+    else {
+        /*
+         * No new axis was processed above (the squeezed old array has no
+         * axes left, or newnd is 0), so there is no newdims[ni - 1] to
+         * scale by: indexing it would read before the start of newdims.
+         * The size check above passed against an empty old product, so
+         * the remaining new axes hold a single element in total and
+         * itemsize is used as their stride in either order.
+         */
+        last_stride = itemsize;
+    }
+    for (nk = ni; nk < newnd; nk++) {
+        newstrides[nk] = last_stride;
+    }
+    return 1;
+}

numba_cuda/numba/cuda/tests/cudapy/test_array.py CHANGED Viewed

@@ -12,6 +12,31 @@ else:
                             cuda.pinned_array_like)
+def array_reshape1d(arr, newshape, got):
+    # Reshape `arr` to `newshape` and copy the result into `got`, one
+    # element at a time along the single axis that is indexed.
+    # test_array_reshape passes this function to cuda.jit.
+    y = arr.reshape(newshape)
+    for i in range(y.shape[0]):
+        got[i] = y[i]
+def array_reshape2d(arr, newshape, got):
+    # Reshape `arr` to `newshape` and copy the result into `got`, indexing
+    # both axes explicitly.
+    # test_array_reshape passes this function to cuda.jit.
+    y = arr.reshape(newshape)
+    for i in range(y.shape[0]):
+        for j in range(y.shape[1]):
+            got[i, j] = y[i, j]
+def array_reshape3d(arr, newshape, got):
+    # Reshape `arr` to `newshape` and copy the result into `got`, indexing
+    # all three axes explicitly.
+    # test_array_reshape passes this function to cuda.jit.
+    y = arr.reshape(newshape)
+    for i in range(y.shape[0]):
+        for j in range(y.shape[1]):
+            for k in range(y.shape[2]):
+                got[i, j, k] = y[i, j, k]
+def array_reshape(arr, newshape):
+    # Reference implementation: test_array_reshape calls this directly
+    # (without cuda.jit) to compute the expected result.
+    return arr.reshape(newshape)
 class TestCudaArray(CUDATestCase):
     def test_gpu_array_zero_length(self):
         x = np.arange(0)
@@ -255,6 +280,54 @@ class TestCudaArray(CUDATestCase):
         self.assertEqual(1, len(func.overloads))
+    def test_array_reshape(self):
+        """Check array.reshape inside CUDA kernels against the reference.
+
+        Each case jits one of the array_reshape{1,2,3}d functions, launches
+        it with a single thread, and compares what the kernel wrote into
+        `got` with the result of array_reshape called on the host.
+        """
+        def check(pyfunc, kernelfunc, arr, shape):
+            # Compare the kernel's output element-for-element with the
+            # expected array computed by pyfunc.
+            kernel = cuda.jit(kernelfunc)
+            expected = pyfunc(arr, shape)
+            got = np.zeros(expected.shape, dtype=arr.dtype)
+            kernel[1, 1](arr, shape, got)
+            self.assertPreciseEqual(got, expected)
+        def check_only_shape(kernelfunc, arr, shape, expected_shape):
+            # Launch the kernel without comparing against a host result.
+            # NOTE(review): `got` is allocated with expected_shape, so the
+            # shape assertion below holds by construction; this helper
+            # mainly verifies that the kernel compiles and runs.
+            kernel = cuda.jit(kernelfunc)
+            got = np.zeros(expected_shape, dtype=arr.dtype)
+            kernel[1, 1](arr, shape, got)
+            self.assertEqual(got.shape, expected_shape)
+            self.assertEqual(got.size, arr.size)
+        # 0-sized arrays
+        def check_empty(arr):
+            check(array_reshape, array_reshape1d, arr, 0)
+            check(array_reshape, array_reshape1d, arr, (0,))
+            check(array_reshape, array_reshape3d, arr, (1, 0, 2))
+            # Shapes containing -1 together with a 0 are only run through
+            # check_only_shape, with the expected shape given explicitly.
+            check_only_shape(array_reshape2d, arr, (0, -1), (0, 0))
+            check_only_shape(array_reshape2d, arr, (4, -1), (4, 0))
+            check_only_shape(array_reshape3d, arr, (-1, 0, 4), (0, 0, 4))
+        # C-contiguous
+        arr = np.arange(24)
+        check(array_reshape, array_reshape1d, arr, (24,))
+        check(array_reshape, array_reshape2d, arr, (4, 6))
+        check(array_reshape, array_reshape2d, arr, (8, 3))
+        check(array_reshape, array_reshape3d, arr, (8, 1, 3))
+        # Same data, but the source array carries several length-1 axes.
+        arr = np.arange(24).reshape((1, 8, 1, 1, 3, 1))
+        check(array_reshape, array_reshape1d, arr, (24,))
+        check(array_reshape, array_reshape2d, arr, (4, 6))
+        check(array_reshape, array_reshape2d, arr, (8, 3))
+        check(array_reshape, array_reshape3d, arr, (8, 1, 3))
+        # Test negative shape value
+        arr = np.arange(25).reshape(5,5)
+        check(array_reshape, array_reshape1d, arr, -1)
+        check(array_reshape, array_reshape1d, arr, (-1,))
+        check(array_reshape, array_reshape2d, arr, (-1, 5))
+        check(array_reshape, array_reshape3d, arr, (5, -1, 5))
+        check(array_reshape, array_reshape3d, arr, (5, 5, -1))
+        # Finally run the 0-sized cases on an empty array.
+        arr = np.array([])
+        check_empty(arr)
 if __name__ == '__main__':
     unittest.main()

{numba_cuda-0.2.0.dist-info → numba_cuda-0.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: numba-cuda
-Version: 0.2.0
+Version: 0.3.0
 Summary: CUDA target for Numba
 Author: Anaconda Inc., NVIDIA Corporation
 License: BSD 2-clause

{numba_cuda-0.2.0.dist-info → numba_cuda-0.3.0.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
 _numba_cuda_redirector.py,sha256=QKJmYICSQvjvph0Zw9OW015MsuKxIF28GPFjR35AXLM,2681
-numba_cuda/VERSION,sha256=H5MN0fEzwfl6lP46y42zQ3LPTAH_2ys_9Mpy-UlBIek,6
+numba_cuda/VERSION,sha256=2RXMldbKj0euKXcT7UbU5cXZnd0p_Dxh4mO98wXytbA,6
 numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
 numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
 numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -21,7 +21,7 @@ numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZ
 numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
 numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
 numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
-numba_cuda/numba/cuda/dispatcher.py,sha256=nDfPCzxJ7UwA4uiz-fsMMgQb2WXByvzHLtkLMXW9JXk,41244
+numba_cuda/numba/cuda/dispatcher.py,sha256=Q8WN7jTAX3xy_D2sEgSeFHAivqavI2PRlfDjR7ysing,42073
 numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
 numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
 numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
@@ -36,6 +36,7 @@ numba_cuda/numba/cuda/models.py,sha256=2c_seT-cWX-VyWYmcapaqOEl1M4FX6_kdIOusj4s5
 numba_cuda/numba/cuda/nvvmutils.py,sha256=W1zr1TpnmFjTkHF0qeu5wnBHub6gzrnpzsvgmu2OLcU,8295
 numba_cuda/numba/cuda/printimpl.py,sha256=Y1BCQ7EgO2wQ7O6LibNVYBG3tmjVTvmURATW403rLao,3504
 numba_cuda/numba/cuda/random.py,sha256=khX8iDdde_RTUPWhAqrxZacHRQAorFr7BokPuxRWzrg,10456
+numba_cuda/numba/cuda/reshape_funcs.cu,sha256=H5UAa-VAvoxW9SQwJO88ZrDXC64nWALW3Ch4cHAAqO4,4325
 numba_cuda/numba/cuda/simulator_init.py,sha256=W_bPRtmPGOQVuiprbgt7ENnnnELv_LPCeLDIsfsvFZ8,460
 numba_cuda/numba/cuda/stubs.py,sha256=W3tozv4ganMnfbdFqyPjgQXYeX8GQhwx_xXgv8jk6iM,22270
 numba_cuda/numba/cuda/target.py,sha256=hBflzmxCGlmTugWT1sYhZj9f4HkQAMK2RQ9lO85pMW4,17052
@@ -119,7 +120,7 @@ numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py,sha256=l-tW4F935zxOvKb
 numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx,sha256=8D6OWUO4GnjUTqyzQc_epd7pT8fPy0_bJdkmu6Bbm4Q,521
 numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py,sha256=7Wz7i_6VVq5EeZuqkcg1dVfW9DbfC1rp44H7pe4voqI,1781
 numba_cuda/numba/cuda/tests/cudapy/test_alignment.py,sha256=dik8i4fG6MPlxVilW4l9pM5o_vBMAsRGItldeE9hvvU,1218
-numba_cuda/numba/cuda/tests/cudapy/test_array.py,sha256=bS6rzvp6BKVLFyW8mFRbVoZbxIbc2WCl5SzQ6XG0s8c,10515
+numba_cuda/numba/cuda/tests/cudapy/test_array.py,sha256=ty1s2yiX7dump54lOQsBykRJQxzi78wrka5GbQrB1Qo,13216
 numba_cuda/numba/cuda/tests/cudapy/test_array_args.py,sha256=XTX4cT7BZexmw0BZPzeudf4OZgM6GNqzjDPyIxJyTdk,4979
 numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py,sha256=shdeSAOKaoZbrvC8hXhETWH8FhyZPTmHg_TMw2DcdUA,969
 numba_cuda/numba/cuda/tests/cudapy/test_atomics.py,sha256=yQWTHQH7WPafLwNhnfOWqAskybXTw1BBwvxL5OLqEAk,58177
@@ -236,8 +237,8 @@ numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq
 numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
 numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
 numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
-numba_cuda-0.2.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
-numba_cuda-0.2.0.dist-info/METADATA,sha256=u3e2Hm6iPkdyyDwsvGJ7B3RecpE7X3zA2SHrX-z7Kc4,1496
-numba_cuda-0.2.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-numba_cuda-0.2.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
-numba_cuda-0.2.0.dist-info/RECORD,,
+numba_cuda-0.3.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+numba_cuda-0.3.0.dist-info/METADATA,sha256=rbDC27qfmpgf9Qw5_p5YiSRyqc9hd_W2rAsA-geDRKk,1496
+numba_cuda-0.3.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+numba_cuda-0.3.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+numba_cuda-0.3.0.dist-info/RECORD,,

{numba_cuda-0.2.0.dist-info → numba_cuda-0.3.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{numba_cuda-0.2.0.dist-info → numba_cuda-0.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{numba_cuda-0.2.0.dist-info → numba_cuda-0.3.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

numba-cuda 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

numba-cuda 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl