numba-cuda 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +4 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +2 -1
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/RECORD +170 -115
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -2,63 +2,34 @@
|
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
4
|
from numba.cuda.testing import CUDATestCase
|
|
5
|
-
import
|
|
6
|
-
from numba.
|
|
7
|
-
from numba.core.
|
|
8
|
-
from numba.core.compiler_machinery import PassManager
|
|
5
|
+
from numba.cuda.core.compiler import CompilerBase
|
|
6
|
+
from numba.cuda.flags import Flags
|
|
7
|
+
from numba.cuda.core.compiler_machinery import PassManager
|
|
9
8
|
from numba.cuda.core import ir_utils
|
|
10
|
-
from numba.core import types, ir, bytecode,
|
|
11
|
-
from numba.
|
|
9
|
+
from numba.core import types, ir, bytecode, registry
|
|
10
|
+
from numba.cuda import compiler
|
|
11
|
+
from numba.cuda.core.untyped_passes import (
|
|
12
12
|
ExtractByteCode,
|
|
13
13
|
TranslateByteCode,
|
|
14
14
|
FixupArgs,
|
|
15
15
|
IRProcessing,
|
|
16
16
|
)
|
|
17
|
-
from numba.
|
|
18
|
-
from numba.core.typed_passes import (
|
|
17
|
+
from numba.cuda.core.typed_passes import (
|
|
19
18
|
NopythonTypeInference,
|
|
20
|
-
type_inference_stage,
|
|
21
19
|
DeadCodeElimination,
|
|
22
20
|
)
|
|
21
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
# global constant for testing find_const
|
|
26
25
|
GLOBAL_B = 11
|
|
27
26
|
|
|
28
27
|
|
|
29
|
-
@jitclass([("val", numba.core.types.List(numba.intp))])
|
|
30
|
-
class Dummy(object):
|
|
31
|
-
def __init__(self, val):
|
|
32
|
-
self.val = val
|
|
33
|
-
|
|
34
|
-
|
|
35
28
|
class TestIrUtils(CUDATestCase):
|
|
36
29
|
"""
|
|
37
30
|
Tests ir handling utility functions like find_callname.
|
|
38
31
|
"""
|
|
39
32
|
|
|
40
|
-
def test_obj_func_match(self):
|
|
41
|
-
"""Test matching of an object method (other than Array see #3449)"""
|
|
42
|
-
|
|
43
|
-
def test_func():
|
|
44
|
-
d = Dummy([1])
|
|
45
|
-
d.val.append(2)
|
|
46
|
-
|
|
47
|
-
test_ir = compiler.run_frontend(test_func)
|
|
48
|
-
typingctx = cpu_target.typing_context
|
|
49
|
-
targetctx = cpu_target.target_context
|
|
50
|
-
typing_res = type_inference_stage(
|
|
51
|
-
typingctx, targetctx, test_ir, (), None
|
|
52
|
-
)
|
|
53
|
-
matched_call = ir_utils.find_callname(
|
|
54
|
-
test_ir, test_ir.blocks[0].body[7].value, typing_res.typemap
|
|
55
|
-
)
|
|
56
|
-
self.assertTrue(
|
|
57
|
-
isinstance(matched_call, tuple)
|
|
58
|
-
and len(matched_call) == 2
|
|
59
|
-
and matched_call[0] == "append"
|
|
60
|
-
)
|
|
61
|
-
|
|
62
33
|
def test_dead_code_elimination(self):
|
|
63
34
|
class Tester(CompilerBase):
|
|
64
35
|
@classmethod
|
|
@@ -177,6 +148,7 @@ class TestIrUtils(CUDATestCase):
|
|
|
177
148
|
len(no_dce.blocks[0].body) - len(removed), len(w_dce.blocks[0].body)
|
|
178
149
|
)
|
|
179
150
|
|
|
151
|
+
@skip_on_cudasim("Skipping ir utils tests on CUDA simulator")
|
|
180
152
|
def test_find_const_global(self):
|
|
181
153
|
"""
|
|
182
154
|
Test find_const() for values in globals (ir.Global) and freevars
|
|
@@ -203,6 +175,7 @@ class TestIrUtils(CUDATestCase):
|
|
|
203
175
|
self.assertEqual(const_b, GLOBAL_B)
|
|
204
176
|
self.assertEqual(const_c, FREEVAR_C)
|
|
205
177
|
|
|
178
|
+
@skip_on_cudasim("Skipping ir utils tests on CUDA simulator")
|
|
206
179
|
def test_flatten_labels(self):
|
|
207
180
|
"""tests flatten_labels"""
|
|
208
181
|
|
|
@@ -5,7 +5,7 @@ import numpy as np
|
|
|
5
5
|
|
|
6
6
|
from numba import cuda, float32, void
|
|
7
7
|
from numba.cuda.testing import unittest, CUDATestCase
|
|
8
|
-
from numba.core import config
|
|
8
|
+
from numba.cuda.core import config
|
|
9
9
|
|
|
10
10
|
# Ensure the test takes a reasonable amount of time in the simulator
|
|
11
11
|
if config.ENABLE_CUDASIM:
|
|
@@ -15,7 +15,7 @@ from numba.core.types import f2, b1
|
|
|
15
15
|
from numba.cuda.typing import signature
|
|
16
16
|
import operator
|
|
17
17
|
import itertools
|
|
18
|
-
from numba.np.numpy_support import from_dtype
|
|
18
|
+
from numba.cuda.np.numpy_support import from_dtype
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def simple_fp16_div_scalar(ary, a, b):
|
|
@@ -102,6 +102,21 @@ print_too_many[1, 1](np.arange(33))
|
|
|
102
102
|
cuda.synchronize()
|
|
103
103
|
"""
|
|
104
104
|
|
|
105
|
+
print_bfloat16_usecase = """\
|
|
106
|
+
from numba import cuda
|
|
107
|
+
from numba.cuda import config
|
|
108
|
+
|
|
109
|
+
@cuda.jit
|
|
110
|
+
def print_bfloat16():
|
|
111
|
+
# 0.9375 is a dyadic rational; its integer significand can expand within 7 digits.
|
|
112
|
+
# printing this should not give any rounding error.
|
|
113
|
+
a = cuda.types.bfloat16(0.9375)
|
|
114
|
+
print(a, a, a)
|
|
115
|
+
|
|
116
|
+
print_bfloat16[1, 1]()
|
|
117
|
+
cuda.synchronize()
|
|
118
|
+
"""
|
|
119
|
+
|
|
105
120
|
|
|
106
121
|
class TestPrint(CUDATestCase):
|
|
107
122
|
# Note that in these tests we generally strip the output to avoid dealing
|
|
@@ -148,6 +163,11 @@ class TestPrint(CUDATestCase):
|
|
|
148
163
|
expected = [str(i) for i in np.ndindex(2, 2, 2)]
|
|
149
164
|
self.assertEqual(sorted(lines), expected)
|
|
150
165
|
|
|
166
|
+
@skip_on_cudasim("bfloat16 on host is not yet supported.")
|
|
167
|
+
def test_bfloat16(self):
|
|
168
|
+
output, _ = self.run_code(print_bfloat16_usecase)
|
|
169
|
+
self.assertEqual(output.strip(), "0.937500 0.937500 0.937500")
|
|
170
|
+
|
|
151
171
|
@skip_on_cudasim("cudasim can print unlimited output")
|
|
152
172
|
def test_too_many_args(self):
|
|
153
173
|
# Tests that we emit the format string and warn when there are more
|
|
@@ -7,7 +7,7 @@ from numba import cuda, vectorize
|
|
|
7
7
|
from numba.core import types
|
|
8
8
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
|
9
9
|
import unittest
|
|
10
|
-
from numba.np import numpy_support
|
|
10
|
+
from numba.cuda.np import numpy_support
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
@skip_on_cudasim("pickling not supported in CUDASIM")
|
|
@@ -7,7 +7,7 @@ from numba.core import types
|
|
|
7
7
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
|
-
from numba.np import numpy_support as nps
|
|
10
|
+
from numba.cuda.np import numpy_support as nps
|
|
11
11
|
|
|
12
12
|
from .extensions_usecases import test_struct_model_type, TestStruct
|
|
13
13
|
|
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
"""
|
|
4
|
+
Tests for SSA reconstruction
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import copy
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from numba import types, cuda
|
|
14
|
+
from numba.cuda import jit
|
|
15
|
+
from numba.core import errors
|
|
16
|
+
|
|
17
|
+
from numba.extending import overload
|
|
18
|
+
from numba.cuda.tests.support import override_config
|
|
19
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
_DEBUG = False
|
|
23
|
+
|
|
24
|
+
if _DEBUG:
|
|
25
|
+
# Enable debug logger on SSA reconstruction
|
|
26
|
+
ssa_logger = logging.getLogger("numba.cuda.core.ssa")
|
|
27
|
+
ssa_logger.setLevel(level=logging.DEBUG)
|
|
28
|
+
ssa_logger.addHandler(logging.StreamHandler(sys.stderr))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SSABaseTest(CUDATestCase):
|
|
32
|
+
"""
|
|
33
|
+
This class comes from numba tests, but has been modified to work with CUDA kernels.
|
|
34
|
+
Return values were replaced by output arrays, and tuple returns assign to elements of the output array.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def check_func(self, func, result_array, *args):
|
|
38
|
+
# For CUDA kernels, we need to create output arrays and call with [1,1] launch config
|
|
39
|
+
# Create GPU array with same shape as expected result array
|
|
40
|
+
gpu_result_array = cuda.to_device(np.zeros_like(result_array))
|
|
41
|
+
|
|
42
|
+
# Call the CUDA kernel
|
|
43
|
+
func[1, 1](gpu_result_array, *copy.deepcopy(args))
|
|
44
|
+
gpu_result = gpu_result_array.copy_to_host()
|
|
45
|
+
|
|
46
|
+
# Call the original Python function for expected result
|
|
47
|
+
cpu_result = np.zeros_like(result_array)
|
|
48
|
+
func.py_func(cpu_result, *copy.deepcopy(args))
|
|
49
|
+
|
|
50
|
+
# Compare all results
|
|
51
|
+
np.testing.assert_array_equal(gpu_result, cpu_result)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestSSA(SSABaseTest):
|
|
55
|
+
"""
|
|
56
|
+
Contains tests to help isolate problems in SSA
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
def test_argument_name_reused(self):
|
|
60
|
+
@jit
|
|
61
|
+
def foo(result, x):
|
|
62
|
+
x += 1
|
|
63
|
+
result[0] = x
|
|
64
|
+
|
|
65
|
+
self.check_func(foo, np.array([124.0]), 123)
|
|
66
|
+
|
|
67
|
+
def test_if_else_redefine(self):
|
|
68
|
+
@jit
|
|
69
|
+
def foo(result, x, y):
|
|
70
|
+
z = x * y
|
|
71
|
+
if x < y:
|
|
72
|
+
z = x
|
|
73
|
+
else:
|
|
74
|
+
z = y
|
|
75
|
+
result[0] = z
|
|
76
|
+
|
|
77
|
+
self.check_func(foo, np.array([2.0]), 3, 2)
|
|
78
|
+
self.check_func(foo, np.array([2.0]), 2, 3)
|
|
79
|
+
|
|
80
|
+
def test_sum_loop(self):
|
|
81
|
+
@jit
|
|
82
|
+
def foo(result, n):
|
|
83
|
+
c = 0
|
|
84
|
+
for i in range(n):
|
|
85
|
+
c += i
|
|
86
|
+
result[0] = c
|
|
87
|
+
|
|
88
|
+
self.check_func(foo, np.array([0.0]), 0)
|
|
89
|
+
self.check_func(foo, np.array([45.0]), 10)
|
|
90
|
+
|
|
91
|
+
def test_sum_loop_2vars(self):
|
|
92
|
+
@jit
|
|
93
|
+
def foo(result, n):
|
|
94
|
+
c = 0
|
|
95
|
+
d = n
|
|
96
|
+
for i in range(n):
|
|
97
|
+
c += i
|
|
98
|
+
d += n
|
|
99
|
+
result[0] = c
|
|
100
|
+
result[1] = d
|
|
101
|
+
|
|
102
|
+
self.check_func(foo, np.array([0.0, 0.0]), 0)
|
|
103
|
+
self.check_func(foo, np.array([45.0, 110.0]), 10)
|
|
104
|
+
|
|
105
|
+
def test_sum_2d_loop(self):
|
|
106
|
+
@jit
|
|
107
|
+
def foo(result, n):
|
|
108
|
+
c = 0
|
|
109
|
+
for i in range(n):
|
|
110
|
+
for j in range(n):
|
|
111
|
+
c += j
|
|
112
|
+
c += i
|
|
113
|
+
result[0] = c
|
|
114
|
+
|
|
115
|
+
self.check_func(foo, np.array([0.0]), 0)
|
|
116
|
+
self.check_func(foo, np.array([495.0]), 10)
|
|
117
|
+
|
|
118
|
+
def check_undefined_var(self, should_warn):
|
|
119
|
+
@jit
|
|
120
|
+
def foo(result, n):
|
|
121
|
+
if n:
|
|
122
|
+
if n > 0:
|
|
123
|
+
c = 0
|
|
124
|
+
result[0] = c
|
|
125
|
+
else:
|
|
126
|
+
# variable c is not defined in this branch
|
|
127
|
+
c += 1
|
|
128
|
+
result[0] = c
|
|
129
|
+
|
|
130
|
+
if should_warn:
|
|
131
|
+
with self.assertWarns(errors.NumbaWarning) as warns:
|
|
132
|
+
# n=1 so we won't actually run the branch with the uninitialized variable
|
|
133
|
+
self.check_func(foo, np.array([0]), 1)
|
|
134
|
+
self.assertIn(
|
|
135
|
+
"Detected uninitialized variable c", str(warns.warning)
|
|
136
|
+
)
|
|
137
|
+
else:
|
|
138
|
+
self.check_func(foo, np.array([0]), 1)
|
|
139
|
+
|
|
140
|
+
with self.assertRaises(UnboundLocalError):
|
|
141
|
+
result = np.array([0])
|
|
142
|
+
foo.py_func(result, 0)
|
|
143
|
+
|
|
144
|
+
@skip_on_cudasim(
|
|
145
|
+
"Numba variable warnings are not supported in the simulator"
|
|
146
|
+
)
|
|
147
|
+
def test_undefined_var(self):
|
|
148
|
+
with override_config("ALWAYS_WARN_UNINIT_VAR", 0):
|
|
149
|
+
self.check_undefined_var(should_warn=False)
|
|
150
|
+
with override_config("ALWAYS_WARN_UNINIT_VAR", 1):
|
|
151
|
+
self.check_undefined_var(should_warn=True)
|
|
152
|
+
|
|
153
|
+
def test_phi_propagation(self):
|
|
154
|
+
@jit
|
|
155
|
+
def foo(result, actions):
|
|
156
|
+
n = 1
|
|
157
|
+
|
|
158
|
+
i = 0
|
|
159
|
+
ct = 0
|
|
160
|
+
while n > 0 and i < len(actions):
|
|
161
|
+
n -= 1
|
|
162
|
+
|
|
163
|
+
while actions[i]:
|
|
164
|
+
if actions[i]:
|
|
165
|
+
if actions[i]:
|
|
166
|
+
n += 10
|
|
167
|
+
actions[i] -= 1
|
|
168
|
+
else:
|
|
169
|
+
if actions[i]:
|
|
170
|
+
n += 20
|
|
171
|
+
actions[i] += 1
|
|
172
|
+
|
|
173
|
+
ct += n
|
|
174
|
+
ct += n
|
|
175
|
+
result[0] = ct
|
|
176
|
+
result[1] = n
|
|
177
|
+
|
|
178
|
+
self.check_func(foo, np.array([1, 2]), np.array([1, 2]))
|
|
179
|
+
|
|
180
|
+
def test_unhandled_undefined(self):
|
|
181
|
+
@cuda.jit
|
|
182
|
+
def function1(arg1, arg2, arg3, arg4, arg5):
|
|
183
|
+
# This function is auto-generated.
|
|
184
|
+
if arg1:
|
|
185
|
+
var1 = arg2
|
|
186
|
+
var2 = arg3
|
|
187
|
+
var3 = var2
|
|
188
|
+
var4 = arg1
|
|
189
|
+
return
|
|
190
|
+
else:
|
|
191
|
+
if arg2:
|
|
192
|
+
if arg4:
|
|
193
|
+
var5 = arg4 # noqa: F841
|
|
194
|
+
return
|
|
195
|
+
else:
|
|
196
|
+
var6 = var4
|
|
197
|
+
return
|
|
198
|
+
return var6
|
|
199
|
+
else:
|
|
200
|
+
if arg5:
|
|
201
|
+
if var1:
|
|
202
|
+
if arg5:
|
|
203
|
+
var1 = var6
|
|
204
|
+
return
|
|
205
|
+
else:
|
|
206
|
+
var7 = arg2 # noqa: F841
|
|
207
|
+
return arg2
|
|
208
|
+
return
|
|
209
|
+
else:
|
|
210
|
+
if var2:
|
|
211
|
+
arg5 = arg2
|
|
212
|
+
return arg1
|
|
213
|
+
else:
|
|
214
|
+
var6 = var3
|
|
215
|
+
return var4
|
|
216
|
+
return
|
|
217
|
+
return
|
|
218
|
+
else:
|
|
219
|
+
var8 = var1
|
|
220
|
+
return
|
|
221
|
+
return var8
|
|
222
|
+
var9 = var3 # noqa: F841
|
|
223
|
+
var10 = arg5 # noqa: F841
|
|
224
|
+
return var1
|
|
225
|
+
|
|
226
|
+
NONE_SENTINEL = 99
|
|
227
|
+
|
|
228
|
+
@cuda.jit
|
|
229
|
+
def function1_caller(result, arg1, arg2, arg3, arg4, arg5):
|
|
230
|
+
retval = function1(arg1, arg2, arg3, arg4, arg5)
|
|
231
|
+
if retval is None:
|
|
232
|
+
result[0] = NONE_SENTINEL
|
|
233
|
+
else:
|
|
234
|
+
result[0] = retval
|
|
235
|
+
|
|
236
|
+
# The argument values are not critical for re-creating the bug
|
|
237
|
+
# because the bug occurs at compile time.
|
|
238
|
+
|
|
239
|
+
expect = function1.py_func(2, 3, 6, 0, 7)
|
|
240
|
+
if expect is None:
|
|
241
|
+
expect = NONE_SENTINEL
|
|
242
|
+
result = np.zeros(1, dtype=np.int64)
|
|
243
|
+
function1_caller[1, 1](result, 2, 3, 6, 0, 7)
|
|
244
|
+
got = result[0]
|
|
245
|
+
self.assertEqual(expect, got)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
class TestReportedSSAIssues(SSABaseTest):
|
|
249
|
+
# Tests from issues
|
|
250
|
+
# https://github.com/numba/numba/issues?q=is%3Aopen+is%3Aissue+label%3ASSA
|
|
251
|
+
|
|
252
|
+
def test_issue2194(self):
|
|
253
|
+
@jit
|
|
254
|
+
def foo(result, V):
|
|
255
|
+
s = np.uint32(1)
|
|
256
|
+
|
|
257
|
+
for i in range(s):
|
|
258
|
+
V[i] = 1
|
|
259
|
+
for i in range(s, 1):
|
|
260
|
+
pass
|
|
261
|
+
result[0] = V[0]
|
|
262
|
+
|
|
263
|
+
V = np.empty(1)
|
|
264
|
+
self.check_func(foo, np.array([1.0]), V)
|
|
265
|
+
|
|
266
|
+
def test_issue3094(self):
|
|
267
|
+
@jit
|
|
268
|
+
def foo(result, pred):
|
|
269
|
+
if pred:
|
|
270
|
+
x = 1
|
|
271
|
+
else:
|
|
272
|
+
x = 0
|
|
273
|
+
result[0] = x
|
|
274
|
+
|
|
275
|
+
self.check_func(foo, np.array([0]), False)
|
|
276
|
+
|
|
277
|
+
def test_issue3931(self):
|
|
278
|
+
@jit
|
|
279
|
+
def foo(result, arr):
|
|
280
|
+
for i in range(1):
|
|
281
|
+
arr = arr.reshape(3 * 2)
|
|
282
|
+
arr = arr.reshape(3, 2)
|
|
283
|
+
# Copy result array elements
|
|
284
|
+
for i in range(arr.shape[0]):
|
|
285
|
+
for j in range(arr.shape[1]):
|
|
286
|
+
result[i, j] = arr[i, j]
|
|
287
|
+
|
|
288
|
+
result_gpu = np.zeros((3, 2))
|
|
289
|
+
self.check_func(foo, result_gpu, np.zeros((3, 2)))
|
|
290
|
+
|
|
291
|
+
def test_issue3976(self):
|
|
292
|
+
def overload_this(a):
|
|
293
|
+
return 42
|
|
294
|
+
|
|
295
|
+
@jit
|
|
296
|
+
def foo(result, a):
|
|
297
|
+
if a:
|
|
298
|
+
s = 5
|
|
299
|
+
s = overload_this(s)
|
|
300
|
+
else:
|
|
301
|
+
s = 99
|
|
302
|
+
|
|
303
|
+
result[0] = s
|
|
304
|
+
|
|
305
|
+
@overload(overload_this)
|
|
306
|
+
def ol(a):
|
|
307
|
+
return overload_this
|
|
308
|
+
|
|
309
|
+
self.check_func(foo, np.array([42]), True)
|
|
310
|
+
|
|
311
|
+
def test_issue3979(self):
|
|
312
|
+
@jit
|
|
313
|
+
def foo(result, A, B):
|
|
314
|
+
x = A[0]
|
|
315
|
+
y = B[0]
|
|
316
|
+
for i in A:
|
|
317
|
+
x = i
|
|
318
|
+
for i in B:
|
|
319
|
+
y = i
|
|
320
|
+
result[0] = x
|
|
321
|
+
result[1] = y
|
|
322
|
+
|
|
323
|
+
self.check_func(
|
|
324
|
+
foo, np.array([2, 4]), np.array([1, 2]), np.array([3, 4])
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
def test_issue5219(self):
|
|
328
|
+
def overload_this(a, b=None):
|
|
329
|
+
if isinstance(b, tuple):
|
|
330
|
+
b = b[0]
|
|
331
|
+
return b
|
|
332
|
+
|
|
333
|
+
@overload(overload_this)
|
|
334
|
+
def ol(a, b=None):
|
|
335
|
+
b_is_tuple = isinstance(b, (types.Tuple, types.UniTuple))
|
|
336
|
+
|
|
337
|
+
def impl(a, b=None):
|
|
338
|
+
if b_is_tuple is True:
|
|
339
|
+
b = b[0]
|
|
340
|
+
return b
|
|
341
|
+
|
|
342
|
+
return impl
|
|
343
|
+
|
|
344
|
+
@jit
|
|
345
|
+
def test_tuple(result, a, b):
|
|
346
|
+
result[0] = overload_this(a, b)
|
|
347
|
+
|
|
348
|
+
self.check_func(test_tuple, np.array([2]), 1, (2,))
|
|
349
|
+
|
|
350
|
+
def test_issue5223(self):
|
|
351
|
+
@jit
|
|
352
|
+
def bar(result, x):
|
|
353
|
+
if len(x) == 5:
|
|
354
|
+
for i in range(len(x)):
|
|
355
|
+
result[i] = x[i]
|
|
356
|
+
else:
|
|
357
|
+
# Manual copy since .copy() not available in CUDA
|
|
358
|
+
for i in range(len(x)):
|
|
359
|
+
result[i] = x[i] + 1
|
|
360
|
+
|
|
361
|
+
a = np.ones(5)
|
|
362
|
+
a.flags.writeable = False
|
|
363
|
+
expected = np.ones(5) # Since len(a) == 5, it should return unchanged
|
|
364
|
+
self.check_func(bar, expected, a)
|
|
365
|
+
|
|
366
|
+
def test_issue5243(self):
|
|
367
|
+
@jit
|
|
368
|
+
def foo(result, q, lin):
|
|
369
|
+
stencil_val = 0.0 # noqa: F841
|
|
370
|
+
stencil_val = q[0, 0] # noqa: F841
|
|
371
|
+
result[0] = lin[0]
|
|
372
|
+
|
|
373
|
+
lin = np.array([0.1, 0.6, 0.3])
|
|
374
|
+
self.check_func(foo, np.array([0.1]), np.zeros((2, 2)), lin)
|
|
375
|
+
|
|
376
|
+
def test_issue5482_missing_variable_init(self):
|
|
377
|
+
# Test error that lowering fails because variable is missing
|
|
378
|
+
# a definition before use.
|
|
379
|
+
@jit
|
|
380
|
+
def foo(result, x, v, n):
|
|
381
|
+
for i in range(n):
|
|
382
|
+
if i == 0:
|
|
383
|
+
if i == x:
|
|
384
|
+
pass
|
|
385
|
+
else:
|
|
386
|
+
problematic = v
|
|
387
|
+
else:
|
|
388
|
+
if i == x:
|
|
389
|
+
pass
|
|
390
|
+
else:
|
|
391
|
+
problematic = problematic + v
|
|
392
|
+
result[0] = problematic
|
|
393
|
+
|
|
394
|
+
self.check_func(foo, np.array([10]), 1, 5, 3)
|
|
395
|
+
|
|
396
|
+
def test_issue5493_unneeded_phi(self):
|
|
397
|
+
# Test error that unneeded phi is inserted because variable does not
|
|
398
|
+
# have a dominance definition.
|
|
399
|
+
data = (np.ones(2), np.ones(2))
|
|
400
|
+
A = np.ones(1)
|
|
401
|
+
B = np.ones(1)
|
|
402
|
+
|
|
403
|
+
@jit
|
|
404
|
+
def foo(res, m, n, data):
|
|
405
|
+
if len(data) == 1:
|
|
406
|
+
v0 = data[0]
|
|
407
|
+
else:
|
|
408
|
+
v0 = data[0]
|
|
409
|
+
# Unneeded PHI node for `problematic` would be placed here
|
|
410
|
+
for _ in range(1, len(data)):
|
|
411
|
+
v0[0] += A[0]
|
|
412
|
+
|
|
413
|
+
for t in range(1, m):
|
|
414
|
+
for idx in range(n):
|
|
415
|
+
t = B
|
|
416
|
+
|
|
417
|
+
if idx == 0:
|
|
418
|
+
if idx == n - 1:
|
|
419
|
+
pass
|
|
420
|
+
else:
|
|
421
|
+
res[0] = t[0]
|
|
422
|
+
else:
|
|
423
|
+
if idx == n - 1:
|
|
424
|
+
pass
|
|
425
|
+
else:
|
|
426
|
+
res[0] += t[0]
|
|
427
|
+
|
|
428
|
+
self.check_func(foo, np.array([10]), 10, 10, data)
|
|
429
|
+
|
|
430
|
+
def test_issue5623_equal_statements_in_same_bb(self):
|
|
431
|
+
def foo(pred, stack):
|
|
432
|
+
i = 0
|
|
433
|
+
c = 1
|
|
434
|
+
|
|
435
|
+
if pred is True:
|
|
436
|
+
stack[i] = c
|
|
437
|
+
i += 1
|
|
438
|
+
stack[i] = c
|
|
439
|
+
i += 1
|
|
440
|
+
|
|
441
|
+
python = np.array([0, 666])
|
|
442
|
+
foo(True, python)
|
|
443
|
+
|
|
444
|
+
nb = np.array([0, 666])
|
|
445
|
+
|
|
446
|
+
# Convert to CUDA kernel
|
|
447
|
+
foo_cuda = jit(foo)
|
|
448
|
+
foo_cuda[1, 1](True, nb)
|
|
449
|
+
|
|
450
|
+
expect = np.array([1, 1])
|
|
451
|
+
|
|
452
|
+
np.testing.assert_array_equal(python, expect)
|
|
453
|
+
np.testing.assert_array_equal(nb, expect)
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
from numba import cuda, int32, float32
|
|
6
6
|
from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
|
|
7
|
-
from numba.core.config import ENABLE_CUDASIM
|
|
7
|
+
from numba.cuda.core.config import ENABLE_CUDASIM
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def useless_syncthreads(ary):
|