numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
|
@@ -9,7 +9,8 @@ import cffi
|
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
|
|
12
|
-
from numba import config, cuda, int32
|
|
12
|
+
from numba import cuda, int32
|
|
13
|
+
from numba.cuda import config
|
|
13
14
|
from numba.types import CPointer
|
|
14
15
|
from numba.cuda.testing import (
|
|
15
16
|
unittest,
|
|
@@ -17,7 +18,6 @@ from numba.cuda.testing import (
|
|
|
17
18
|
skip_on_cudasim,
|
|
18
19
|
skip_unless_cc_60,
|
|
19
20
|
skip_if_cudadevrt_missing,
|
|
20
|
-
skip_if_mvc_enabled,
|
|
21
21
|
)
|
|
22
22
|
from numba.core.typing import signature
|
|
23
23
|
|
|
@@ -63,7 +63,6 @@ def sequential_rows(M):
|
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
@skip_if_cudadevrt_missing
|
|
66
|
-
@skip_if_mvc_enabled("CG not supported with MVC")
|
|
67
66
|
class TestCudaCooperativeGroups(CUDATestCase):
|
|
68
67
|
@skip_unless_cc_60
|
|
69
68
|
def test_this_grid(self):
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2017 Intel Corporation
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
from numba.core import types, ir, config
|
|
8
|
+
from numba.cuda import compiler
|
|
9
|
+
from numba.cuda.core.annotations import type_annotations
|
|
10
|
+
from numba.cuda.core.ir_utils import (
|
|
11
|
+
copy_propagate,
|
|
12
|
+
apply_copy_propagate,
|
|
13
|
+
get_name_var_table,
|
|
14
|
+
)
|
|
15
|
+
from numba.cuda.core.typed_passes import type_inference_stage
|
|
16
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
|
17
|
+
import unittest
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _test_will_propagate(b, z, w):
|
|
21
|
+
x = 3
|
|
22
|
+
x1 = x
|
|
23
|
+
if b > 0:
|
|
24
|
+
y = z + w # noqa: F821
|
|
25
|
+
else:
|
|
26
|
+
y = 0 # noqa: F841
|
|
27
|
+
a = 2 * x1
|
|
28
|
+
return a < b
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _test_wont_propagate(b, z, w):
|
|
32
|
+
x = 3
|
|
33
|
+
if b > 0:
|
|
34
|
+
y = z + w # noqa: F841
|
|
35
|
+
x = 1
|
|
36
|
+
else:
|
|
37
|
+
y = 0 # noqa: F841
|
|
38
|
+
a = 2 * x
|
|
39
|
+
return a < b
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _in_list_var(list_var, var):
|
|
43
|
+
for i in list_var:
|
|
44
|
+
if i.name == var:
|
|
45
|
+
return True
|
|
46
|
+
return False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _find_assign(func_ir, var):
    """Return True if an assignment to another variable still mentions *var*.

    Every block of ``func_ir.blocks`` is scanned for ``ir.Assign`` statements
    whose target is not named *var*; the function returns True as soon as one
    of them lists a variable named *var* (via ``inst.list_vars()``), and False
    if no such statement exists.
    """
    for block in func_ir.blocks.values():
        for inst in block.body:
            # Skip the statement(s) that define ``var`` itself; only uses
            # of ``var`` by other assignments are of interest.
            if isinstance(inst, ir.Assign) and inst.target.name != var:
                if _in_list_var(inst.list_vars(), var):
                    return True

    return False
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@skip_on_cudasim("cudasim doesn't support run_frontend")
class TestCopyPropagate(CUDATestCase):
    """Run ``copy_propagate``/``apply_copy_propagate`` on IR typed for CUDA."""

    def _run_copy_propagate(self, pyfunc):
        """Return the IR of *pyfunc* after type inference and copy propagation.

        The function is compiled to IR with ``compiler.run_frontend``, typed
        for three ``int64`` arguments using the CUDA target's typing and
        target contexts, annotated, and then rewritten in place by
        ``apply_copy_propagate``.
        """
        from numba.cuda.descriptor import cuda_target

        typingctx = cuda_target.typing_context
        targetctx = cuda_target.target_context
        test_ir = compiler.run_frontend(pyfunc)
        typingctx.refresh()
        targetctx.refresh()
        args = (types.int64, types.int64, types.int64)
        typemap, return_type, calltypes, _ = type_inference_stage(
            typingctx, targetctx, test_ir, args, None
        )
        # The annotation object is built and discarded, as in the original
        # tests; only its construction is exercised here.
        type_annotations.TypeAnnotation(
            func_ir=test_ir,
            typemap=typemap,
            calltypes=calltypes,
            lifted=(),
            lifted_from=None,
            args=args,
            return_type=return_type,
            html_output=config.HTML,
        )
        # Only the incoming copy sets are needed to apply the propagation.
        in_cps, _ = copy_propagate(test_ir.blocks, typemap)
        apply_copy_propagate(
            test_ir.blocks,
            in_cps,
            get_name_var_table(test_ir.blocks),
            typemap,
            calltypes,
        )
        return test_ir

    def test1(self):
        """After propagation no other assignment may still read ``x1``."""
        test_ir = self._run_copy_propagate(_test_will_propagate)

        self.assertFalse(_find_assign(test_ir, "x1"))

    def test2(self):
        """After propagation some other assignment must still read ``x``."""
        test_ir = self._run_copy_propagate(_test_wont_propagate)

        self.assertTrue(_find_assign(test_ir, "x"))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
if __name__ == "__main__":
|
|
130
|
+
unittest.main()
|
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
|
+
from collections import namedtuple
|
|
4
5
|
from numba.cuda.tests.support import override_config, captured_stdout
|
|
5
6
|
from numba.cuda.testing import skip_on_cudasim
|
|
6
7
|
from numba import cuda
|
|
7
8
|
from numba.core import types
|
|
8
9
|
from numba.cuda.testing import CUDATestCase
|
|
10
|
+
from textwrap import dedent
|
|
11
|
+
import math
|
|
9
12
|
import itertools
|
|
10
|
-
import numpy as np
|
|
11
13
|
import re
|
|
12
14
|
import unittest
|
|
15
|
+
import warnings
|
|
16
|
+
from numba.core.errors import NumbaDebugInfoWarning
|
|
17
|
+
from numba.cuda.tests.support import ignore_internal_warnings
|
|
18
|
+
import numpy as np
|
|
19
|
+
import inspect
|
|
13
20
|
|
|
14
21
|
|
|
15
22
|
@skip_on_cudasim("Simulator does not produce debug dumps")
|
|
@@ -30,7 +37,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
30
37
|
assertfn(match, msg=asm)
|
|
31
38
|
|
|
32
39
|
def test_no_debuginfo_in_asm(self):
|
|
33
|
-
@cuda.jit(debug=False)
|
|
40
|
+
@cuda.jit(debug=False, opt=False)
|
|
34
41
|
def foo(x):
|
|
35
42
|
x[0] = 1
|
|
36
43
|
|
|
@@ -426,7 +433,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
426
433
|
self.assertIn(expected, out.getvalue())
|
|
427
434
|
|
|
428
435
|
def test_DW_LANG(self):
|
|
429
|
-
@cuda.jit(debug=True)
|
|
436
|
+
@cuda.jit(debug=True, opt=False)
|
|
430
437
|
def foo():
|
|
431
438
|
"""
|
|
432
439
|
CHECK: distinct !DICompileUnit
|
|
@@ -465,7 +472,7 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
465
472
|
"""
|
|
466
473
|
sig = (types.float64,)
|
|
467
474
|
|
|
468
|
-
@cuda.jit(sig, debug=True)
|
|
475
|
+
@cuda.jit(sig, debug=True, opt=False)
|
|
469
476
|
def foo(a):
|
|
470
477
|
"""
|
|
471
478
|
CHECK-LABEL: define void @{{.+}}foo
|
|
@@ -523,6 +530,288 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
523
530
|
ir = foo.inspect_llvm()[sig]
|
|
524
531
|
self.assertFileCheckMatches(ir, foo.__doc__)
|
|
525
532
|
|
|
533
|
+
def test_missing_source(self):
    """Compiling with debug info a function created via ``exec`` must warn.

    The kernel is defined from a string, so there is no source file behind
    it; launching it with ``debug=True`` is expected to emit exactly one
    ``NumbaDebugInfoWarning`` that names the function.
    """
    strsrc = """
    def foo():
        pass
    """
    namespace = {}
    exec(dedent(strsrc), {}, namespace)
    foo = cuda.jit(debug=True, opt=False)(namespace["foo"])

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", NumbaDebugInfoWarning)
        ignore_internal_warnings()
        foo[1, 1]()

    self.assertEqual(len(w), 1)
    found = w[0]
    self.assertEqual(found.category, NumbaDebugInfoWarning)
    msg = str(found.message)
    # make sure the warning contains the right message
    self.assertIn("Could not find source for function", msg)
    # and refers to the offending function
    self.assertIn(str(foo.py_func), msg)
|
|
555
|
+
|
|
556
|
+
def test_no_if_op_bools_declared(self):
    """No ``llvm.dbg.declare`` line in the IR may mention ``bool``.

    A device function branching on two boolean arguments is compiled with
    debug info and without optimisation, and every ``llvm.dbg.declare``
    line of its LLVM IR is checked.
    """

    @cuda.jit(
        "int64(boolean, boolean)",
        debug=True,
        opt=False,
        _dbg_optnone=True,
        device=True,
    )
    def choice(cond1, cond2):
        """
        CHECK: define void @{{.+}}choices
        """
        if cond1 and cond2:
            return 1
        else:
            return 2

    ir_content = choice.inspect_llvm()[choice.signatures[0]]
    # We should not declare variables used as the condition in if ops.
    # See Numba PR #9888: https://github.com/numba/numba/pull/9888

    for line in ir_content.splitlines():
        if "llvm.dbg.declare" in line:
            self.assertNotIn("bool", line)
|
|
580
|
+
|
|
581
|
+
def test_llvm_inliner_flag_conflict(self):
    """``forceinline`` device functions must compile under DEBUGINFO_DEFAULT.

    The kernel calls one ``forceinline=True`` and one ``forceinline=False``
    device function; the LLVM IR is then checked for the calls, the
    ``alwaysinline`` attribute on ``bar`` only, and the libdevice calls
    inside both device functions.
    """
    # bar will be marked as 'alwaysinline', but when DEBUGINFO_DEFAULT is
    # set functions are not marked as 'alwaysinline' and this results in a
    # conflict. baz will not be marked as 'alwaysinline' as a result of
    # DEBUGINFO_DEFAULT

    @cuda.jit(forceinline=True)
    def bar(x):
        return math.sin(x)

    @cuda.jit(forceinline=False)
    def baz(x):
        return math.cos(x)

    @cuda.jit(opt=True)
    def foo(x, y):
        """
        CHECK-LABEL: define void @{{.+}}foo
        CHECK: call i32 @"[[BAR:.+]]"(
        CHECK: call i32 @"[[BAZ:.+]]"(

        CHECK-DAG: declare i32 @"[[BAR]]"({{.+}}alwaysinline
        CHECK-DAG: declare i32 @"[[BAZ]]"(
        CHECK-DAG: define linkonce_odr i32 @"[[BAR]]"({{.+}}alwaysinline
        CHECK-DAG: define linkonce_odr i32 @"[[BAZ]]"(
        """
        a = bar(y)
        b = baz(y)
        x[0] = a + b

    # check it compiles
    # NOTE(review): the extent of this ``with`` block was reconstructed from
    # blank-line grouping because the extracted text carries no
    # indentation; confirm against the released file.
    with override_config("DEBUGINFO_DEFAULT", 1):
        result = cuda.device_array(1, dtype=np.float32)
        foo[1, 1](result, np.pi)
        result.copy_to_host()

    result_host = math.sin(np.pi) + math.cos(np.pi)
    self.assertPreciseEqual(result[0], result_host)

    ir_content = foo.inspect_llvm()[foo.signatures[0]]
    self.assertFileCheckMatches(ir_content, foo.__doc__)

    # Check that the device functions call the appropriate device
    # math functions and have the correct attributes.
    self.assertFileCheckMatches(
        ir_content,
        """
        CHECK: define linkonce_odr i32 @{{.+}}bar
        CHECK-SAME: alwaysinline
        CHECK-NEXT: {
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: br label {{.*}}
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: call double @"__nv_sin"
        CHECK-NEXT: store double {{.*}}, double* {{.*}}
        CHECK-NEXT: ret i32 0
        CHECK-NEXT: }
        """,
    )

    self.assertFileCheckMatches(
        ir_content,
        """
        CHECK: define linkonce_odr i32 @{{.+}}baz
        CHECK-NOT: alwaysinline
        CHECK-NEXT: {
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: br label {{.*}}
        CHECK-NEXT: {{.*}}:
        CHECK-NEXT: call double @"__nv_cos"
        CHECK-NEXT: store double {{.*}}, double* {{.*}}
        CHECK-NEXT: ret i32 0
        CHECK-NEXT: }
        """,
    )
|
|
656
|
+
|
|
657
|
+
def test_DILocation_versioned_variables(self):
    """Tests that DILocation information for versions of variables matches
    up to their definition site."""

    @cuda.jit(debug=True, opt=False)
    def foo(dest, n):
        """
        CHECK: define void @{{.+}}foo
        CHECK: store i64 5, i64* %"c{{.+}} !dbg ![[STORE5:.+]]
        CHECK: store i64 1, i64* %"c{{.+}} !dbg ![[STORE1:.+]]
        CHECK: [[STORE5]] = !DILocation(
        CHECK: [[STORE1]] = !DILocation(
        """
        if n:
            c = 5
        else:
            c = 1
        dest[0] = c

    foo_source_lines, foo_source_lineno = inspect.getsourcelines(
        foo.py_func
    )

    result = cuda.device_array(1, dtype=np.int32)
    foo[1, 1](result, 1)
    result.copy_to_host()
    self.assertEqual(result[0], 5)

    ir_content = foo.inspect_llvm()[foo.signatures[0]]
    self.assertFileCheckMatches(ir_content, foo.__doc__)

    # Collect lines pertaining to the function `foo` and debuginfo
    # metadata
    lines = ir_content.splitlines()
    debuginfo_equals = re.compile(r"!(\d+) = ")
    debug_info_lines = [
        entry for entry in lines if debuginfo_equals.search(entry)
    ]

    def metadata_definitions(node):
        # Metadata lines that define ``node`` (e.g. ``!12 = ...``).
        marker = node + " = "
        return [entry for entry in debug_info_lines if marker in entry]

    function_start_regex = re.compile(r"define void @.+foo")
    function_starts = [
        index
        for index, entry in enumerate(lines)
        if function_start_regex.search(entry)
    ]
    self.assertTrue(function_starts, msg=ir_content)
    foo_start = function_starts[0]
    # Search for the closing brace from the start of ``foo`` onwards, so
    # that a function defined earlier in the module cannot end the slice.
    foo_end = lines.index("}", foo_start)
    foo_ir_lines = lines[foo_start:foo_end]

    # Check the if condition's debuginfo
    cond_branch = [entry for entry in foo_ir_lines if "br i1" in entry]
    self.assertEqual(len(cond_branch), 1)
    self.assertIn("!dbg", cond_branch[0])
    cond_branch_dbginfo_node = cond_branch[0].split("!dbg")[1].strip()
    cond_branch_dbginfos = metadata_definitions(cond_branch_dbginfo_node)
    self.assertEqual(len(cond_branch_dbginfos), 1)
    cond_branch_dbginfo = cond_branch_dbginfos[0]

    # Check debuginfo for the store instructions
    store_1_lines = [e for e in foo_ir_lines if "store i64 1" in e]
    store_5_lines = [e for e in foo_ir_lines if "store i64 5" in e]

    self.assertEqual(len(store_1_lines), 2)
    self.assertEqual(len(store_5_lines), 2)

    store_1_dbginfo_set = {
        entry.split("!dbg")[1].strip() for entry in store_1_lines
    }
    store_5_dbginfo_set = {
        entry.split("!dbg")[1].strip() for entry in store_5_lines
    }
    self.assertEqual(len(store_1_dbginfo_set), 1)
    self.assertEqual(len(store_5_dbginfo_set), 1)
    store_1_dbginfo_node = store_1_dbginfo_set.pop()
    store_5_dbginfo_node = store_5_dbginfo_set.pop()
    store_1_dbginfos = metadata_definitions(store_1_dbginfo_node)
    store_5_dbginfos = metadata_definitions(store_5_dbginfo_node)
    self.assertEqual(len(store_1_dbginfos), 1)
    self.assertEqual(len(store_5_dbginfos), 1)
    store_1_dbginfo = store_1_dbginfos[0]
    store_5_dbginfo = store_5_dbginfos[0]

    # Ensure the line numbers match what we expect based on the Python source
    line_number_regex = re.compile(r"line: (\d+)")
    LineNumbers = namedtuple(
        "LineNumbers", ["cond_branch", "store_5", "store_1"]
    )
    line_number_matches = LineNumbers(
        *(
            line_number_regex.search(entry)
            for entry in (cond_branch_dbginfo, store_5_dbginfo, store_1_dbginfo)
        )
    )
    self.assertTrue(
        all(match is not None for match in line_number_matches)
    )
    line_numbers = LineNumbers(
        *(int(match.group(1)) for match in line_number_matches)
    )
    # The source of ``foo`` contains, in order, the ``if n:`` line and the
    # two assignments to ``c``; their offsets are turned into file line
    # numbers using the first line number reported by ``inspect``.
    source_line_numbers = LineNumbers(
        *(
            offset + foo_source_lineno
            for offset, text in enumerate(foo_source_lines)
            if "c = " in text or "if n:" in text
        )
    )
    self.assertEqual(line_numbers, source_line_numbers)
|
|
790
|
+
|
|
791
|
+
def test_debuginfo_asm(self):
    """The generated assembly has a debug section only when ``debug=True``.

    The same empty function is compiled twice, once with and once without
    debug info, and the assembly of each kernel is checked for a
    ``.section ... .debug`` directive.
    """

    def foo():
        pass

    foo_debug = cuda.jit(debug=True, opt=False)(foo)
    foo_debug[1, 1]()
    asm = foo_debug.inspect_asm()[foo_debug.signatures[0]]
    self.assertFileCheckMatches(
        asm,
        """
        CHECK: .section{{.+}}.debug
        """,
    )

    foo_nodebug = cuda.jit(debug=False)(foo)
    foo_nodebug[1, 1]()
    asm = foo_nodebug.inspect_asm()[foo_nodebug.signatures[0]]
    self.assertFileCheckMatches(
        asm,
        """
        CHECK-NOT: .section{{.+}}.debug
        """,
    )
|
|
814
|
+
|
|
526
815
|
|
|
527
816
|
if __name__ == "__main__":
|
|
528
817
|
unittest.main()
|
|
@@ -7,7 +7,6 @@ import threading
|
|
|
7
7
|
|
|
8
8
|
from numba import (
|
|
9
9
|
boolean,
|
|
10
|
-
config,
|
|
11
10
|
cuda,
|
|
12
11
|
float32,
|
|
13
12
|
float64,
|
|
@@ -17,6 +16,7 @@ from numba import (
|
|
|
17
16
|
uint32,
|
|
18
17
|
void,
|
|
19
18
|
)
|
|
19
|
+
from numba.cuda import config
|
|
20
20
|
from numba.core.errors import TypingError
|
|
21
21
|
from numba.cuda.testing import (
|
|
22
22
|
cc_X_or_above,
|
|
@@ -13,7 +13,7 @@ from numba.cuda.testing import (
|
|
|
13
13
|
|
|
14
14
|
@skip_on_cudasim("Cudasim does not support inline and forceinline")
|
|
15
15
|
class TestCudaInline(CUDATestCase):
|
|
16
|
-
def _test_call_inline(self, inline):
|
|
16
|
+
def _test_call_inline(self, inline, inline_expected):
|
|
17
17
|
"""Test @cuda.jit(inline=...)"""
|
|
18
18
|
a = np.ones(2, dtype=np.int32)
|
|
19
19
|
|
|
@@ -36,12 +36,10 @@ class TestCudaInline(CUDATestCase):
|
|
|
36
36
|
pat = r"call [a-zA-Z0-9]* @"
|
|
37
37
|
match = re.compile(pat).search(llvm_ir)
|
|
38
38
|
|
|
39
|
-
if inline == "always" or inline is True:
|
|
39
|
+
if inline_expected:
|
|
40
40
|
# check that call was inlined
|
|
41
41
|
self.assertIsNone(match, msg=llvm_ir)
|
|
42
42
|
else:
|
|
43
|
-
assert inline == "never" or inline is False
|
|
44
|
-
|
|
45
43
|
# check that call was not inlined
|
|
46
44
|
self.assertIsNotNone(match, msg=llvm_ir)
|
|
47
45
|
|
|
@@ -49,16 +47,28 @@ class TestCudaInline(CUDATestCase):
|
|
|
49
47
|
self.assertNotIn("alwaysinline", llvm_ir)
|
|
50
48
|
|
|
51
49
|
def test_call_inline_always(self):
|
|
52
|
-
self._test_call_inline("always")
|
|
50
|
+
self._test_call_inline("always", True)
|
|
53
51
|
|
|
54
52
|
def test_call_inline_never(self):
|
|
55
|
-
self._test_call_inline("never")
|
|
53
|
+
self._test_call_inline("never", False)
|
|
56
54
|
|
|
57
55
|
def test_call_inline_true(self):
|
|
58
|
-
self._test_call_inline(True)
|
|
56
|
+
self._test_call_inline(True, True)
|
|
59
57
|
|
|
60
58
|
def test_call_inline_false(self):
|
|
61
|
-
self._test_call_inline(False)
|
|
59
|
+
self._test_call_inline(False, False)
|
|
60
|
+
|
|
61
|
+
def test_call_inline_costmodel_false(self):
    """A cost model that always returns False must leave the call in place."""

    def cost_model(expr, caller_info, callee_info):
        # Reject inlining regardless of the call site.
        return False

    self._test_call_inline(cost_model, False)
|
|
66
|
+
|
|
67
|
+
def test_call_inline_costmodel_true(self):
    """A cost model that always returns True must cause the call to be inlined."""

    def cost_model(expr, caller_info, callee_info):
        # Accept inlining regardless of the call site.
        return True

    self._test_call_inline(cost_model, True)
|
|
62
72
|
|
|
63
73
|
def _test_call_forceinline(self, forceinline):
|
|
64
74
|
"""Test @cuda.jit(forceinline=...)"""
|