numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (172)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,8 @@ import cffi
9
9
 
10
10
  import numpy as np
11
11
 
12
- from numba import config, cuda, int32
12
+ from numba import cuda, int32
13
+ from numba.cuda import config
13
14
  from numba.types import CPointer
14
15
  from numba.cuda.testing import (
15
16
  unittest,
@@ -17,7 +18,6 @@ from numba.cuda.testing import (
17
18
  skip_on_cudasim,
18
19
  skip_unless_cc_60,
19
20
  skip_if_cudadevrt_missing,
20
- skip_if_mvc_enabled,
21
21
  )
22
22
  from numba.core.typing import signature
23
23
 
@@ -63,7 +63,6 @@ def sequential_rows(M):
63
63
 
64
64
 
65
65
  @skip_if_cudadevrt_missing
66
- @skip_if_mvc_enabled("CG not supported with MVC")
67
66
  class TestCudaCooperativeGroups(CUDATestCase):
68
67
  @skip_unless_cc_60
69
68
  def test_this_grid(self):
@@ -0,0 +1,130 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ #
3
+ # Copyright (c) 2017 Intel Corporation
4
+ # SPDX-License-Identifier: BSD-2-Clause
5
+ #
6
+
7
+ from numba.core import types, ir, config
8
+ from numba.cuda import compiler
9
+ from numba.cuda.core.annotations import type_annotations
10
+ from numba.cuda.core.ir_utils import (
11
+ copy_propagate,
12
+ apply_copy_propagate,
13
+ get_name_var_table,
14
+ )
15
+ from numba.cuda.core.typed_passes import type_inference_stage
16
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
17
+ import unittest
18
+
19
+
20
+ def _test_will_propagate(b, z, w):
21
+ x = 3
22
+ x1 = x
23
+ if b > 0:
24
+ y = z + w # noqa: F821
25
+ else:
26
+ y = 0 # noqa: F841
27
+ a = 2 * x1
28
+ return a < b
29
+
30
+
31
+ def _test_wont_propagate(b, z, w):
32
+ x = 3
33
+ if b > 0:
34
+ y = z + w # noqa: F841
35
+ x = 1
36
+ else:
37
+ y = 0 # noqa: F841
38
+ a = 2 * x
39
+ return a < b
40
+
41
+
42
+ def _in_list_var(list_var, var):
43
+ for i in list_var:
44
+ if i.name == var:
45
+ return True
46
+ return False
47
+
48
+
49
+ def _find_assign(func_ir, var):
50
+ for label, block in func_ir.blocks.items():
51
+ for i, inst in enumerate(block.body):
52
+ if isinstance(inst, ir.Assign) and inst.target.name != var:
53
+ all_var = inst.list_vars()
54
+ if _in_list_var(all_var, var):
55
+ return True
56
+
57
+ return False
58
+
59
+
60
+ @skip_on_cudasim("cudasim doesn't support run_frontend")
61
+ class TestCopyPropagate(CUDATestCase):
62
+ def test1(self):
63
+ from numba.cuda.descriptor import cuda_target
64
+
65
+ typingctx = cuda_target.typing_context
66
+ targetctx = cuda_target.target_context
67
+ test_ir = compiler.run_frontend(_test_will_propagate)
68
+ typingctx.refresh()
69
+ targetctx.refresh()
70
+ args = (types.int64, types.int64, types.int64)
71
+ typemap, return_type, calltypes, _ = type_inference_stage(
72
+ typingctx, targetctx, test_ir, args, None
73
+ )
74
+ _ = type_annotations.TypeAnnotation(
75
+ func_ir=test_ir,
76
+ typemap=typemap,
77
+ calltypes=calltypes,
78
+ lifted=(),
79
+ lifted_from=None,
80
+ args=args,
81
+ return_type=return_type,
82
+ html_output=config.HTML,
83
+ )
84
+ in_cps, out_cps = copy_propagate(test_ir.blocks, typemap)
85
+ _ = apply_copy_propagate(
86
+ test_ir.blocks,
87
+ in_cps,
88
+ get_name_var_table(test_ir.blocks),
89
+ typemap,
90
+ calltypes,
91
+ )
92
+
93
+ self.assertFalse(_find_assign(test_ir, "x1"))
94
+
95
+ def test2(self):
96
+ from numba.cuda.descriptor import cuda_target
97
+
98
+ typingctx = cuda_target.typing_context
99
+ targetctx = cuda_target.target_context
100
+ test_ir = compiler.run_frontend(_test_wont_propagate)
101
+ typingctx.refresh()
102
+ targetctx.refresh()
103
+ args = (types.int64, types.int64, types.int64)
104
+ typemap, return_type, calltypes, _ = type_inference_stage(
105
+ typingctx, targetctx, test_ir, args, None
106
+ )
107
+ _ = type_annotations.TypeAnnotation(
108
+ func_ir=test_ir,
109
+ typemap=typemap,
110
+ calltypes=calltypes,
111
+ lifted=(),
112
+ lifted_from=None,
113
+ args=args,
114
+ return_type=return_type,
115
+ html_output=config.HTML,
116
+ )
117
+ in_cps, out_cps = copy_propagate(test_ir.blocks, typemap)
118
+ _ = apply_copy_propagate(
119
+ test_ir.blocks,
120
+ in_cps,
121
+ get_name_var_table(test_ir.blocks),
122
+ typemap,
123
+ calltypes,
124
+ )
125
+
126
+ self.assertTrue(_find_assign(test_ir, "x"))
127
+
128
+
129
+ if __name__ == "__main__":
130
+ unittest.main()
@@ -4,7 +4,7 @@
4
4
  import numpy as np
5
5
 
6
6
  from numba import cuda, vectorize, guvectorize
7
- from numba.np.numpy_support import from_dtype
7
+ from numba.cuda.np.numpy_support import from_dtype
8
8
  from numba.cuda.testing import CUDATestCase, skip_on_cudasim
9
9
  import unittest
10
10
 
@@ -3,7 +3,7 @@
3
3
 
4
4
  import numpy as np
5
5
 
6
- from numba.core.utils import PYVERSION
6
+ from numba.cuda.utils import PYVERSION
7
7
  from numba.cuda.testing import skip_on_cudasim, CUDATestCase
8
8
  from numba.cuda.tests.support import (
9
9
  override_config,
@@ -1,15 +1,22 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
+ from collections import namedtuple
4
5
  from numba.cuda.tests.support import override_config, captured_stdout
5
6
  from numba.cuda.testing import skip_on_cudasim
6
7
  from numba import cuda
7
8
  from numba.core import types
8
9
  from numba.cuda.testing import CUDATestCase
10
+ from textwrap import dedent
11
+ import math
9
12
  import itertools
10
- import numpy as np
11
13
  import re
12
14
  import unittest
15
+ import warnings
16
+ from numba.core.errors import NumbaDebugInfoWarning
17
+ from numba.cuda.tests.support import ignore_internal_warnings
18
+ import numpy as np
19
+ import inspect
13
20
 
14
21
 
15
22
  @skip_on_cudasim("Simulator does not produce debug dumps")
@@ -30,7 +37,7 @@ class TestCudaDebugInfo(CUDATestCase):
30
37
  assertfn(match, msg=asm)
31
38
 
32
39
  def test_no_debuginfo_in_asm(self):
33
- @cuda.jit(debug=False)
40
+ @cuda.jit(debug=False, opt=False)
34
41
  def foo(x):
35
42
  x[0] = 1
36
43
 
@@ -426,7 +433,7 @@ class TestCudaDebugInfo(CUDATestCase):
426
433
  self.assertIn(expected, out.getvalue())
427
434
 
428
435
  def test_DW_LANG(self):
429
- @cuda.jit(debug=True)
436
+ @cuda.jit(debug=True, opt=False)
430
437
  def foo():
431
438
  """
432
439
  CHECK: distinct !DICompileUnit
@@ -465,7 +472,7 @@ class TestCudaDebugInfo(CUDATestCase):
465
472
  """
466
473
  sig = (types.float64,)
467
474
 
468
- @cuda.jit(sig, debug=True)
475
+ @cuda.jit(sig, debug=True, opt=False)
469
476
  def foo(a):
470
477
  """
471
478
  CHECK-LABEL: define void @{{.+}}foo
@@ -523,6 +530,288 @@ class TestCudaDebugInfo(CUDATestCase):
523
530
  ir = foo.inspect_llvm()[sig]
524
531
  self.assertFileCheckMatches(ir, foo.__doc__)
525
532
 
533
+ def test_missing_source(self):
534
+ strsrc = """
535
+ def foo():
536
+ pass
537
+ """
538
+ l = dict()
539
+ exec(dedent(strsrc), {}, l)
540
+ foo = cuda.jit(debug=True, opt=False)(l["foo"])
541
+
542
+ with warnings.catch_warnings(record=True) as w:
543
+ warnings.simplefilter("always", NumbaDebugInfoWarning)
544
+ ignore_internal_warnings()
545
+ foo[1, 1]()
546
+
547
+ self.assertEqual(len(w), 1)
548
+ found = w[0]
549
+ self.assertEqual(found.category, NumbaDebugInfoWarning)
550
+ msg = str(found.message)
551
+ # make sure the warning contains the right message
552
+ self.assertIn("Could not find source for function", msg)
553
+ # and refers to the offending function
554
+ self.assertIn(str(foo.py_func), msg)
555
+
556
+ def test_no_if_op_bools_declared(self):
557
+ @cuda.jit(
558
+ "int64(boolean, boolean)",
559
+ debug=True,
560
+ opt=False,
561
+ _dbg_optnone=True,
562
+ device=True,
563
+ )
564
+ def choice(cond1, cond2):
565
+ """
566
+ CHECK: define void @{{.+}}choices
567
+ """
568
+ if cond1 and cond2:
569
+ return 1
570
+ else:
571
+ return 2
572
+
573
+ ir_content = choice.inspect_llvm()[choice.signatures[0]]
574
+ # We should not declare variables used as the condition in if ops.
575
+ # See Numba PR #9888: https://github.com/numba/numba/pull/9888
576
+
577
+ for line in ir_content.splitlines():
578
+ if "llvm.dbg.declare" in line:
579
+ self.assertNotIn("bool", line)
580
+
581
+ def test_llvm_inliner_flag_conflict(self):
582
+ # bar will be marked as 'alwaysinline', but when DEBUGINFO_DEFAULT is
583
+ # set functions are not marked as 'alwaysinline' and this results in a
584
+ # conflict. baz will not be marked as 'alwaysinline' as a result of
585
+ # DEBUGINFO_DEFAULT
586
+
587
+ @cuda.jit(forceinline=True)
588
+ def bar(x):
589
+ return math.sin(x)
590
+
591
+ @cuda.jit(forceinline=False)
592
+ def baz(x):
593
+ return math.cos(x)
594
+
595
+ @cuda.jit(opt=True)
596
+ def foo(x, y):
597
+ """
598
+ CHECK-LABEL: define void @{{.+}}foo
599
+ CHECK: call i32 @"[[BAR:.+]]"(
600
+ CHECK: call i32 @"[[BAZ:.+]]"(
601
+
602
+ CHECK-DAG: declare i32 @"[[BAR]]"({{.+}}alwaysinline
603
+ CHECK-DAG: declare i32 @"[[BAZ]]"(
604
+ CHECK-DAG: define linkonce_odr i32 @"[[BAR]]"({{.+}}alwaysinline
605
+ CHECK-DAG: define linkonce_odr i32 @"[[BAZ]]"(
606
+ """
607
+ a = bar(y)
608
+ b = baz(y)
609
+ x[0] = a + b
610
+
611
+ # check it compiles
612
+ with override_config("DEBUGINFO_DEFAULT", 1):
613
+ result = cuda.device_array(1, dtype=np.float32)
614
+ foo[1, 1](result, np.pi)
615
+ result.copy_to_host()
616
+
617
+ result_host = math.sin(np.pi) + math.cos(np.pi)
618
+ self.assertPreciseEqual(result[0], result_host)
619
+
620
+ ir_content = foo.inspect_llvm()[foo.signatures[0]]
621
+ self.assertFileCheckMatches(ir_content, foo.__doc__)
622
+
623
+ # Check that the device functions call the appropriate device
624
+ # math functions and have the correct attributes.
625
+ self.assertFileCheckMatches(
626
+ ir_content,
627
+ """
628
+ CHECK: define linkonce_odr i32 @{{.+}}bar
629
+ CHECK-SAME: alwaysinline
630
+ CHECK-NEXT: {
631
+ CHECK-NEXT: {{.*}}:
632
+ CHECK-NEXT: br label {{.*}}
633
+ CHECK-NEXT: {{.*}}:
634
+ CHECK-NEXT: call double @"__nv_sin"
635
+ CHECK-NEXT: store double {{.*}}, double* {{.*}}
636
+ CHECK-NEXT: ret i32 0
637
+ CHECK-NEXT: }
638
+ """,
639
+ )
640
+
641
+ self.assertFileCheckMatches(
642
+ ir_content,
643
+ """
644
+ CHECK: define linkonce_odr i32 @{{.+}}baz
645
+ CHECK-NOT: alwaysinline
646
+ CHECK-NEXT: {
647
+ CHECK-NEXT: {{.*}}:
648
+ CHECK-NEXT: br label {{.*}}
649
+ CHECK-NEXT: {{.*}}:
650
+ CHECK-NEXT: call double @"__nv_cos"
651
+ CHECK-NEXT: store double {{.*}}, double* {{.*}}
652
+ CHECK-NEXT: ret i32 0
653
+ CHECK-NEXT: }
654
+ """,
655
+ )
656
+
657
+ def test_DILocation_versioned_variables(self):
658
+ """Tests that DILocation information for versions of variables matches
659
+ up to their definition site."""
660
+
661
+ @cuda.jit(debug=True, opt=False)
662
+ def foo(dest, n):
663
+ """
664
+ CHECK: define void @{{.+}}foo
665
+ CHECK: store i64 5, i64* %"c{{.+}} !dbg ![[STORE5:.+]]
666
+ CHECK: store i64 1, i64* %"c{{.+}} !dbg ![[STORE1:.+]]
667
+ CHECK: [[STORE5]] = !DILocation(
668
+ CHECK: [[STORE1]] = !DILocation(
669
+ """
670
+ if n:
671
+ c = 5
672
+ else:
673
+ c = 1
674
+ dest[0] = c
675
+
676
+ foo_source_lines, foo_source_lineno = inspect.getsourcelines(
677
+ foo.py_func
678
+ )
679
+
680
+ result = cuda.device_array(1, dtype=np.int32)
681
+ foo[1, 1](result, 1)
682
+ result.copy_to_host()
683
+ self.assertEqual(result[0], 5)
684
+
685
+ ir_content = foo.inspect_llvm()[foo.signatures[0]]
686
+ self.assertFileCheckMatches(ir_content, foo.__doc__)
687
+
688
+ # Collect lines pertaining to the function `foo` and debuginfo
689
+ # metadata
690
+ lines = ir_content.splitlines()
691
+ debuginfo_equals = re.compile(r"!(\d+) = ")
692
+ debug_info_lines = list(
693
+ filter(lambda x: debuginfo_equals.search(x), lines)
694
+ )
695
+
696
+ function_start_regex = re.compile(r"define void @.+foo")
697
+ function_start_lines = list(
698
+ filter(
699
+ lambda x: function_start_regex.search(x[1]), enumerate(lines)
700
+ )
701
+ )
702
+ function_end_lines = list(
703
+ filter(lambda x: x[1] == "}", enumerate(lines))
704
+ )
705
+ foo_ir_lines = lines[
706
+ function_start_lines[0][0] : function_end_lines[0][0]
707
+ ]
708
+
709
+ # Check the if condition's debuginfo
710
+ cond_branch = list(filter(lambda x: "br i1" in x, foo_ir_lines))
711
+ self.assertEqual(len(cond_branch), 1)
712
+ self.assertIn("!dbg", cond_branch[0])
713
+ cond_branch_dbginfo_node = cond_branch[0].split("!dbg")[1].strip()
714
+ cond_branch_dbginfos = list(
715
+ filter(
716
+ lambda x: cond_branch_dbginfo_node + " = " in x,
717
+ debug_info_lines,
718
+ )
719
+ )
720
+ self.assertEqual(len(cond_branch_dbginfos), 1)
721
+ cond_branch_dbginfo = cond_branch_dbginfos[0]
722
+
723
+ # Check debuginfo for the store instructions
724
+ store_1_lines = list(filter(lambda x: "store i64 1" in x, foo_ir_lines))
725
+ store_5_lines = list(filter(lambda x: "store i64 5" in x, foo_ir_lines))
726
+
727
+ self.assertEqual(len(store_1_lines), 2)
728
+ self.assertEqual(len(store_5_lines), 2)
729
+
730
+ store_1_dbginfo_set = set(
731
+ map(lambda x: x.split("!dbg")[1].strip(), store_1_lines)
732
+ )
733
+ store_5_dbginfo_set = set(
734
+ map(lambda x: x.split("!dbg")[1].strip(), store_5_lines)
735
+ )
736
+ self.assertEqual(len(store_1_dbginfo_set), 1)
737
+ self.assertEqual(len(store_5_dbginfo_set), 1)
738
+ store_1_dbginfo_node = store_1_dbginfo_set.pop()
739
+ store_5_dbginfo_node = store_5_dbginfo_set.pop()
740
+ store_1_dbginfos = list(
741
+ filter(
742
+ lambda x: store_1_dbginfo_node + " = " in x, debug_info_lines
743
+ )
744
+ )
745
+ store_5_dbginfos = list(
746
+ filter(
747
+ lambda x: store_5_dbginfo_node + " = " in x, debug_info_lines
748
+ )
749
+ )
750
+ self.assertEqual(len(store_1_dbginfos), 1)
751
+ self.assertEqual(len(store_5_dbginfos), 1)
752
+ store_1_dbginfo = store_1_dbginfos[0]
753
+ store_5_dbginfo = store_5_dbginfos[0]
754
+
755
+ # Ensure the line numbers match what we expect based on the Python source
756
+ line_number_regex = re.compile(r"line: (\d+)")
757
+ LineNumbers = namedtuple(
758
+ "LineNumbers", ["cond_branch", "store_5", "store_1"]
759
+ )
760
+ line_number_matches = LineNumbers(
761
+ *map(
762
+ lambda x: line_number_regex.search(x),
763
+ [cond_branch_dbginfo, store_5_dbginfo, store_1_dbginfo],
764
+ )
765
+ )
766
+ self.assertTrue(
767
+ all(
768
+ map(
769
+ lambda x: x is not None,
770
+ line_number_matches,
771
+ )
772
+ )
773
+ )
774
+ line_numbers = LineNumbers(
775
+ *map(
776
+ lambda x: int(x.group(1)),
777
+ line_number_matches,
778
+ )
779
+ )
780
+ source_line_numbers = LineNumbers(
781
+ *map(
782
+ lambda x: x[0] + foo_source_lineno,
783
+ filter(
784
+ lambda x: "c = " in x[1] or "if n:" in x[1],
785
+ enumerate(foo_source_lines),
786
+ ),
787
+ )
788
+ )
789
+ self.assertEqual(line_numbers, source_line_numbers)
790
+
791
+ def test_debuginfo_asm(self):
792
+ def foo():
793
+ pass
794
+
795
+ foo_debug = cuda.jit(debug=True, opt=False)(foo)
796
+ foo_debug[1, 1]()
797
+ asm = foo_debug.inspect_asm()[foo_debug.signatures[0]]
798
+ self.assertFileCheckMatches(
799
+ asm,
800
+ """
801
+ CHECK: .section{{.+}}.debug
802
+ """,
803
+ )
804
+
805
+ foo_nodebug = cuda.jit(debug=False)(foo)
806
+ foo_nodebug[1, 1]()
807
+ asm = foo_nodebug.inspect_asm()[foo_nodebug.signatures[0]]
808
+ self.assertFileCheckMatches(
809
+ asm,
810
+ """
811
+ CHECK-NOT: .section{{.+}}.debug
812
+ """,
813
+ )
814
+
526
815
 
527
816
  if __name__ == "__main__":
528
817
  unittest.main()
@@ -468,7 +468,7 @@ class TestCudaDebugInfoTypes(CUDATestCase):
468
468
  f"Test DITypes for {sanitize_name(numba_type.name)}"
469
469
  ):
470
470
 
471
- @cuda.jit((numba_type,), debug=True)
471
+ @cuda.jit((numba_type,), debug=True, opt=False)
472
472
  def foo(a):
473
473
  pass
474
474
 
@@ -7,7 +7,6 @@ import threading
7
7
 
8
8
  from numba import (
9
9
  boolean,
10
- config,
11
10
  cuda,
12
11
  float32,
13
12
  float64,
@@ -17,6 +16,7 @@ from numba import (
17
16
  uint32,
18
17
  void,
19
18
  )
19
+ from numba.cuda import config
20
20
  from numba.core.errors import TypingError
21
21
  from numba.cuda.testing import (
22
22
  cc_X_or_above,
@@ -4,7 +4,7 @@
4
4
  from numba import cuda
5
5
  from numba.core.errors import TypingError
6
6
  from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
7
- from numba import config
7
+ from numba.cuda import config
8
8
 
9
9
 
10
10
  def noop(x):
@@ -5,7 +5,7 @@ import numpy as np
5
5
 
6
6
  from numba import cuda
7
7
  from numba.cuda.testing import unittest, xfail_unless_cudasim, CUDATestCase
8
- from numba.core import config
8
+ from numba.cuda.core import config
9
9
 
10
10
 
11
11
  class TestException(CUDATestCase):
@@ -7,7 +7,8 @@ from llvmlite import ir
7
7
 
8
8
  import numpy as np
9
9
  import os
10
- from numba import config, cuda, njit, types
10
+ from numba import cuda, njit, types
11
+ from numba.cuda import config
11
12
  from numba.extending import overload
12
13
 
13
14
 
@@ -13,7 +13,7 @@ from numba.cuda.testing import (
13
13
 
14
14
  @skip_on_cudasim("Cudasim does not support inline and forceinline")
15
15
  class TestCudaInline(CUDATestCase):
16
- def _test_call_inline(self, inline):
16
+ def _test_call_inline(self, inline, inline_expected):
17
17
  """Test @cuda.jit(inline=...)"""
18
18
  a = np.ones(2, dtype=np.int32)
19
19
 
@@ -36,12 +36,10 @@ class TestCudaInline(CUDATestCase):
36
36
  pat = r"call [a-zA-Z0-9]* @"
37
37
  match = re.compile(pat).search(llvm_ir)
38
38
 
39
- if inline == "always" or inline is True:
39
+ if inline_expected:
40
40
  # check that call was inlined
41
41
  self.assertIsNone(match, msg=llvm_ir)
42
42
  else:
43
- assert inline == "never" or inline is False
44
-
45
43
  # check that call was not inlined
46
44
  self.assertIsNotNone(match, msg=llvm_ir)
47
45
 
@@ -49,16 +47,28 @@ class TestCudaInline(CUDATestCase):
49
47
  self.assertNotIn("alwaysinline", llvm_ir)
50
48
 
51
49
  def test_call_inline_always(self):
52
- self._test_call_inline("always")
50
+ self._test_call_inline("always", True)
53
51
 
54
52
  def test_call_inline_never(self):
55
- self._test_call_inline("never")
53
+ self._test_call_inline("never", False)
56
54
 
57
55
  def test_call_inline_true(self):
58
- self._test_call_inline(True)
56
+ self._test_call_inline(True, True)
59
57
 
60
58
  def test_call_inline_false(self):
61
- self._test_call_inline(False)
59
+ self._test_call_inline(False, False)
60
+
61
+ def test_call_inline_costmodel_false(self):
62
+ def cost_model(expr, caller_info, callee_info):
63
+ return False
64
+
65
+ self._test_call_inline(cost_model, False)
66
+
67
+ def test_call_inline_costmodel_true(self):
68
+ def cost_model(expr, caller_info, callee_info):
69
+ return True
70
+
71
+ self._test_call_inline(cost_model, True)
62
72
 
63
73
  def _test_call_forceinline(self, forceinline):
64
74
  """Test @cuda.jit(forceinline=...)"""