numba-cuda 0.19.1 (py3-none-any.whl) → 0.20.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda has been flagged as potentially problematic. (The original page linked to further details here; the link target is not preserved in this text.)

Files changed (172):
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,17 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """Compatibility module.
5
+
6
+ It can be necessary to load files generated by previous versions of cloudpickle
7
+ that rely on symbols being defined under the `cloudpickle.cloudpickle_fast`
8
+ namespace.
9
+
10
+ See: tests/test_backward_compat.py
11
+ """
12
+
13
+ from . import cloudpickle
14
+
15
+
16
+ def __getattr__(name):
17
+ return getattr(cloudpickle, name)
@@ -3,7 +3,7 @@
3
3
 
4
4
  from llvmlite import ir
5
5
 
6
- from numba.core import config
6
+ from numba.cuda.core import config
7
7
  from numba.cuda import serialize
8
8
  from .cudadrv import devices, driver, nvvm, runtime, nvrtc
9
9
  from numba.cuda.core.codegen import Codegen, CodeLibrary
@@ -3,49 +3,256 @@
3
3
 
4
4
  from llvmlite import ir
5
5
  from collections import namedtuple
6
+ from warnings import warn, catch_warnings, simplefilter
7
+ import copy
8
+
6
9
  from numba.core import ir as numba_ir
7
- from numba.cuda import cgutils, typing
8
10
  from numba.core import (
9
11
  types,
10
- funcdesc,
11
- config,
12
- compiler,
12
+ bytecode,
13
13
  )
14
- from numba.core.compiler import (
15
- sanitize_compile_result_entries,
16
- DefaultPassBuilder,
14
+ from numba.cuda.core.options import ParallelOptions
15
+ from numba.core.compiler_lock import global_compiler_lock
16
+ from numba.core.errors import NumbaWarning, NumbaInvalidConfigWarning
17
+ from numba.cuda.core.interpreter import Interpreter
18
+
19
+ from numba.cuda import cgutils, typing, lowering, nvvmutils, utils
20
+ from numba.cuda.api import get_current_device
21
+ from numba.cuda.codegen import ExternalCodeLibrary
22
+
23
+ from numba.cuda.core import (
24
+ inline_closurecall,
25
+ sigutils,
26
+ postproc,
27
+ config,
28
+ funcdesc,
17
29
  )
30
+ from numba.cuda.cudadrv import nvvm, nvrtc
31
+ from numba.cuda.descriptor import cuda_target
32
+ from numba.cuda.flags import CUDAFlags
33
+ from numba.cuda.target import CUDACABICallConv
18
34
  from numba.cuda.core.compiler import CompilerBase
19
- from numba.core.compiler_lock import global_compiler_lock
20
- from numba.core.compiler_machinery import (
35
+ from numba.cuda.core.compiler_machinery import (
21
36
  FunctionPass,
22
37
  LoweringPass,
23
38
  PassManager,
24
39
  register_pass,
25
40
  )
26
- from numba.core.errors import NumbaInvalidConfigWarning
27
- from numba.core.untyped_passes import TranslateByteCode
28
- from numba.core.typed_passes import (
29
- IRLegalization,
41
+ from numba.cuda.core.untyped_passes import (
42
+ TranslateByteCode,
43
+ FixupArgs,
44
+ IRProcessing,
45
+ DeadBranchPrune,
46
+ RewriteSemanticConstants,
47
+ InlineClosureLikes,
48
+ GenericRewrites,
49
+ WithLifting,
50
+ InlineInlinables,
51
+ FindLiterallyCalls,
52
+ MakeFunctionToJitFunction,
53
+ LiteralUnroll,
54
+ ReconstructSSA,
55
+ RewriteDynamicRaises,
56
+ LiteralPropagationSubPipelinePass,
57
+ )
58
+ from numba.cuda.core.typed_passes import (
59
+ BaseNativeLowering,
60
+ NativeLowering,
30
61
  AnnotateTypes,
62
+ IRLegalization,
63
+ NopythonTypeInference,
64
+ NopythonRewrites,
65
+ InlineOverloads,
66
+ PreLowerStripPhis,
67
+ NoPythonSupportedFeatureValidation,
31
68
  )
32
- from warnings import warn
33
- from numba.cuda import nvvmutils
34
- from numba.cuda.api import get_current_device
35
- from numba.cuda.codegen import ExternalCodeLibrary
36
- from numba.cuda.core.typed_passes import BaseNativeLowering
37
- from numba.cuda.core import sigutils
38
- from numba.cuda.cudadrv import nvvm, nvrtc
39
- from numba.cuda.descriptor import cuda_target
40
- from numba.cuda.flags import CUDAFlags
41
- from numba.cuda.target import CUDACABICallConv
42
- from numba.cuda import lowering, utils
43
- from numba.core.utils import PYVERSION
44
69
 
45
- if PYVERSION < (3, 10):
46
- from numba.core.interpreter import Interpreter
47
- else:
48
- from numba.cuda.core.interpreter import Interpreter
70
+
71
+ _LowerResult = namedtuple(
72
+ "_LowerResult",
73
+ [
74
+ "fndesc",
75
+ "call_helper",
76
+ "cfunc",
77
+ "env",
78
+ ],
79
+ )
80
+
81
+
82
+ def sanitize_compile_result_entries(entries):
83
+ keys = set(entries.keys())
84
+ fieldset = set(CR_FIELDS)
85
+ badnames = keys - fieldset
86
+ if badnames:
87
+ raise NameError(*badnames)
88
+ missing = fieldset - keys
89
+ for k in missing:
90
+ entries[k] = None
91
+ # Avoid keeping alive traceback variables
92
+ err = entries["typing_error"]
93
+ if err is not None:
94
+ entries["typing_error"] = err.with_traceback(None)
95
+ return entries
96
+
97
+
98
+ def run_frontend(func, inline_closures=False, emit_dels=False):
99
+ """
100
+ Run the compiler frontend over the given Python function, and return
101
+ the function's canonical Numba IR.
102
+
103
+ If inline_closures is Truthy then closure inlining will be run
104
+ If emit_dels is Truthy the ir.Del nodes will be emitted appropriately
105
+ """
106
+ # XXX make this a dedicated Pipeline?
107
+ func_id = bytecode.FunctionIdentity.from_function(func)
108
+ interp = Interpreter(func_id)
109
+ bc = bytecode.ByteCode(func_id=func_id)
110
+ func_ir = interp.interpret(bc)
111
+ if inline_closures:
112
+ inline_pass = inline_closurecall.InlineClosureCallPass(
113
+ func_ir, ParallelOptions(False), {}, False
114
+ )
115
+ inline_pass.run()
116
+ post_proc = postproc.PostProcessor(func_ir)
117
+ post_proc.run(emit_dels)
118
+ return func_ir
119
+
120
+
121
+ class DefaultPassBuilder(object):
122
+ """
123
+ This is the default pass builder, it contains the "classic" default
124
+ pipelines as pre-canned PassManager instances:
125
+ - nopython
126
+ - objectmode
127
+ - interpreted
128
+ - typed
129
+ - untyped
130
+ - nopython lowering
131
+ """
132
+
133
+ @staticmethod
134
+ def define_nopython_pipeline(state, name="nopython"):
135
+ """Returns an nopython mode pipeline based PassManager"""
136
+ # compose pipeline from untyped, typed and lowering parts
137
+ dpb = DefaultPassBuilder
138
+ pm = PassManager(name)
139
+ untyped_passes = dpb.define_untyped_pipeline(state)
140
+ pm.passes.extend(untyped_passes.passes)
141
+
142
+ typed_passes = dpb.define_typed_pipeline(state)
143
+ pm.passes.extend(typed_passes.passes)
144
+
145
+ lowering_passes = dpb.define_nopython_lowering_pipeline(state)
146
+ pm.passes.extend(lowering_passes.passes)
147
+
148
+ pm.finalize()
149
+ return pm
150
+
151
+ @staticmethod
152
+ def define_nopython_lowering_pipeline(state, name="nopython_lowering"):
153
+ pm = PassManager(name)
154
+ # legalise
155
+ pm.add_pass(
156
+ NoPythonSupportedFeatureValidation,
157
+ "ensure features that are in use are in a valid form",
158
+ )
159
+ pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering")
160
+ # Annotate only once legalized
161
+ pm.add_pass(AnnotateTypes, "annotate types")
162
+ # lower
163
+ pm.add_pass(NativeLowering, "native lowering")
164
+ pm.add_pass(CUDABackend, "nopython mode backend")
165
+ pm.finalize()
166
+ return pm
167
+
168
+ @staticmethod
169
+ def define_parfor_gufunc_nopython_lowering_pipeline(
170
+ state, name="parfor_gufunc_nopython_lowering"
171
+ ):
172
+ pm = PassManager(name)
173
+ # legalise
174
+ pm.add_pass(
175
+ NoPythonSupportedFeatureValidation,
176
+ "ensure features that are in use are in a valid form",
177
+ )
178
+ pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering")
179
+ # Annotate only once legalized
180
+ pm.add_pass(AnnotateTypes, "annotate types")
181
+ # lower
182
+ pm.add_pass(NativeLowering, "native lowering")
183
+ pm.add_pass(CUDABackend, "nopython mode backend")
184
+ pm.finalize()
185
+ return pm
186
+
187
+ @staticmethod
188
+ def define_typed_pipeline(state, name="typed"):
189
+ """Returns the typed part of the nopython pipeline"""
190
+ pm = PassManager(name)
191
+ # typing
192
+ pm.add_pass(NopythonTypeInference, "nopython frontend")
193
+
194
+ # strip phis
195
+ pm.add_pass(PreLowerStripPhis, "remove phis nodes")
196
+
197
+ # optimisation
198
+ pm.add_pass(InlineOverloads, "inline overloaded functions")
199
+ if not state.flags.no_rewrites:
200
+ pm.add_pass(NopythonRewrites, "nopython rewrites")
201
+
202
+ pm.finalize()
203
+ return pm
204
+
205
+ @staticmethod
206
+ def define_untyped_pipeline(state, name="untyped"):
207
+ """Returns an untyped part of the nopython pipeline"""
208
+ pm = PassManager(name)
209
+ if state.func_ir is None:
210
+ pm.add_pass(TranslateByteCode, "analyzing bytecode")
211
+ pm.add_pass(FixupArgs, "fix up args")
212
+ pm.add_pass(IRProcessing, "processing IR")
213
+ pm.add_pass(WithLifting, "Handle with contexts")
214
+
215
+ # inline closures early in case they are using nonlocal's
216
+ # see issue #6585.
217
+ pm.add_pass(
218
+ InlineClosureLikes, "inline calls to locally defined closures"
219
+ )
220
+
221
+ # pre typing
222
+ if not state.flags.no_rewrites:
223
+ pm.add_pass(RewriteSemanticConstants, "rewrite semantic constants")
224
+ pm.add_pass(DeadBranchPrune, "dead branch pruning")
225
+ pm.add_pass(GenericRewrites, "nopython rewrites")
226
+
227
+ pm.add_pass(RewriteDynamicRaises, "rewrite dynamic raises")
228
+
229
+ # convert any remaining closures into functions
230
+ pm.add_pass(
231
+ MakeFunctionToJitFunction,
232
+ "convert make_function into JIT functions",
233
+ )
234
+ # inline functions that have been determined as inlinable and rerun
235
+ # branch pruning, this needs to be run after closures are inlined as
236
+ # the IR repr of a closure masks call sites if an inlinable is called
237
+ # inside a closure
238
+ pm.add_pass(InlineInlinables, "inline inlinable functions")
239
+ if not state.flags.no_rewrites:
240
+ pm.add_pass(DeadBranchPrune, "dead branch pruning")
241
+
242
+ pm.add_pass(FindLiterallyCalls, "find literally calls")
243
+ pm.add_pass(LiteralUnroll, "handles literal_unroll")
244
+
245
+ if state.flags.enable_ssa:
246
+ pm.add_pass(ReconstructSSA, "ssa")
247
+
248
+ if not state.flags.no_rewrites:
249
+ pm.add_pass(DeadBranchPrune, "dead branch pruning")
250
+
251
+ pm.add_pass(LiteralPropagationSubPipelinePass, "Literal propagation")
252
+
253
+ pm.finalize()
254
+ return pm
255
+
49
256
 
50
257
  # The CUDACompileResult (CCR) has a specially-defined entry point equal to its
51
258
  # id. This is because the entry point is used as a key into a dict of
@@ -344,6 +551,142 @@ class CUDACompiler(CompilerBase):
344
551
  return pm
345
552
 
346
553
 
554
+ def compile_extra(
555
+ typingctx,
556
+ targetctx,
557
+ func,
558
+ args,
559
+ return_type,
560
+ flags,
561
+ locals,
562
+ library=None,
563
+ pipeline_class=CUDACompiler,
564
+ ):
565
+ """Compiler entry point
566
+
567
+ Parameter
568
+ ---------
569
+ typingctx :
570
+ typing context
571
+ targetctx :
572
+ target context
573
+ func : function
574
+ the python function to be compiled
575
+ args : tuple, list
576
+ argument types
577
+ return_type :
578
+ Use ``None`` to indicate void return
579
+ flags : numba.compiler.Flags
580
+ compiler flags
581
+ library : numba.codegen.CodeLibrary
582
+ Used to store the compiled code.
583
+ If it is ``None``, a new CodeLibrary is used.
584
+ pipeline_class : type like numba.compiler.CompilerBase
585
+ compiler pipeline
586
+ """
587
+ pipeline = pipeline_class(
588
+ typingctx, targetctx, library, args, return_type, flags, locals
589
+ )
590
+ return pipeline.compile_extra(func)
591
+
592
+
593
+ def compile_ir(
594
+ typingctx,
595
+ targetctx,
596
+ func_ir,
597
+ args,
598
+ return_type,
599
+ flags,
600
+ locals,
601
+ lifted=(),
602
+ lifted_from=None,
603
+ is_lifted_loop=False,
604
+ library=None,
605
+ pipeline_class=CUDACompiler,
606
+ ):
607
+ """
608
+ Compile a function with the given IR.
609
+
610
+ For internal use only.
611
+ """
612
+
613
+ # This is a special branch that should only run on IR from a lifted loop
614
+ if is_lifted_loop:
615
+ # This code is pessimistic and costly, but it is a not often trodden
616
+ # path and it will go away once IR is made immutable. The problem is
617
+ # that the rewrite passes can mutate the IR into a state that makes
618
+ # it possible for invalid tokens to be transmitted to lowering which
619
+ # then trickle through into LLVM IR and causes RuntimeErrors as LLVM
620
+ # cannot compile it. As a result the following approach is taken:
621
+ # 1. Create some new flags that copy the original ones but switch
622
+ # off rewrites.
623
+ # 2. Compile with 1. to get a compile result
624
+ # 3. Try and compile another compile result but this time with the
625
+ # original flags (and IR being rewritten).
626
+ # 4. If 3 was successful, use the result, else use 2.
627
+
628
+ # create flags with no rewrites
629
+ norw_flags = copy.deepcopy(flags)
630
+ norw_flags.no_rewrites = True
631
+
632
+ def compile_local(the_ir, the_flags):
633
+ pipeline = pipeline_class(
634
+ typingctx,
635
+ targetctx,
636
+ library,
637
+ args,
638
+ return_type,
639
+ the_flags,
640
+ locals,
641
+ )
642
+ return pipeline.compile_ir(
643
+ func_ir=the_ir, lifted=lifted, lifted_from=lifted_from
644
+ )
645
+
646
+ # compile with rewrites off, IR shouldn't be mutated irreparably
647
+ norw_cres = compile_local(func_ir.copy(), norw_flags)
648
+
649
+ # try and compile with rewrites on if no_rewrites was not set in the
650
+ # original flags, IR might get broken but we've got a CompileResult
651
+ # that's usable from above.
652
+ rw_cres = None
653
+ if not flags.no_rewrites:
654
+ # Suppress warnings in compilation retry
655
+ with catch_warnings():
656
+ simplefilter("ignore", NumbaWarning)
657
+ try:
658
+ rw_cres = compile_local(func_ir.copy(), flags)
659
+ except Exception:
660
+ pass
661
+ # if the rewrite variant of compilation worked, use it, else use
662
+ # the norewrites backup
663
+ if rw_cres is not None:
664
+ cres = rw_cres
665
+ else:
666
+ cres = norw_cres
667
+ return cres
668
+
669
+ else:
670
+ pipeline = pipeline_class(
671
+ typingctx, targetctx, library, args, return_type, flags, locals
672
+ )
673
+ return pipeline.compile_ir(
674
+ func_ir=func_ir, lifted=lifted, lifted_from=lifted_from
675
+ )
676
+
677
+
678
+ def compile_internal(
679
+ typingctx, targetctx, library, func, args, return_type, flags, locals
680
+ ):
681
+ """
682
+ For internal use only.
683
+ """
684
+ pipeline = CUDACompiler(
685
+ typingctx, targetctx, library, args, return_type, flags, locals
686
+ )
687
+ return pipeline.compile_extra(func)
688
+
689
+
347
690
  @global_compiler_lock
348
691
  def compile_cuda(
349
692
  pyfunc,
@@ -403,7 +746,7 @@ def compile_cuda(
403
746
  from numba.core.target_extension import target_override
404
747
 
405
748
  with target_override("cuda"):
406
- cres = compiler.compile_extra(
749
+ cres = compile_extra(
407
750
  typingctx=typingctx,
408
751
  targetctx=targetctx,
409
752
  func=pyfunc,