numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic.

Files changed (172)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
4
  from numba.core import errors, types
5
- from numba.core.typing.npydecl import (
5
+ from numba.cuda.typing.npydecl import (
6
6
  parse_dtype,
7
7
  parse_shape,
8
8
  register_number_classes,
@@ -9,6 +9,6 @@
9
9
 
10
10
  """
11
11
 
12
- from numba.core import config
12
+ from numba.cuda.core import config
13
13
 
14
14
  assert not config.ENABLE_CUDASIM, "Cannot use real driver API with simulator"
@@ -19,7 +19,8 @@ import numba
19
19
  from numba import _devicearray
20
20
  from numba.cuda.cudadrv import devices, dummyarray
21
21
  from numba.cuda.cudadrv import driver as _driver
22
- from numba.core import types, config
22
+ from numba.core import types
23
+ from numba.cuda.core import config
23
24
  from numba.np.unsafe.ndarray import to_fixed_tuple
24
25
  from numba.np.numpy_support import numpy_version
25
26
  from numba.np import numpy_support
@@ -47,7 +47,7 @@ from collections import namedtuple, deque
47
47
 
48
48
 
49
49
  from numba import mviewbuf
50
- from numba.core import config
50
+ from numba.cuda.core import config
51
51
  from numba.cuda import utils, serialize
52
52
  from .error import CudaSupportError, CudaDriverError
53
53
  from .drvapi import API_PROTOTYPES
@@ -82,12 +82,6 @@ _py_incref = ctypes.pythonapi.Py_IncRef
82
82
  _py_decref.argtypes = [ctypes.py_object]
83
83
  _py_incref.argtypes = [ctypes.py_object]
84
84
 
85
-
86
- _MVC_ERROR_MESSAGE = (
87
- "Minor version compatibility requires ptxcompiler and cubinlinker packages "
88
- "to be available"
89
- )
90
-
91
85
  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
92
86
 
93
87
  if USE_NV_BINDING:
@@ -137,7 +131,7 @@ def _have_nvjitlink():
137
131
  nvjitlink_internal._inspect_function_pointer("__nvJitLinkVersion")
138
132
  != 0
139
133
  )
140
- except NotSupportedError:
134
+ except (RuntimeError, NotSupportedError):
141
135
  # no driver
142
136
  return False
143
137
 
@@ -161,12 +155,6 @@ class CudaAPIError(CudaDriverError):
161
155
 
162
156
 
163
157
  def locate_driver_and_loader():
164
- envpath = config.CUDA_DRIVER
165
-
166
- if envpath == "0":
167
- # Force fail
168
- _raise_driver_not_found()
169
-
170
158
  # Determine DLL type
171
159
  if sys.platform == "win32":
172
160
  dlloader = ctypes.WinDLL
@@ -182,26 +170,11 @@ def locate_driver_and_loader():
182
170
  dldir = ["/usr/lib", "/usr/lib64"]
183
171
  dlnames = ["libcuda.so", "libcuda.so.1"]
184
172
 
185
- if envpath:
186
- try:
187
- envpath = os.path.abspath(envpath)
188
- except ValueError:
189
- raise ValueError(
190
- "NUMBA_CUDA_DRIVER %s is not a valid path" % envpath
191
- )
192
- if not os.path.isfile(envpath):
193
- raise ValueError(
194
- "NUMBA_CUDA_DRIVER %s is not a valid file "
195
- "path. Note it must be a filepath of the .so/"
196
- ".dll/.dylib or the driver" % envpath
197
- )
198
- candidates = [envpath]
199
- else:
200
- # First search for the name in the default library path.
201
- # If that is not found, try the specific path.
202
- candidates = dlnames + [
203
- os.path.join(x, y) for x, y in product(dldir, dlnames)
204
- ]
173
+ # First search for the name in the default library path.
174
+ # If that is not found, try specific common paths.
175
+ candidates = dlnames + [
176
+ os.path.join(x, y) for x, y in product(dldir, dlnames)
177
+ ]
205
178
 
206
179
  return dlloader, candidates
207
180
 
@@ -237,9 +210,7 @@ def find_driver():
237
210
 
238
211
  DRIVER_NOT_FOUND_MSG = """
239
212
  CUDA driver library cannot be found.
240
- If you are sure that a CUDA driver is installed,
241
- try setting environment variable NUMBA_CUDA_DRIVER
242
- with the file path of the CUDA driver shared library.
213
+ Ensure that a compatible NVIDIA driver is installed and available on your system path.
243
214
  """
244
215
 
245
216
  DRIVER_LOAD_ERROR_MSG = """
@@ -2842,10 +2813,7 @@ class _LinkerBase(metaclass=ABCMeta):
2842
2813
  def add_cu(self, cu, name):
2843
2814
  """Add CUDA source in a string to the link. The name of the source
2844
2815
  file should be specified in `name`."""
2845
- with driver.get_active_context() as ac:
2846
- dev = driver.get_device(ac.devnum)
2847
- cc = dev.compute_capability
2848
- ptx, log = nvrtc.compile(cu, name, cc)
2816
+ ptx, log = nvrtc.compile(cu, name, self.cc)
2849
2817
 
2850
2818
  if config.DUMP_ASSEMBLY:
2851
2819
  print(("ASSEMBLY %s" % name).center(80, "-"))
@@ -3009,10 +2977,7 @@ class _Linker(_LinkerBase):
3009
2977
  self._object_codes.append(obj)
3010
2978
 
3011
2979
  def add_cu(self, cu, name="<cudapy-cu>"):
3012
- with driver.get_active_context() as ac:
3013
- dev = driver.get_device(ac.devnum)
3014
- cc = dev.compute_capability
3015
- obj, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
2980
+ obj, log = nvrtc.compile(cu, name, self.cc, ltoir=self.lto)
3016
2981
 
3017
2982
  if not self.lto and config.DUMP_ASSEMBLY:
3018
2983
  print(("ASSEMBLY %s" % name).center(80, "-"))
@@ -3099,101 +3064,6 @@ class _Linker(_LinkerBase):
3099
3064
  return result
3100
3065
 
3101
3066
 
3102
- class MVCLinker(_LinkerBase):
3103
- """
3104
- Linker supporting Minor Version Compatibility, backed by the cubinlinker
3105
- package.
3106
- """
3107
-
3108
- def __init__(self, max_registers=None, lineinfo=False, cc=None):
3109
- try:
3110
- from cubinlinker import CubinLinker
3111
- except ImportError as err:
3112
- raise ImportError(_MVC_ERROR_MESSAGE) from err
3113
-
3114
- if cc is None:
3115
- raise RuntimeError(
3116
- "MVCLinker requires Compute Capability to be "
3117
- "specified, but cc is None"
3118
- )
3119
-
3120
- super().__init__(max_registers, lineinfo, cc)
3121
-
3122
- arch = f"sm_{cc[0] * 10 + cc[1]}"
3123
- ptx_compile_opts = ["--gpu-name", arch, "-c"]
3124
- if max_registers:
3125
- arg = f"--maxrregcount={max_registers}"
3126
- ptx_compile_opts.append(arg)
3127
- if lineinfo:
3128
- ptx_compile_opts.append("--generate-line-info")
3129
- self.ptx_compile_options = tuple(ptx_compile_opts)
3130
-
3131
- self._linker = CubinLinker(f"--arch={arch}")
3132
-
3133
- @property
3134
- def info_log(self):
3135
- return self._linker.info_log
3136
-
3137
- @property
3138
- def error_log(self):
3139
- return self._linker.error_log
3140
-
3141
- def add_ptx(self, ptx, name="<cudapy-ptx>"):
3142
- try:
3143
- from ptxcompiler import compile_ptx
3144
- from cubinlinker import CubinLinkerError
3145
- except ImportError as err:
3146
- raise ImportError(_MVC_ERROR_MESSAGE) from err
3147
- compile_result = compile_ptx(ptx.decode(), self.ptx_compile_options)
3148
- try:
3149
- self._linker.add_cubin(compile_result.compiled_program, name)
3150
- except CubinLinkerError as e:
3151
- raise LinkerError from e
3152
-
3153
- def add_data(self, data, kind, name):
3154
- msg = "Adding in-memory data unsupported in the MVC linker"
3155
- raise LinkerError(msg)
3156
-
3157
- def add_file(self, path, kind):
3158
- try:
3159
- from cubinlinker import CubinLinkerError
3160
- except ImportError as err:
3161
- raise ImportError(_MVC_ERROR_MESSAGE) from err
3162
-
3163
- try:
3164
- data = cached_file_read(path, how="rb")
3165
- except FileNotFoundError:
3166
- raise LinkerError(f"{path} not found")
3167
-
3168
- name = pathlib.Path(path).name
3169
- if kind == FILE_EXTENSION_MAP["cubin"]:
3170
- fn = self._linker.add_cubin
3171
- elif kind == FILE_EXTENSION_MAP["fatbin"]:
3172
- fn = self._linker.add_fatbin
3173
- elif kind == FILE_EXTENSION_MAP["a"]:
3174
- raise LinkerError(f"Don't know how to link {kind}")
3175
- elif kind == FILE_EXTENSION_MAP["ptx"]:
3176
- return self.add_ptx(data, name)
3177
- else:
3178
- raise LinkerError(f"Don't know how to link {kind}")
3179
-
3180
- try:
3181
- fn(data, name)
3182
- except CubinLinkerError as e:
3183
- raise LinkerError from e
3184
-
3185
- def complete(self):
3186
- try:
3187
- from cubinlinker import CubinLinkerError
3188
- except ImportError as err:
3189
- raise ImportError(_MVC_ERROR_MESSAGE) from err
3190
-
3191
- try:
3192
- return self._linker.complete()
3193
- except CubinLinkerError as e:
3194
- raise LinkerError from e
3195
-
3196
-
3197
3067
  class CtypesLinker(_LinkerBase):
3198
3068
  """
3199
3069
  Links for current device if no CC given
@@ -3218,6 +3088,7 @@ class CtypesLinker(_LinkerBase):
3218
3088
  if lineinfo:
3219
3089
  options[enums.CU_JIT_GENERATE_LINE_INFO] = c_void_p(1)
3220
3090
 
3091
+ self.cc = cc
3221
3092
  if cc is None:
3222
3093
  # No option value is needed, but we need something as a placeholder
3223
3094
  options[enums.CU_JIT_TARGET_FROM_CUCONTEXT] = 1
@@ -5,25 +5,118 @@ from collections import namedtuple
5
5
  import itertools
6
6
  import functools
7
7
  import operator
8
- import ctypes
9
8
 
10
- import numpy as np
11
-
12
- from numba import _helperlib
13
9
 
14
10
  Extent = namedtuple("Extent", ["begin", "end"])
15
11
 
16
- attempt_nocopy_reshape = ctypes.CFUNCTYPE(
17
- ctypes.c_int,
18
- ctypes.c_long, # nd
19
- np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # dims
20
- np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # strides
21
- ctypes.c_long, # newnd
22
- np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newdims
23
- np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newstrides
24
- ctypes.c_long, # itemsize
25
- ctypes.c_int, # is_f_order
26
- )(_helperlib.c_helpers["attempt_nocopy_reshape"])
12
+
13
+ def attempt_nocopy_reshape(
14
+ nd, dims, strides, newnd, newdims, newstrides, itemsize, is_f_order
15
+ ):
16
+ """
17
+ Attempt to reshape an array without copying data.
18
+
19
+ This function should correctly handle all reshapes, including
20
+ axes of length 1. Zero strides should work but are untested.
21
+
22
+ If a copy is needed, returns 0
23
+ If no copy is needed, returns 1 and fills `newstrides`
24
+ with appropriate strides
25
+ """
26
+
27
+ olddims = []
28
+ oldstrides = []
29
+ oldnd = 0
30
+
31
+ # Remove axes with dimension 1 from the old array. They have no effect
32
+ # but would need special cases since their strides do not matter.
33
+ for oi in range(nd):
34
+ if dims[oi] != 1:
35
+ olddims.append(dims[oi])
36
+ oldstrides.append(strides[oi])
37
+ oldnd += 1
38
+
39
+ # Calculate total sizes
40
+ np_total = 1
41
+ for ni in range(newnd):
42
+ np_total *= newdims[ni]
43
+
44
+ op_total = 1
45
+ for oi in range(oldnd):
46
+ op_total *= olddims[oi]
47
+
48
+ if np_total != op_total:
49
+ # Different total sizes; no hope
50
+ return 0
51
+
52
+ if np_total == 0:
53
+ # Handle zero-sized arrays
54
+ # Just make the strides vaguely reasonable
55
+ # (they can have any value in theory).
56
+ for i in range(newnd):
57
+ newstrides[i] = itemsize
58
+ return 1
59
+
60
+ # oi to oj and ni to nj give the axis ranges currently worked with
61
+ oi = 0
62
+ oj = 1
63
+ ni = 0
64
+ nj = 1
65
+
66
+ while ni < newnd and oi < oldnd:
67
+ np = newdims[ni]
68
+ op = olddims[oi]
69
+
70
+ while np != op:
71
+ if np < op:
72
+ # Misses trailing 1s, these are handled later
73
+ np *= newdims[nj]
74
+ nj += 1
75
+ else:
76
+ op *= olddims[oj]
77
+ oj += 1
78
+
79
+ # Check whether the original axes can be combined
80
+ for ok in range(oi, oj - 1):
81
+ if is_f_order:
82
+ if oldstrides[ok + 1] != olddims[ok] * oldstrides[ok]:
83
+ # not contiguous enough
84
+ return 0
85
+ else:
86
+ # C order
87
+ if oldstrides[ok] != olddims[ok + 1] * oldstrides[ok + 1]:
88
+ # not contiguous enough
89
+ return 0
90
+
91
+ # Calculate new strides for all axes currently worked with
92
+ if is_f_order:
93
+ newstrides[ni] = oldstrides[oi]
94
+ for nk in range(ni + 1, nj):
95
+ newstrides[nk] = newstrides[nk - 1] * newdims[nk - 1]
96
+ else:
97
+ # C order
98
+ newstrides[nj - 1] = oldstrides[oj - 1]
99
+ for nk in range(nj - 1, ni, -1):
100
+ newstrides[nk - 1] = newstrides[nk] * newdims[nk]
101
+
102
+ ni = nj
103
+ nj += 1
104
+ oi = oj
105
+ oj += 1
106
+
107
+ # Set strides corresponding to trailing 1s of the new shape
108
+ if ni >= 1:
109
+ last_stride = newstrides[ni - 1]
110
+ else:
111
+ last_stride = itemsize
112
+
113
+ if is_f_order:
114
+ last_stride *= newdims[ni - 1]
115
+
116
+ for nk in range(ni, newnd):
117
+ newstrides[nk] = last_stride
118
+
119
+ return 1
27
120
 
28
121
 
29
122
  class Dim(object):
@@ -333,18 +426,12 @@ class Array(object):
333
426
  else:
334
427
  raise AssertionError("unreachable")
335
428
  else:
336
- newstrides = np.empty(newnd, np.ctypeslib.c_intp)
337
-
338
- # need to keep these around in variables, not temporaries, so they
339
- # don't get GC'ed before we call into the C code
340
- olddims = np.array(self.shape, dtype=np.ctypeslib.c_intp)
341
- oldstrides = np.array(self.strides, dtype=np.ctypeslib.c_intp)
342
- newdims = np.array(newdims, dtype=np.ctypeslib.c_intp)
429
+ newstrides = [0] * newnd
343
430
 
344
431
  if not attempt_nocopy_reshape(
345
432
  oldnd,
346
- olddims,
347
- oldstrides,
433
+ self.shape,
434
+ self.strides,
348
435
  newnd,
349
436
  newdims,
350
437
  newstrides,
@@ -16,11 +16,11 @@ import os
16
16
  import sys
17
17
  import ctypes
18
18
 
19
- from numba.misc.findlib import find_lib
19
+ from numba.cuda.misc.findlib import find_lib
20
20
  from numba.cuda.cuda_paths import get_cuda_paths
21
21
  from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
22
22
  from numba.cuda.cudadrv.error import CudaSupportError
23
- from numba.core import config
23
+ from numba.cuda.core import config
24
24
 
25
25
 
26
26
  if sys.platform == "win32":
@@ -54,9 +54,9 @@ def get_cudalib(lib, static=False):
54
54
  """
55
55
  if lib in {"nvrtc", "nvvm"}:
56
56
  return get_cuda_paths()[lib].info or _dllnamepattern % lib
57
- else:
58
- dir_type = "static_cudalib_dir" if static else "cudalib_dir"
59
- libdir = get_cuda_paths()[dir_type].info
57
+
58
+ dir_type = "static_cudalib_dir" if static else "cudalib_dir"
59
+ libdir = get_cuda_paths()[dir_type].info
60
60
 
61
61
  candidates = find_lib(lib, libdir, static=static)
62
62
  namepattern = _staticnamepattern if static else _dllnamepattern
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
- from numba import config
4
+ from numba.cuda import config
5
5
  from . import enums
6
6
 
7
7
  if config.CUDA_USE_NVIDIA_BINDING:
@@ -10,7 +10,7 @@ from numba.cuda.cudadrv.error import (
10
10
  NvrtcCompilationError,
11
11
  NvrtcSupportError,
12
12
  )
13
- from numba import config
13
+ from numba.cuda import config
14
14
  from numba.cuda.cuda_paths import get_cuda_paths
15
15
  from numba.cuda.utils import _readenv
16
16
 
@@ -21,8 +21,8 @@ import warnings
21
21
 
22
22
  NVRTC_EXTRA_SEARCH_PATHS = _readenv(
23
23
  "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", str, ""
24
- ) or getattr(config, "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", "")
25
- if not hasattr(config, "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
24
+ ) or getattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS", "")
25
+ if not hasattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
26
26
  config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = NVRTC_EXTRA_SEARCH_PATHS
27
27
 
28
28
  # Opaque handle for compilation unit
@@ -347,15 +347,26 @@ def compile(src, name, cc, ltoir=False):
347
347
  arch = f"--gpu-architecture=compute_{major}{minor}"
348
348
 
349
349
  cuda_include_dir = get_cuda_paths()["include_dir"].info
350
- cuda_includes = [
351
- f"{cuda_include_dir}",
352
- f"{os.path.join(cuda_include_dir, 'cccl')}",
353
- ]
350
+ cuda_includes = [f"{cuda_include_dir}"]
354
351
 
355
352
  cudadrv_path = os.path.dirname(os.path.abspath(__file__))
356
353
  numba_cuda_path = os.path.dirname(cudadrv_path)
357
354
 
358
- numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
355
+ nvrtc_ver_major = version[0]
356
+ if nvrtc_ver_major == 12:
357
+ numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
358
+ # For CUDA 12 wheels, `cuda_include_dir` is `site-packages/nvidia/cuda_runtime/include`
359
+ # We need to find CCCL at `site-packages/nvidia/cuda_cccl/include`
360
+ # For CUDA 12 conda / system install, CCCL is just in the `include` directory
361
+ cuda_includes.append(
362
+ f"{os.path.join(cuda_include_dir, '..', '..', 'cuda_cccl', 'include')}"
363
+ )
364
+ elif nvrtc_ver_major == 13:
365
+ numba_include = f"{os.path.join(numba_cuda_path, 'include', '13')}"
366
+ # For CUDA 13 wheels, `cuda_include_dir` is `site-packages/nvidia/cu13/include`
367
+ # We need to find CCCL at `site-packages/nvidia/cu13/include/cccl`
368
+ # For CUDA 13 conda / system install, CCCL is in the `include/cccl` directory
369
+ cuda_includes.append(f"{os.path.join(cuda_include_dir, 'cccl')}")
359
370
 
360
371
  if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
361
372
  extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
@@ -154,10 +154,7 @@ class NVVM(object):
154
154
  inst.driver = open_cudalib("nvvm")
155
155
  except OSError as e:
156
156
  cls.__INSTANCE = None
157
- errmsg = (
158
- "libNVVM cannot be found. Do `conda install "
159
- "cudatoolkit`:\n%s"
160
- )
157
+ errmsg = "libNVVM cannot be found. Please install the cuda-toolkit conda package:\n%s"
161
158
  raise NvvmSupportError(errmsg % e)
162
159
 
163
160
  # Find & populate functions
@@ -8,7 +8,7 @@ The toolkit version can now be obtained from NVRTC, so we don't use a binding
8
8
  to the runtime anymore. This file is provided to maintain the existing API.
9
9
  """
10
10
 
11
- from numba import config
11
+ from numba.cuda import config
12
12
  from numba.cuda.cudadrv.nvrtc import NVRTC
13
13
 
14
14
 
@@ -10,7 +10,7 @@ from llvmlite import ir
10
10
  import llvmlite.binding as ll
11
11
 
12
12
  from numba.core.imputils import Registry
13
- from numba.core.typing.npydecl import parse_dtype
13
+ from numba.cuda.typing.npydecl import parse_dtype
14
14
  from numba.core.datamodel import models
15
15
  from numba.core import types
16
16
  from numba.cuda import cgutils
@@ -25,6 +25,10 @@ registry = Registry()
25
25
  lower = registry.lower
26
26
  lower_attr = registry.lower_getattr
27
27
  lower_constant = registry.lower_constant
28
+ lower_getattr_generic = registry.lower_getattr_generic
29
+ lower_setattr = registry.lower_setattr
30
+ lower_setattr_generic = registry.lower_setattr_generic
31
+ lower_cast = registry.lower_cast
28
32
 
29
33
 
30
34
  def initialize_dim3(builder, prefix):
@@ -1,15 +1,98 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
+ import abc
4
5
  import os
6
+ from contextlib import contextmanager
5
7
 
6
8
  from llvmlite import ir
7
- from numba.core import types, config
9
+ from numba.core import types
10
+ from numba.cuda.core import config
8
11
  from numba.cuda import cgutils
9
12
  from numba.core.datamodel.models import ComplexModel, UnionModel, UniTupleModel
10
- from numba.core.debuginfo import AbstractDIBuilder
11
13
  from numba.cuda.types import GridGroup
12
14
 
15
+
16
+ @contextmanager
17
+ def suspend_emission(builder):
18
+ """Suspends the emission of debug_metadata for the duration of the context
19
+ managed block."""
20
+ ref = builder.debug_metadata
21
+ builder.debug_metadata = None
22
+ try:
23
+ yield
24
+ finally:
25
+ builder.debug_metadata = ref
26
+
27
+
28
+ class AbstractDIBuilder(metaclass=abc.ABCMeta):
29
+ @abc.abstractmethod
30
+ def mark_variable(
31
+ self,
32
+ builder,
33
+ allocavalue,
34
+ name,
35
+ lltype,
36
+ size,
37
+ line,
38
+ datamodel=None,
39
+ argidx=None,
40
+ ):
41
+ """Emit debug info for the variable."""
42
+ pass
43
+
44
+ @abc.abstractmethod
45
+ def mark_location(self, builder, line):
46
+ """Emit source location information to the given IRBuilder."""
47
+ pass
48
+
49
+ @abc.abstractmethod
50
+ def mark_subprogram(self, function, qualname, argnames, argtypes, line):
51
+ """Emit source location information for the given function."""
52
+ pass
53
+
54
+ @abc.abstractmethod
55
+ def initialize(self):
56
+ """Initialize the debug info. An opportunity for the debuginfo to
57
+ prepare any necessary data structures.
58
+ """
59
+
60
+ @abc.abstractmethod
61
+ def finalize(self):
62
+ """Finalize the debuginfo by emitting all necessary metadata."""
63
+ pass
64
+
65
+
66
+ class DummyDIBuilder(AbstractDIBuilder):
67
+ def __init__(self, module, filepath, cgctx, directives_only):
68
+ pass
69
+
70
+ def mark_variable(
71
+ self,
72
+ builder,
73
+ allocavalue,
74
+ name,
75
+ lltype,
76
+ size,
77
+ line,
78
+ datamodel=None,
79
+ argidx=None,
80
+ ):
81
+ pass
82
+
83
+ def mark_location(self, builder, line):
84
+ pass
85
+
86
+ def mark_subprogram(self, function, qualname, argnames, argtypes, line):
87
+ pass
88
+
89
+ def initialize(self):
90
+ pass
91
+
92
+ def finalize(self):
93
+ pass
94
+
95
+
13
96
  _BYTE_SIZE = 8
14
97
 
15
98
 
@@ -2,10 +2,10 @@
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
4
  from warnings import warn
5
- from numba.core import types, config
5
+ from numba.core import types
6
6
  from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
7
7
  from numba.cuda.compiler import declare_device_function
8
- from numba.cuda.core import sigutils
8
+ from numba.cuda.core import sigutils, config
9
9
  from numba.cuda.dispatcher import CUDADispatcher
10
10
  from numba.cuda.simulator.kernel import FakeCUDAKernel
11
11
  from numba.cuda.cudadrv.driver import _have_nvjitlink
@@ -199,7 +199,7 @@ def jit(
199
199
  raise TypeError("CUDA kernel must have void return type.")
200
200
 
201
201
  if device:
202
- from numba.core import typeinfer
202
+ from numba.cuda.core import typeinfer
203
203
 
204
204
  with typeinfer.register_dispatcher(disp):
205
205
  disp.compile_device(argtypes, restype)
@@ -1,8 +1,7 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
- from numba.core.descriptors import TargetDescriptor
5
- from numba.core.options import TargetOptions
4
+ from numba.cuda.core.options import TargetOptions
6
5
  from .target import CUDATargetContext, CUDATypingContext
7
6
 
8
7
 
@@ -10,7 +9,7 @@ class CUDATargetOptions(TargetOptions):
10
9
  pass
11
10
 
12
11
 
13
- class CUDATarget(TargetDescriptor):
12
+ class CUDATarget:
14
13
  def __init__(self, name):
15
14
  self.options = CUDATargetOptions
16
15
  # The typing and target contexts are initialized only when needed -
@@ -18,7 +17,7 @@ class CUDATarget(TargetDescriptor):
18
17
  # systems that might not have them present.
19
18
  self._typingctx = None
20
19
  self._targetctx = None
21
- super().__init__(name)
20
+ self._target_name = name
22
21
 
23
22
  @property
24
23
  def typing_context(self):