numba-cuda 0.23.0__cp313-cp313-win_amd64.whl → 0.24.0__cp313-cp313-win_amd64.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (74)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +4 -1
  3. numba_cuda/numba/cuda/_compat.py +47 -0
  4. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  5. numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -2
  6. numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
  7. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  8. numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
  9. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  10. numba_cuda/numba/cuda/cext/_typeof.cpp +56 -8
  11. numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
  12. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  13. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
  14. numba_cuda/numba/cuda/codegen.py +4 -2
  15. numba_cuda/numba/cuda/compiler.py +5 -5
  16. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
  17. numba_cuda/numba/cuda/core/base.py +6 -10
  18. numba_cuda/numba/cuda/core/bytecode.py +21 -13
  19. numba_cuda/numba/cuda/core/byteflow.py +336 -90
  20. numba_cuda/numba/cuda/core/compiler.py +3 -4
  21. numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
  22. numba_cuda/numba/cuda/core/config.py +5 -7
  23. numba_cuda/numba/cuda/core/controlflow.py +17 -9
  24. numba_cuda/numba/cuda/core/inline_closurecall.py +11 -10
  25. numba_cuda/numba/cuda/core/interpreter.py +255 -96
  26. numba_cuda/numba/cuda/core/ir_utils.py +8 -17
  27. numba_cuda/numba/cuda/core/pythonapi.py +3 -0
  28. numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
  29. numba_cuda/numba/cuda/core/ssa.py +2 -2
  30. numba_cuda/numba/cuda/core/transforms.py +4 -6
  31. numba_cuda/numba/cuda/core/typed_passes.py +1 -1
  32. numba_cuda/numba/cuda/core/typeinfer.py +3 -3
  33. numba_cuda/numba/cuda/core/untyped_passes.py +11 -10
  34. numba_cuda/numba/cuda/cpython/unicode.py +2 -2
  35. numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
  36. numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -4
  37. numba_cuda/numba/cuda/cudadrv/driver.py +13 -11
  38. numba_cuda/numba/cuda/cudadrv/nvrtc.py +71 -32
  39. numba_cuda/numba/cuda/debuginfo.py +10 -79
  40. numba_cuda/numba/cuda/deviceufunc.py +3 -6
  41. numba_cuda/numba/cuda/dispatcher.py +5 -19
  42. numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
  43. numba_cuda/numba/cuda/lowering.py +0 -28
  44. numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
  45. numba_cuda/numba/cuda/np/arrayobj.py +7 -9
  46. numba_cuda/numba/cuda/np/numpy_support.py +7 -10
  47. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
  48. numba_cuda/numba/cuda/testing.py +4 -8
  49. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +66 -4
  50. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  51. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +2 -2
  52. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +1 -1
  53. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +26 -4
  54. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
  55. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
  56. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +12 -1
  57. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
  58. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
  59. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  60. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +12 -7
  61. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  62. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
  63. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
  64. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +8 -7
  65. numba_cuda/numba/cuda/tests/support.py +11 -0
  66. numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
  67. numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
  68. numba_cuda/numba/cuda/typing/typeof.py +9 -16
  69. {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
  70. {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +74 -73
  71. {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
  72. {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
  73. {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
  74. {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,6 @@ from llvmlite import ir as llvm_ir
  from numba.cuda import HAS_NUMBA
  from numba.cuda.core import ir
  from numba.cuda import debuginfo, cgutils, utils, typing, types
- from numba import cuda
  from numba.cuda.core import (
      ir_utils,
      targetconfig,
@@ -1684,31 +1683,10 @@ class Lower(BaseLower):


  class CUDALower(Lower):
-     def _is_shared_array_call(self, fnty):
-         # Check if function type is a cuda.shared.array call
-         if not hasattr(fnty, "typing_key"):
-             return False
-         return fnty.typing_key is cuda.shared.array
-
-     def _lower_call_normal(self, fnty, expr, signature):
-         # Set flag for subsequent store to track shared address space
-         if self.context.enable_debuginfo and self._is_shared_array_call(fnty):
-             self._pending_shared_store = True
-
-         return super()._lower_call_normal(fnty, expr, signature)
-
      def storevar(self, value, name, argidx=None):
          """
          Store the value into the given variable.
          """
-         # Track address space for debug info
-         if self.context.enable_debuginfo and self._pending_shared_store:
-             from numba.cuda.cudadrv import nvvm
-
-             self._addrspace_map[name] = nvvm.ADDRSPACE_SHARED
-             if not name.startswith("$") and not name.startswith("."):
-                 self._pending_shared_store = False
-
          # Handle polymorphic variables with CUDA_DEBUG_POLY enabled
          if config.CUDA_DEBUG_POLY:
              src_name = name.split(".")[0]
@@ -1834,12 +1812,6 @@ class CUDALower(Lower):
          """
          super().pre_lower()

-         # Track address space for debug info
-         self._addrspace_map = {}
-         self._pending_shared_store = False
-         if self.context.enable_debuginfo:
-             self.debuginfo._set_addrspace_map(self._addrspace_map)
-
          # Track polymorphic variables for debug info
          self.poly_var_typ_map = {}
          self.poly_var_loc_map = {}
@@ -16,7 +16,7 @@ from numba.cuda.cudadrv.driver import (
      _to_core_stream,
      _have_nvjitlink,
  )
- from cuda.core.experimental import LaunchConfig, launch
+ from numba.cuda._compat import LaunchConfig, launch
  from numba.cuda.cudadrv import devices
  from numba.cuda.api import get_current_device
  from numba.cuda.utils import _readenv, cached_file_read
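Note: throughout this release, imports that previously came from cuda.core.experimental are routed through the new numba/cuda/_compat.py (+47 lines). The contents of that module are not shown in this diff, so the sketch below is only an assumption of what such a compatibility shim typically looks like, re-exporting the cuda.core symbols used elsewhere in the diff:

```python
# Hypothetical sketch of numba/cuda/_compat.py -- not the actual module contents.
# Re-export cuda.core.experimental symbols from one place so the rest of the
# codebase is insulated from API moves between cuda.core releases.
try:
    from cuda.core.experimental import (  # noqa: F401
        Device,
        LaunchConfig,
        ObjectCode,
        Stream,
        launch,
    )
    from cuda.core.experimental._utils.cuda_utils import (  # noqa: F401
        CUDAError,
        NVRTCError,
    )
except ImportError as e:  # assumed error handling, not confirmed by the diff
    raise ImportError(
        "numba-cuda requires the cuda.core package; install cuda-core"
    ) from e
```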
@@ -1798,10 +1798,10 @@ def numpy_broadcast_arrays(*args):
              tup = tuple_setitem(tup, i, shape[i])

          # numpy checks if the input arrays have the same shape as `shape`
-         outs = []
-         for array in literal_unroll(args):
-             outs.append(np.broadcast_to(np.asarray(array), tup))
-         return outs
+         return [
+             np.broadcast_to(np.asarray(array), tup)
+             for array in literal_unroll(args)
+         ]

      return impl

@@ -4822,13 +4822,11 @@ def _parse_shape(context, builder, ty, val):
      ndim = ty.count
      passed_shapes = cgutils.unpack_tuple(builder, val, count=ndim)

-     shapes = []
-     for s in passed_shapes:
-         shapes.append(safecast_intp(context, builder, s.type, s))
+     shapes = [safecast_intp(context, builder, s.type, s) for s in passed_shapes]

      zero = context.get_constant_generic(builder, types.intp, 0)
-     for dim in range(ndim):
-         is_neg = builder.icmp_signed("<", shapes[dim], zero)
+     for shape in shapes:
+         is_neg = builder.icmp_signed("<", shape, zero)
          with cgutils.if_unlikely(builder, is_neg):
              context.call_conv.return_user_exc(
                  builder, ValueError, ("negative dimensions not allowed",)
@@ -4,6 +4,7 @@
  import collections
  import ctypes
  import itertools
+ import functools
  import operator
  import re

@@ -21,11 +22,12 @@ from numba.cuda.cgutils import is_nonelike  # noqa: F401
  numpy_version = tuple(map(int, np.__version__.split(".")[:2]))


+ @functools.lru_cache
  def strides_from_shape(
      shape: tuple[int, ...], itemsize: int, *, order: str
  ) -> tuple[int, ...]:
      """Compute strides for a contiguous array with given shape and order."""
-     if len(shape) == 0:
+     if not shape:
          # 0-D arrays have empty strides
          return ()
      limits = slice(1, None) if order == "C" else slice(None, -1)
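Note: strides_from_shape is now memoized with functools.lru_cache, which relies on its arguments (a shape tuple, the itemsize, and the order string) being hashable. A quick, independent illustration of the contiguous-stride computation the docstring describes, using NumPy's own strides as the reference behaviour:

```python
import numpy as np

# For a C-contiguous (3, 4) array of float64 (itemsize 8), strides are
# (32, 8): moving one row skips 4 * 8 bytes, moving one column skips 8.
a = np.empty((3, 4), dtype=np.float64, order="C")
assert a.strides == (32, 8)

# For Fortran order the roles flip: (8, 24).
b = np.empty((3, 4), dtype=np.float64, order="F")
assert b.strides == (8, 24)
```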
@@ -118,16 +120,11 @@ def from_dtype(dtype):
      elif getattr(dtype, "fields", None) is not None:
          return from_struct_dtype(dtype)

-     try:
-         return FROM_DTYPE[dtype]
-     except KeyError:
-         pass
+     result = FROM_DTYPE.get(dtype)
+     if result is not None:
+         return result

-     try:
-         char = dtype.char
-     except AttributeError:
-         pass
-     else:
+     if (char := getattr(dtype, "char", None)) is not None:
          if char in "SU":
              return _from_str_dtype(dtype)
          if char in "mM":
@@ -122,9 +122,10 @@ def polyutils_as_series(alist, trim=True):

      def impl(alist, trim=True):
          if tuple_input:
-             arrays = []
-             for item in literal_unroll(alist):
-                 arrays.append(np.atleast_1d(np.asarray(item)).astype(res_dtype))
+             arrays = [
+                 np.atleast_1d(np.asarray(item)).astype(res_dtype)
+                 for item in literal_unroll(alist)
+             ]

          elif list_input:
              arrays = [
@@ -276,14 +276,6 @@ def skip_if_curand_kernel_missing(fn):
      return unittest.skipUnless(curand_kernel_h_file, reason)(fn)


- def skip_if_mvc_enabled(reason):
-     """Skip a test if Minor Version Compatibility is enabled"""
-     assert isinstance(reason, str)
-     return unittest.skipIf(
-         config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY, reason
-     )
-
-
  def cc_X_or_above(major, minor):
      if not config.ENABLE_CUDASIM:
          cc = devices.get_context().device.compute_capability
@@ -308,6 +300,10 @@ def skip_unless_cc_75(fn):
      return unittest.skipUnless(cc_X_or_above(7, 5), "requires cc >= 7.5")(fn)


+ def skip_unless_cc_90(fn):
+     return unittest.skipUnless(cc_X_or_above(9, 0), "requires cc >= 9.0")(fn)
+
+
  def xfail_unless_cudasim(fn):
      if config.ENABLE_CUDASIM:
          return fn
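Note: the new skip_unless_cc_90 helper follows the same pattern as the existing skip_unless_cc_* decorators and is used by the Hopper-specific test added to test_cuda_driver.py below. A brief usage sketch; the test class name here is invented for illustration:

```python
from numba.cuda.testing import CUDATestCase, skip_unless_cc_90, unittest


class HypotheticalHopperTests(CUDATestCase):
    @skip_unless_cc_90
    def test_needs_hopper(self):
        # Only runs when the current device reports compute capability >= 9.0.
        ...


if __name__ == "__main__":
    unittest.main()
```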
@@ -2,21 +2,25 @@
  # SPDX-License-Identifier: BSD-2-Clause

  from ctypes import c_int, sizeof
+ import cffi
+ import numpy as np

  from numba.cuda.cudadrv.driver import host_to_device, device_to_host, driver
- from cuda.core.experimental import (
+ from numba.cuda._compat import (
      LaunchConfig,
+     Device,
      Stream as ExperimentalStream,
      launch,
  )

  from numba import cuda
- from numba.cuda.cudadrv import devices
- from numba.cuda.testing import unittest, CUDATestCase
+ from numba.cuda.cudadrv import devices, nvrtc
+ from numba.cuda.testing import unittest, CUDATestCase, skip_unless_cc_90
  from numba.cuda.testing import skip_on_cudasim
+ from numba.cuda.tests.support import override_config
+ from numba.core import types
  import contextlib

- from cuda.core.experimental import Device


  ptx1 = """
  .version 1.4
@@ -391,5 +395,63 @@ class TestDevice(CUDATestCase):
          self.assertRegex(dev.uuid, uuid_format)


+ @skip_on_cudasim("CUDA asm unsupported in the simulator")
+ class TestAcceleratedArchitecture(CUDATestCase):
+     @skip_unless_cc_90
+     def test_device_arch_specific(self):
+         set_desc = cuda.CUSource("""
+         #include <cuda_fp16.h>
+
+         extern "C" __device__
+         int set_descriptor(int *out, int* smem) {
+             unsigned usmem = __cvta_generic_to_shared(smem);
+             asm volatile("tensormap.replace.tile.rank.shared::cta.b1024.b32 [%0], 2;" :: "r"(usmem));
+             return 0;
+         }
+         """)
+
+         set_descriptor = cuda.declare_device(
+             "set_descriptor",
+             types.int32(types.CPointer(types.int32)),
+             link=[set_desc],
+         )
+
+         ffi = cffi.FFI()
+
+         @cuda.jit
+         def kernel(a):
+             sm = cuda.shared.array(1, dtype=np.int32)
+             data_ptr = ffi.from_buffer(sm)
+             set_descriptor(data_ptr)
+
+             # just to prevent optimization:
+             sm[0] = 2
+             cuda.syncthreads()
+             a[0] = sm[0]
+
+         a = np.ones(1, dtype=np.int32)
+
+         kernel[1, 1](a)
+
+         assert a[0] == 2
+
+     def test_get_arch_option_force_cc(self):
+         with override_config("FORCE_CUDA_CC", (8, 0)):
+             arch = nvrtc.get_arch_option(9, 0, "a")
+             self.assertEqual("compute_80", arch)
+
+     def test_get_arch_option_force_cc_arch_specific(self):
+         with override_config("FORCE_CUDA_CC", (9, 0, "a")):
+             arch = nvrtc.get_arch_option(9, 0)
+             self.assertEqual("compute_90a", arch)
+
+     def test_get_arch_option_illegal_arch_specific(self):
+         # Using a fictitious very high compute capability (major 99) for this
+         # test to ensure future toolkits are unlikely to provide an exact match
+         msg = "Can't use arch-specific compute_990a with"
+         with self.assertRaisesRegex(ValueError, msg):
+             nvrtc.get_arch_option(99, 0, "a")
+
+
  if __name__ == "__main__":
      unittest.main()
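Note: these new tests exercise arch-specific compute capabilities. The cc tuple may now carry a suffix element such as "a" (and, per the NVRTC version check in test_compiler.py, "f" with NVRTC >= 12.9), so (9, 0, "a") maps to the compute_90a / sm_90a architecture names, and FORCE_CUDA_CC overrides whatever was requested. The real nvrtc.get_arch_option is not shown in this diff, so the tiny helper below is only an assumed sketch of the name-building the tests imply:

```python
def arch_name(major: int, minor: int, suffix: str = "") -> str:
    # Hypothetical helper: builds the architecture string the tests above
    # expect, e.g. (9, 0, "a") -> "compute_90a".
    return f"compute_{major}{minor}{suffix}"


assert arch_name(8, 0) == "compute_80"
assert arch_name(9, 0, "a") == "compute_90a"
assert arch_name(10, 0, "f") == "compute_100f"
```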
@@ -4,7 +4,7 @@
  import numpy as np
  from numba import cuda
  from numba.cuda.testing import unittest, CUDATestCase
- from cuda.core.experimental import Device
+ from numba.cuda._compat import Device
  from numba.cuda.testing import skip_on_cudasim


@@ -15,7 +15,7 @@ from numba.cuda import require_context
  from numba import cuda
  from numba.cuda import void, float64, int64, int32, float32
  from numba.cuda.typing.typeof import typeof
- from cuda.core.experimental._utils.cuda_utils import CUDAError
+ from numba.cuda._compat import CUDAError

  CONST1D = np.arange(10, dtype=np.float64)

@@ -196,7 +196,7 @@ class TestLinker(CUDATestCase):

          link = str(test_data_dir / "error.cu")

-         from cuda.core.experimental._utils.cuda_utils import NVRTCError
+         from numba.cuda._compat import NVRTCError

          errty = NVRTCError
          with self.assertRaises(errty) as e:
@@ -13,7 +13,7 @@ from numba.cuda.testing import (
      CUDATestCase,
      skip_on_cudasim,
  )
- from cuda.core.experimental import ObjectCode
+ from numba.cuda._compat import ObjectCode

  if not config.ENABLE_CUDASIM:
      from cuda.bindings.driver import cuLibraryGetGlobal, cuMemcpyHtoD
@@ -43,6 +43,12 @@ if TEST_BIN_DIR:
          TEST_BIN_DIR, "test_device_functions.ltoir"
      )

+     require_cuobjdump = (
+         test_device_functions_fatbin_multi,
+         test_device_functions_fatbin,
+         test_device_functions_o,
+     )
+

  @unittest.skipIf(
      not TEST_BIN_DIR or not _have_nvjitlink(),
@@ -127,14 +133,22 @@ class TestLinkerDumpAssembly(CUDATestCase):
          super().tearDown()

      def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self):
-         files = [
+         files = (
              test_device_functions_cu,
              test_device_functions_ltoir,
              test_device_functions_fatbin_multi,
-         ]
+         )

          for file in files:
              with self.subTest(file=file):
+                 if (
+                     file in require_cuobjdump
+                     and os.getenv("NUMBA_CUDA_TEST_WHEEL_ONLY") is not None
+                 ):
+                     self.skipTest(
+                         "wheel-only environments do not have cuobjdump"
+                     )
+
                  f = io.StringIO()
                  with contextlib.redirect_stdout(f):
                      sig = "uint32(uint32, uint32)"
@@ -151,16 +165,24 @@ class TestLinkerDumpAssembly(CUDATestCase):
          self.assertTrue("ASSEMBLY (AFTER LTO)" in f.getvalue())

      def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self):
-         files = [
+         files = (
              test_device_functions_a,
              test_device_functions_cubin,
              test_device_functions_fatbin,
              test_device_functions_o,
              test_device_functions_ptx,
-         ]
+         )

          for file in files:
              with self.subTest(file=file):
+                 if (
+                     file in require_cuobjdump
+                     and os.getenv("NUMBA_CUDA_TEST_WHEEL_ONLY") is not None
+                 ):
+                     self.skipTest(
+                         "wheel-only environments do not have cuobjdump"
+                     )
+
                  sig = "uint32(uint32, uint32)"
                  add_from_numba = cuda.declare_device("add_from_numba", sig)

@@ -854,13 +854,25 @@ class TestBranchPrunePredicates(TestBranchPruneBase):
                  _CONST2 = "PLACEHOLDER2"
                  return _CONST2 + 4

-         new = self._literal_const_sample_generator(impl, {1: 0, 3: 20})
+         if PYVERSION in ((3, 14),):
+             # The order of the __code__.co_consts changes with 3.14
+             new = self._literal_const_sample_generator(impl, {0: 0, 2: 20})
+         elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+             new = self._literal_const_sample_generator(impl, {1: 0, 3: 20})
+         else:
+             raise NotImplementedError(PYVERSION)
          iconst = impl.__code__.co_consts
          nconst = new.__code__.co_consts
-         self.assertEqual(
-             iconst, (None, "PLACEHOLDER1", 3.14159, "PLACEHOLDER2", 4)
-         )
-         self.assertEqual(nconst, (None, 0, 3.14159, 20, 4))
+         if PYVERSION in ((3, 14),):
+             self.assertEqual(iconst, ("PLACEHOLDER1", 3.14159, "PLACEHOLDER2"))
+             self.assertEqual(nconst, (0, 3.14159, 20))
+         elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+             self.assertEqual(
+                 iconst, (None, "PLACEHOLDER1", 3.14159, "PLACEHOLDER2", 4)
+             )
+             self.assertEqual(nconst, (None, 0, 3.14159, 20, 4))
+         else:
+             raise NotImplementedError(PYVERSION)
          self.assertEqual(impl(None), 3.14159)
          self.assertEqual(new(None), 24)

@@ -872,7 +884,17 @@

          for c_inp, prune in (self._TRUTHY, False), (self._FALSEY, True):
              for const in c_inp:
-                 func = self._literal_const_sample_generator(impl, {1: const})
+                 if PYVERSION in ((3, 14),):
+                     # The order of the __code__.co_consts changes with 3.14
+                     func = self._literal_const_sample_generator(
+                         impl, {0: const}
+                     )
+                 elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+                     func = self._literal_const_sample_generator(
+                         impl, {1: const}
+                     )
+                 else:
+                     raise NotImplementedError(PYVERSION)
                  self.assert_prune(
                      func, (types.NoneType("none"),), [prune], None
                  )
@@ -885,7 +907,17 @@

          for c_inp, prune in (self._TRUTHY, False), (self._FALSEY, True):
              for const in c_inp:
-                 func = self._literal_const_sample_generator(impl, {1: const})
+                 if PYVERSION in ((3, 14),):
+                     # The order of the __code__.co_consts changes with 3.14
+                     func = self._literal_const_sample_generator(
+                         impl, {0: const}
+                     )
+                 elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+                     func = self._literal_const_sample_generator(
+                         impl, {1: const}
+                     )
+                 else:
+                     raise NotImplementedError(PYVERSION)
                  self.assert_prune(
                      func, (types.NoneType("none"),), [prune], None
                  )
@@ -900,7 +932,17 @@

          for c_inp, prune in (self._TRUTHY, False), (self._FALSEY, True):
              for const in c_inp:
-                 func = self._literal_const_sample_generator(impl, {1: const})
+                 if PYVERSION in ((3, 14),):
+                     # The order of the __code__.co_consts changes with 3.14
+                     func = self._literal_const_sample_generator(
+                         impl, {0: const}
+                     )
+                 elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+                     func = self._literal_const_sample_generator(
+                         impl, {1: const}
+                     )
+                 else:
+                     raise NotImplementedError(PYVERSION)
                  self.assert_prune(
                      func, (types.NoneType("none"),), [prune], None
                  )
@@ -915,7 +957,17 @@

          for c_inp, prune in (self._TRUTHY, False), (self._FALSEY, True):
              for const in c_inp:
-                 func = self._literal_const_sample_generator(impl, {1: const})
+                 if PYVERSION in ((3, 14),):
+                     # The order of the __code__.co_consts changes with 3.14
+                     func = self._literal_const_sample_generator(
+                         impl, {0: const}
+                     )
+                 elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+                     func = self._literal_const_sample_generator(
+                         impl, {1: const}
+                     )
+                 else:
+                     raise NotImplementedError(PYVERSION)
                  self.assert_prune(
                      func, (types.NoneType("none"),), [prune], None
                  )
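Note: the PYVERSION branches added in the hunks above exist because the layout of __code__.co_consts changes on CPython 3.14 (the expected tables in the first hunk no longer contain None or 4), so the indices the test patches shift. A small, standalone illustration of the underlying mechanism, patching a constant via CodeType.replace and rebuilding the function; this is independent of the package's _literal_const_sample_generator helper:

```python
from types import FunctionType


def patch_consts(func, mapping):
    # Return a copy of ``func`` whose co_consts are replaced at the given
    # indices, mirroring what a "literal const sample generator" must do.
    consts = list(func.__code__.co_consts)
    for index, value in mapping.items():
        consts[index] = value
    code = func.__code__.replace(co_consts=tuple(consts))
    return FunctionType(code, func.__globals__, func.__name__)


def f():
    return "PLACEHOLDER"


# The constant table's ordering is version-dependent, so look the index up
# instead of hardcoding it.
idx = f.__code__.co_consts.index("PLACEHOLDER")
print(patch_consts(f, {idx: 42})())  # -> 42
```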
@@ -592,6 +592,12 @@ def atomic_cas_2dim(res, old, ary, fill_val):
      old[gid] = cuda.atomic.cas(res, gid, fill_val, ary[gid])


+ @unittest.skipIf(
+     not config.ENABLE_CUDASIM
+     and cuda.get_current_device().compute_capability >= (12, 0)
+     and cuda.cudadrv.runtime.get_version()[0] == 12,
+     reason="NVVM 12.9 Bugged on CC 10+",
+ )
  class TestCudaAtomics(CUDATestCase):
      def setUp(self):
          super().setUp()
@@ -13,6 +13,7 @@ from numba.cuda import (
      compile_all,
      LinkableCode,
  )
+ from numba.cuda.cudadrv import nvrtc
  from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase

  TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
@@ -557,7 +558,7 @@ class TestCompile(unittest.TestCase):
          link_obj = LinkableCode.from_path(link)
          if link_obj.kind == "cu":
              # if link is a cu file, result contains a compiled object code
-             from cuda.core.experimental import ObjectCode
+             from numba.cuda._compat import ObjectCode

              assert isinstance(code_list[1], ObjectCode)
          else:
@@ -661,6 +662,16 @@ class TestCompileOnlyTests(unittest.TestCase):
              ),
          )

+     def test_compile_ptx_arch_specific(self):
+         ptx, resty = cuda.compile_ptx(lambda: None, tuple(), cc=(9, 0, "a"))
+         self.assertIn(".target sm_90a", ptx)
+
+         if nvrtc._get_nvrtc_version() >= (12, 9):
+             ptx, resty = cuda.compile_ptx(
+                 lambda: None, tuple(), cc=(10, 0, "f")
+             )
+             self.assertIn(".target sm_100f", ptx)
+


  @skip_on_cudasim("Compilation unsupported in the simulator")
  class TestCompileWithLaunchBounds(unittest.TestCase):
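Note: test_compile_ptx_arch_specific shows the public side of the arch-specific support: cuda.compile_ptx accepts a cc tuple with a suffix element and emits the corresponding .target sm_90a (or sm_100f) directive in the PTX. A minimal usage sketch, assuming a CUDA toolkit new enough for the requested architecture is installed:

```python
from numba import cuda


def noop():
    return None


# Request the Hopper arch-specific target; the suffix rides along in the cc tuple.
ptx, resty = cuda.compile_ptx(noop, (), cc=(9, 0, "a"))
assert ".target sm_90a" in ptx
```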
@@ -3,12 +3,15 @@

  import math
  import itertools
+ import sys

  import numpy as np
+ import pytest

  from numba.cuda.testing import unittest, CUDATestCase
  from numba.cuda import types
  from numba import cuda
+ from numba.cuda import config
  from numba.cuda.tests.cudapy.complex_usecases import (
      real_usecase,
      imag_usecase,
@@ -275,6 +278,10 @@ class TestCMath(BaseComplexTest):
      def test_log(self):
          self.check_unary_func(log_usecase)

+     @pytest.mark.xfail(
+         sys.version_info[:2] >= (3, 14),
+         reason="python 3.14 cmath.log behavior is different than previous versions",
+     )
      def test_log_base(self):
          values = list(itertools.product(self.more_values(), self.more_values()))
          value_types = [
@@ -333,6 +340,12 @@ class TestCMath(BaseComplexTest):
          self.check_unary_func(tanh_usecase, ulps=2, ignore_sign_on_zero=True)


+ @unittest.skipIf(
+     not config.ENABLE_CUDASIM
+     and cuda.get_current_device().compute_capability >= (12, 0)
+     and cuda.cudadrv.runtime.get_version()[0] == 12,
+     reason="NVVM 12.9 Bugged on CC 10+",
+ )
  class TestAtomicOnComplexComponents(CUDATestCase):
      # Based on the reproducer from Issue #8309. array.real and array.imag could
      # not be used because they required returning an array from a generated
@@ -48,7 +48,7 @@ def _in_list_var(list_var, var):


  def _find_assign(func_ir, var):
-     for label, block in func_ir.blocks.items():
+     for block in func_ir.blocks.values():
          for i, inst in enumerate(block.body):
              if isinstance(inst, ir.Assign) and inst.target.name != var:
                  all_var = inst.list_vars()
@@ -54,7 +54,7 @@ class TestDebugOutput(CUDATestCase):
          self.assertRaises(AssertionError, check_meth, out)

      def _check_dump_bytecode(self, out):
-         if PYVERSION > (3, 10):
+         if PYVERSION in ((3, 11), (3, 12), (3, 13), (3, 14)):
              # binop with arg=0 is binary add, see CPython dis.py and opcode.py
              self.assertIn("BINARY_OP(arg=0", out)
          else:
@@ -885,29 +885,34 @@ class TestCudaDebugInfo(CUDATestCase):
          """,
      )

-     # shared_arr -> composite -> elements[4] (data field at index 4) -> pointer with dwarfAddressSpace: 8
-     # local_arr -> composite -> elements[4] (data field at index 4) -> pointer without dwarfAddressSpace: 8
+     # shared_arr -> composite -> elements[4] (data field at index 4) -> pointer without dwarfAddressSpace
+     # local_arr -> composite -> elements[4] (data field at index 4) -> pointer without dwarfAddressSpace
+     # Note: Shared memory pointers don't have dwarfAddressSpace because they are
+     # cast to generic address space via addrspacecast in cudaimpl.py
      address_class_filechecks = r"""
      CHECK-DAG: [[SHARED_VAR:![0-9]+]] = !DILocalVariable({{.*}}name: "shared_arr"{{.*}}type: [[SHARED_COMPOSITE:![0-9]+]]
      CHECK-DAG: [[SHARED_COMPOSITE]] = {{.*}}!DICompositeType(elements: [[SHARED_ELEMENTS:![0-9]+]]
      CHECK-DAG: [[SHARED_ELEMENTS]] = !{{{.*}}, {{.*}}, {{.*}}, {{.*}}, [[SHARED_DATA:![0-9]+]], {{.*}}, {{.*}}}
      CHECK-DAG: [[SHARED_DATA]] = !DIDerivedType(baseType: [[SHARED_PTR:![0-9]+]], name: "data"
-     CHECK-DAG: [[SHARED_PTR]] = !DIDerivedType({{.*}}dwarfAddressSpace: 8{{.*}}tag: DW_TAG_pointer_type
+     CHECK-DAG: [[SHARED_PTR]] = !DIDerivedType({{.*}}tag: DW_TAG_pointer_type
+     CHECK-NOT: [[SHARED_PTR]]{{.*}}dwarfAddressSpace

      CHECK-DAG: [[LOCAL_VAR:![0-9]+]] = !DILocalVariable({{.*}}name: "local_arr"{{.*}}type: [[LOCAL_COMPOSITE:![0-9]+]]
      CHECK-DAG: [[LOCAL_COMPOSITE]] = {{.*}}!DICompositeType(elements: [[LOCAL_ELEMENTS:![0-9]+]]
      CHECK-DAG: [[LOCAL_ELEMENTS]] = !{{{.*}}, {{.*}}, {{.*}}, {{.*}}, [[LOCAL_DATA:![0-9]+]], {{.*}}, {{.*}}}
      CHECK-DAG: [[LOCAL_DATA]] = !DIDerivedType(baseType: [[LOCAL_PTR:![0-9]+]], name: "data"
      CHECK-DAG: [[LOCAL_PTR]] = !DIDerivedType(baseType: {{.*}}tag: DW_TAG_pointer_type
-     CHECK-NOT: [[LOCAL_PTR]]{{.*}}dwarfAddressSpace: 8
+     CHECK-NOT: [[LOCAL_PTR]]{{.*}}dwarfAddressSpace
      """

      def _test_shared_memory_address_class(self, dtype):
          """Test that shared memory arrays have correct DWARF address class.

-         Shared memory pointers should have addressClass: 8 (DW_AT_address_class
-         for CUDA shared memory) in their debug metadata, while regular local
-         arrays should not have this annotation.
+         Shared memory pointers should NOT have dwarfAddressSpace attribute
+         because they are cast to generic address space via addrspacecast.
+         The runtime pointer type is generic, not shared, so cuda-gdb can
+         correctly dereference them. Local arrays also should not have this
+         attribute.
          """
          sig = (numpy_support.from_dtype(dtype),)

@@ -1,7 +1,7 @@
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  # SPDX-License-Identifier: BSD-2-Clause

- from cuda.core.experimental._utils.cuda_utils import CUDAError
+ from numba.cuda._compat import CUDAError
  import numpy as np
  import threading

@@ -860,7 +860,7 @@ class TestIntrinsic(TestCase):
              "TestIntrinsic.test_docstring.<locals>.void_func",
              void_func.__qualname__,
          )
-         self.assertDictEqual({"a": int}, void_func.__annotations__)
+         self.assertDictEqual({"a": int}, inspect.get_annotations(void_func))
          self.assertEqual("void_func docstring", void_func.__doc__)

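Note: the switch from reading __annotations__ directly to inspect.get_annotations is likely motivated by Python 3.14, where PEP 649/749 defers annotation evaluation; inspect.get_annotations is the portable way to obtain them across the supported versions. A small standalone illustration:

```python
import inspect


def void_func(a: int):
    """void_func docstring"""


# Works on 3.10 through 3.14, regardless of how annotations are stored.
assert inspect.get_annotations(void_func) == {"a": int}
```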