numba-cuda 0.13.0__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff compares publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of numba-cuda might be problematic.

numba_cuda/VERSION CHANGED
@@ -1 +1 @@
1
- 0.13.0
1
+ 0.14.0
@@ -575,6 +575,7 @@ def compile(
575
575
  abi_info=None,
576
576
  output="ptx",
577
577
  forceinline=False,
578
+ launch_bounds=None,
578
579
  ):
579
580
  """Compile a Python function to PTX or LTO-IR for a given set of argument
580
581
  types.
@@ -620,6 +621,16 @@ def compile(
620
621
  ``alwaysinline`` function attribute to the function
621
622
  definition. This is only valid when the output is
622
623
  ``"ltoir"``.
624
+ :param launch_bounds: Kernel launch bounds, specified as a scalar or a tuple
625
+ of between one and three items. Tuple items provide:
626
+
627
+ - The maximum number of threads per block,
628
+ - The minimum number of blocks per SM,
629
+ - The maximum number of blocks per cluster.
630
+
631
+ If a scalar is provided, it is used as the maximum
632
+ number of threads per block.
633
+ :type launch_bounds: int | tuple[int]
623
634
  :return: (code, resty): The compiled code and inferred return type
624
635
  :rtype: tuple
625
636
  """
@@ -662,7 +673,12 @@ def compile(
662
673
 
663
674
  args, return_type = sigutils.normalize_signature(sig)
664
675
 
665
- cc = cc or config.CUDA_DEFAULT_PTX_CC
676
+ # If the user has used the config variable to specify a non-default that is
677
+ # greater than the lowest non-deprecated one, then we should default to
678
+ # their specified CC instead of the lowest non-deprecated one.
679
+ MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvvm.LOWEST_CURRENT_CC)
680
+ cc = cc or MIN_CC
681
+
666
682
  cres = compile_cuda(
667
683
  pyfunc,
668
684
  return_type,
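
A worked illustration of the new default-CC logic (the concrete values are assumptions; the stock config.CUDA_DEFAULT_PTX_CC depends on the Numba configuration in use). Compute capabilities are tuples, so max() compares them lexicographically:

    # With a stock default below Turing, e.g. (5, 2), the floor wins:
    max((5, 2), (7, 5))   # -> (7, 5)
    # A user-configured default above the floor is respected:
    max((8, 6), (7, 5))   # -> (8, 6)
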
@@ -693,6 +709,7 @@ def compile(
693
709
  kernel = lib.get_function(cres.fndesc.llvm_func_name)
694
710
  lib._entry_name = cres.fndesc.llvm_func_name
695
711
  kernel_fixup(kernel, debug)
712
+ nvvm.set_launch_bounds(kernel, launch_bounds)
696
713
 
697
714
  if lto:
698
715
  code = lib.get_ltoir(cc=cc)
@@ -713,6 +730,7 @@ def compile_for_current_device(
713
730
  abi_info=None,
714
731
  output="ptx",
715
732
  forceinline=False,
733
+ launch_bounds=None,
716
734
  ):
717
735
  """Compile a Python function to PTX or LTO-IR for a given signature for the
718
736
  current device's compute capabilility. This calls :func:`compile` with an
@@ -731,6 +749,7 @@ def compile_for_current_device(
731
749
  abi_info=abi_info,
732
750
  output=output,
733
751
  forceinline=forceinline,
752
+ launch_bounds=launch_bounds,
734
753
  )
735
754
 
736
755
 
@@ -746,6 +765,7 @@ def compile_ptx(
746
765
  abi="numba",
747
766
  abi_info=None,
748
767
  forceinline=False,
768
+ launch_bounds=None,
749
769
  ):
750
770
  """Compile a Python function to PTX for a given signature. See
751
771
  :func:`compile`. The defaults for this function are to compile a kernel
@@ -764,6 +784,7 @@ def compile_ptx(
764
784
  abi_info=abi_info,
765
785
  output="ptx",
766
786
  forceinline=forceinline,
787
+ launch_bounds=launch_bounds,
767
788
  )
768
789
 
769
790
 
@@ -778,6 +799,7 @@ def compile_ptx_for_current_device(
778
799
  abi="numba",
779
800
  abi_info=None,
780
801
  forceinline=False,
802
+ launch_bounds=None,
781
803
  ):
782
804
  """Compile a Python function to PTX for a given signature for the current
783
805
  device's compute capabilility. See :func:`compile_ptx`."""
@@ -794,6 +816,7 @@ def compile_ptx_for_current_device(
794
816
  abi=abi,
795
817
  abi_info=abi_info,
796
818
  forceinline=forceinline,
819
+ launch_bounds=launch_bounds,
797
820
  )
798
821
 
799
822
 
@@ -82,9 +82,21 @@ _MVC_ERROR_MESSAGE = (
82
82
  "to be available"
83
83
  )
84
84
 
85
- ENABLE_PYNVJITLINK = _readenv(
86
- "NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False
87
- ) or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
85
+ # Enable pynvjitlink if the environment variables NUMBA_CUDA_ENABLE_PYNVJITLINK
86
+ # or CUDA_ENABLE_PYNVJITLINK are set, or if the pynvjitlink module is found. If
87
+ # explicitly disabled, do not use pynvjitlink, even if present in the env.
88
+ _pynvjitlink_enabled_in_env = _readenv(
89
+ "NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, None
90
+ )
91
+ _pynvjitlink_enabled_in_cfg = getattr(config, "CUDA_ENABLE_PYNVJITLINK", None)
92
+
93
+ if _pynvjitlink_enabled_in_env is not None:
94
+ ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_env
95
+ elif _pynvjitlink_enabled_in_cfg is not None:
96
+ ENABLE_PYNVJITLINK = _pynvjitlink_enabled_in_cfg
97
+ else:
98
+ ENABLE_PYNVJITLINK = importlib.util.find_spec("pynvjitlink") is not None
99
+
88
100
  if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
89
101
  config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
90
102
 
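
A sketch of the precedence described in the comment above (not part of the diff): an explicit setting wins over autodetection, so pynvjitlink can be force-disabled even when the module is installed. The variable must be set in the process environment before numba.cuda is imported, since it is read at module import time:

    import os

    # Explicitly disable pynvjitlink; leaving the variable unset falls back to
    # the config attribute, and then to whether the pynvjitlink module is found.
    os.environ["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "0"
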
@@ -369,48 +369,101 @@ COMPUTE_CAPABILITIES = (
369
369
  (9, 0),
370
370
  (10, 0),
371
371
  (10, 1),
372
+ (10, 3),
372
373
  (12, 0),
374
+ (12, 1),
373
375
  )
374
376
 
375
377
 
376
- # Maps CTK version -> (min supported cc, max supported cc) inclusive
378
+ # Maps CTK version -> (min supported cc, max supported cc) ranges, bounds inclusive
377
379
  _CUDA_CC_MIN_MAX_SUPPORT = {
378
- (11, 1): ((3, 5), (8, 0)),
379
- (11, 2): ((3, 5), (8, 6)),
380
- (11, 3): ((3, 5), (8, 6)),
381
- (11, 4): ((3, 5), (8, 7)),
382
- (11, 5): ((3, 5), (8, 7)),
383
- (11, 6): ((3, 5), (8, 7)),
384
- (11, 7): ((3, 5), (8, 7)),
385
- (11, 8): ((3, 5), (9, 0)),
386
- (12, 0): ((5, 0), (9, 0)),
387
- (12, 1): ((5, 0), (9, 0)),
388
- (12, 2): ((5, 0), (9, 0)),
389
- (12, 3): ((5, 0), (9, 0)),
390
- (12, 4): ((5, 0), (9, 0)),
391
- (12, 5): ((5, 0), (9, 0)),
392
- (12, 6): ((5, 0), (9, 0)),
393
- (12, 8): ((5, 0), (12, 0)),
380
+ (11, 2): [
381
+ ((3, 5), (8, 6)),
382
+ ],
383
+ (11, 3): [
384
+ ((3, 5), (8, 6)),
385
+ ],
386
+ (11, 4): [
387
+ ((3, 5), (8, 7)),
388
+ ],
389
+ (11, 5): [
390
+ ((3, 5), (8, 7)),
391
+ ],
392
+ (11, 6): [
393
+ ((3, 5), (8, 7)),
394
+ ],
395
+ (11, 7): [
396
+ ((3, 5), (8, 7)),
397
+ ],
398
+ (11, 8): [
399
+ ((3, 5), (9, 0)),
400
+ ],
401
+ (12, 0): [
402
+ ((5, 0), (9, 0)),
403
+ ],
404
+ (12, 1): [
405
+ ((5, 0), (9, 0)),
406
+ ],
407
+ (12, 2): [
408
+ ((5, 0), (9, 0)),
409
+ ],
410
+ (12, 3): [
411
+ ((5, 0), (9, 0)),
412
+ ],
413
+ (12, 4): [
414
+ ((5, 0), (9, 0)),
415
+ ],
416
+ (12, 5): [
417
+ ((5, 0), (9, 0)),
418
+ ],
419
+ (12, 6): [
420
+ ((5, 0), (9, 0)),
421
+ ],
422
+ (12, 8): [
423
+ ((5, 0), (10, 1)),
424
+ ((12, 0), (12, 0)),
425
+ ],
426
+ (12, 9): [
427
+ ((5, 0), (12, 1)),
428
+ ],
394
429
  }
395
430
 
431
+ # From CUDA 12.9 Release notes, Section 1.5.4, "Deprecated Architectures"
432
+ # https://docs.nvidia.com/cuda/archive/12.9.0/cuda-toolkit-release-notes/index.html#deprecated-architectures
433
+ #
434
+ # "Maxwell, Pascal, and Volta architectures are now feature-complete with no
435
+ # further enhancements planned. While CUDA Toolkit 12.x series will continue
436
+ # to support building applications for these architectures, offline
437
+ # compilation and library support will be removed in the next major CUDA
438
+ # Toolkit version release. Users should plan migration to newer
439
+ # architectures, as future toolkits will be unable to target Maxwell, Pascal,
440
+ # and Volta GPUs."
441
+ #
442
+ # In order to maintain compatibility with future toolkits, we use Turing (7.5)
443
+ # as the default CC if it is not otherwise specified.
444
+ LOWEST_CURRENT_CC = (7, 5)
445
+
396
446
 
397
447
  def ccs_supported_by_ctk(ctk_version):
398
448
  try:
399
449
  # For supported versions, we look up the range of supported CCs
400
- min_cc, max_cc = _CUDA_CC_MIN_MAX_SUPPORT[ctk_version]
401
- return tuple(
402
- [cc for cc in COMPUTE_CAPABILITIES if min_cc <= cc <= max_cc]
403
- )
404
- except KeyError:
405
- # For unsupported CUDA toolkit versions, all we can do is assume all
406
- # non-deprecated versions we are aware of are supported.
407
450
  return tuple(
408
451
  [
409
452
  cc
453
+ for min_cc, max_cc in _CUDA_CC_MIN_MAX_SUPPORT[ctk_version]
410
454
  for cc in COMPUTE_CAPABILITIES
411
- if cc >= config.CUDA_DEFAULT_PTX_CC
455
+ if min_cc <= cc <= max_cc
412
456
  ]
413
457
  )
458
+ except KeyError:
459
+ # For unsupported CUDA toolkit versions, all we can do is assume all
460
+ # non-deprecated versions we are aware of are supported.
461
+ #
462
+ # If the user has specified a non-default CC that is greater than the
463
+ # lowest non-deprecated one, then we should assume that instead.
464
+ MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, LOWEST_CURRENT_CC)
465
+
466
+ return tuple([cc for cc in COMPUTE_CAPABILITIES if cc >= MIN_CC])
414
467
 
415
468
 
416
469
  def get_supported_ccs():
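
As a sketch of how the new table and floor are consumed (the module path is assumed from the test imports further down, which use numba.cuda.cudadrv.nvvm; the (13, 0) toolkit version is a hypothetical future release):

    from numba.cuda.cudadrv import nvvm

    # A known toolkit maps to the ranges in the table: for CUDA 12.8 this is
    # everything from (5, 0) through (10, 1), plus (12, 0).
    print(nvvm.ccs_supported_by_ctk((12, 8)))

    # An unknown (e.g. future) toolkit falls back to all CCs at or above the
    # floor: Turing (7, 5), unless config.CUDA_DEFAULT_PTX_CC is set higher.
    print(nvvm.ccs_supported_by_ctk((13, 0)))
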
@@ -857,6 +910,54 @@ def set_cuda_kernel(function):
857
910
  function.attributes.discard("noinline")
858
911
 
859
912
 
913
+ def set_launch_bounds(kernel, launch_bounds):
914
+ # Based on: CUDA C / C++ Programming Guide 12.9, Section 8.38:
915
+ # https://docs.nvidia.com/cuda/archive/12.9.0/cuda-c-programming-guide/index.html#launch-bounds
916
+ # PTX ISA Specification Version 8.7, Section 11.4:
917
+ # https://docs.nvidia.com/cuda/archive/12.8.1/parallel-thread-execution/index.html#performance-tuning-directives
918
+ # NVVM IR Specification 12.9, Section 13:
919
+ # https://docs.nvidia.com/cuda/archive/12.9.0/nvvm-ir-spec/index.html#global-property-annotation
920
+
921
+ if launch_bounds is None:
922
+ return
923
+
924
+ if isinstance(launch_bounds, int):
925
+ launch_bounds = (launch_bounds,)
926
+
927
+ if (n := len(launch_bounds)) > 3:
928
+ raise ValueError(
929
+ f"Got {n} launch bounds: {launch_bounds}. A maximum of three are supported: "
930
+ "(max_threads_per_block, min_blocks_per_sm, max_blocks_per_cluster)"
931
+ )
932
+
933
+ module = kernel.module
934
+ nvvm_annotations = cgutils.get_or_insert_named_metadata(
935
+ module, "nvvm.annotations"
936
+ )
937
+
938
+ # Note that only maxntidx is used even though NVVM IR and PTX allow
939
+ # maxntidy and maxntidz. This is because the thread block size limit
940
+ # pertains only to the total number of threads, and therefore bounds on
941
+ # individual dimensions may be exceeded anyway. To prevent an unsurprising
942
+ # interface, it is cleaner to only allow setting total size via maxntidx
943
+ # and assuming y and z to be 1 (as is the case in CUDA C/C++).
944
+
945
+ properties = (
946
+ # Max threads per block
947
+ "maxntidx",
948
+ # Min blocks per multiprocessor
949
+ "minctasm",
950
+ # Max blocks per cluster
951
+ "cluster_max_blocks",
952
+ )
953
+
954
+ for prop, bound in zip(properties, launch_bounds):
955
+ mdstr = ir.MetaDataString(module, prop)
956
+ mdvalue = ir.Constant(ir.IntType(32), bound)
957
+ md = module.add_metadata((kernel, mdstr, mdvalue))
958
+ nvvm_annotations.add(md)
959
+
960
+
860
961
  def add_ir_version(mod):
861
962
  """Add NVVM IR version to module"""
862
963
  # We specify the IR version to match the current NVVM's IR version
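
For reference, a sketch (not part of the diff; the dict name and keys are illustrative) of how the launch bounds items, the NVVM annotation names used above, and the PTX directives checked in the new tests correspond:

    LAUNCH_BOUND_DIRECTIVES = {
        # launch_bounds item:     (NVVM annotation,      PTX directive)
        "max_threads_per_block": ("maxntidx", ".maxntid N, 1, 1"),
        "min_blocks_per_sm": ("minctasm", ".minnctapersm N"),
        "max_blocks_per_cluster": ("cluster_max_blocks", ".maxclusterrank N"),
    }
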
@@ -2,6 +2,7 @@ from llvmlite import ir
2
2
  from numba.core import types, cgutils
3
3
  from numba.core.debuginfo import DIBuilder
4
4
  from numba.cuda.types import GridGroup
5
+ from numba.core.datamodel.models import UnionModel
5
6
 
6
7
  _BYTE_SIZE = 8
7
8
 
@@ -16,6 +17,7 @@ class CUDADIBuilder(DIBuilder):
16
17
  is_bool = False
17
18
  is_int_literal = False
18
19
  is_grid_group = False
20
+ m = self.module
19
21
 
20
22
  if isinstance(lltype, ir.IntType):
21
23
  if datamodel is None:
@@ -36,7 +38,6 @@ class CUDADIBuilder(DIBuilder):
36
38
  is_grid_group = True
37
39
 
38
40
  if is_bool or is_int_literal or is_grid_group:
39
- m = self.module
40
41
  bitsize = _BYTE_SIZE * size
41
42
  # Boolean type workaround until upstream Numba is fixed
42
43
  if is_bool:
@@ -56,6 +57,56 @@ class CUDADIBuilder(DIBuilder):
56
57
  },
57
58
  )
58
59
 
60
+ if isinstance(datamodel, UnionModel):
61
+ # UnionModel is handled here to represent polymorphic types
62
+ meta = []
63
+ maxwidth = 0
64
+ for field, model in zip(
65
+ datamodel._fields, datamodel.inner_models()
66
+ ):
67
+ # Ignore the "tag" field, focus on the "payload" field which
68
+ # contains the data types in memory
69
+ if field == "payload":
70
+ for mod in model.inner_models():
71
+ dtype = mod.get_value_type()
72
+ membersize = self.cgctx.get_abi_sizeof(dtype)
73
+ basetype = self._var_type(
74
+ dtype, membersize, datamodel=mod
75
+ )
76
+ if isinstance(mod.fe_type, types.Literal):
77
+ typename = str(mod.fe_type.literal_type)
78
+ else:
79
+ typename = str(mod.fe_type)
80
+ # Use a prefix "_" on type names as field names
81
+ membername = "_" + typename
82
+ memberwidth = _BYTE_SIZE * membersize
83
+ derived_type = m.add_debug_info(
84
+ "DIDerivedType",
85
+ {
86
+ "tag": ir.DIToken("DW_TAG_member"),
87
+ "name": membername,
88
+ "baseType": basetype,
89
+ # DW_TAG_member size is in bits
90
+ "size": memberwidth,
91
+ },
92
+ )
93
+ meta.append(derived_type)
94
+ if memberwidth > maxwidth:
95
+ maxwidth = memberwidth
96
+
97
+ fake_union_name = "dbg_poly_union"
98
+ return m.add_debug_info(
99
+ "DICompositeType",
100
+ {
101
+ "file": self.difile,
102
+ "tag": ir.DIToken("DW_TAG_union_type"),
103
+ "name": fake_union_name,
104
+ "identifier": str(lltype),
105
+ "elements": m.add_metadata(meta),
106
+ "size": maxwidth,
107
+ },
108
+ is_distinct=True,
109
+ )
59
110
  # For other cases, use upstream Numba implementation
60
111
  return super()._var_type(lltype, size, datamodel=datamodel)
61
112
 
@@ -23,6 +23,7 @@ def jit(
23
23
  opt=None,
24
24
  lineinfo=False,
25
25
  cache=False,
26
+ launch_bounds=None,
26
27
  **kws,
27
28
  ):
28
29
  """
@@ -72,6 +73,16 @@ def jit(
72
73
  :type lineinfo: bool
73
74
  :param cache: If True, enables the file-based cache for this function.
74
75
  :type cache: bool
76
+ :param launch_bounds: Kernel launch bounds, specified as a scalar or a tuple
77
+ of between one and three items. Tuple items provide:
78
+
79
+ - The maximum number of threads per block,
80
+ - The minimum number of blocks per SM,
81
+ - The maximum number of blocks per cluster.
82
+
83
+ If a scalar is provided, it is used as the maximum
84
+ number of threads per block.
85
+ :type launch_bounds: int | tuple[int]
75
86
  """
76
87
 
77
88
  if link and config.ENABLE_CUDASIM:
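
A usage sketch for the decorator form (values chosen for illustration), matching the tests added further down in this diff:

    from numba import cuda

    # At most 128 threads per block, at least 2 resident blocks per SM
    @cuda.jit(launch_bounds=(128, 2))
    def inc(x):
        i = cuda.grid(1)
        if i < x.size:
            x[i] += 1

Launching such a kernel with more than 128 threads per block then fails with CUDA_ERROR_INVALID_VALUE, as exercised in the new TestLaunchBounds cases below.
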
@@ -153,6 +164,7 @@ def jit(
153
164
  targetoptions["inline"] = inline
154
165
  targetoptions["forceinline"] = forceinline
155
166
  targetoptions["extensions"] = extensions
167
+ targetoptions["launch_bounds"] = launch_bounds
156
168
 
157
169
  disp = CUDADispatcher(func, targetoptions=targetoptions)
158
170
 
@@ -200,6 +212,7 @@ def jit(
200
212
  lineinfo=lineinfo,
201
213
  link=link,
202
214
  cache=cache,
215
+ launch_bounds=launch_bounds,
203
216
  **kws,
204
217
  )
205
218
 
@@ -221,6 +234,7 @@ def jit(
221
234
  targetoptions["inline"] = inline
222
235
  targetoptions["forceinline"] = forceinline
223
236
  targetoptions["extensions"] = extensions
237
+ targetoptions["launch_bounds"] = launch_bounds
224
238
  disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
225
239
 
226
240
  if cache:
@@ -18,7 +18,7 @@ from numba.cuda.compiler import (
18
18
  kernel_fixup,
19
19
  )
20
20
  import re
21
- from numba.cuda.cudadrv import driver
21
+ from numba.cuda.cudadrv import driver, nvvm
22
22
  from numba.cuda.cudadrv.linkable_code import LinkableCode
23
23
  from numba.cuda.cudadrv.devices import get_context
24
24
  from numba.cuda.descriptor import cuda_target
@@ -94,6 +94,7 @@ class _Kernel(serialize.ReduceMixin):
94
94
  lto=False,
95
95
  opt=True,
96
96
  device=False,
97
+ launch_bounds=None,
97
98
  ):
98
99
  if device:
99
100
  raise RuntimeError("Cannot compile a device function as a kernel")
@@ -120,6 +121,7 @@ class _Kernel(serialize.ReduceMixin):
120
121
  self.debug = debug
121
122
  self.lineinfo = lineinfo
122
123
  self.extensions = extensions or []
124
+ self.launch_bounds = launch_bounds
123
125
 
124
126
  nvvm_options = {"fastmath": fastmath, "opt": 3 if opt else 0}
125
127
 
@@ -145,6 +147,7 @@ class _Kernel(serialize.ReduceMixin):
145
147
  kernel = lib.get_function(cres.fndesc.llvm_func_name)
146
148
  lib._entry_name = cres.fndesc.llvm_func_name
147
149
  kernel_fixup(kernel, self.debug)
150
+ nvvm.set_launch_bounds(kernel, launch_bounds)
148
151
 
149
152
  if not link:
150
153
  link = []
@@ -547,6 +550,10 @@ class _Kernel(serialize.ReduceMixin):
547
550
  for ax in range(devary.ndim):
548
551
  kernelargs.append(c_intp(devary.strides[ax]))
549
552
 
553
+ elif isinstance(ty, types.CPointer):
554
+ # Pointer arguments should be a pointer-sized integer
555
+ kernelargs.append(ctypes.c_uint64(val))
556
+
550
557
  elif isinstance(ty, types.Integer):
551
558
  cval = getattr(ctypes, "c_%s" % ty)(val)
552
559
  kernelargs.append(cval)
@@ -1,5 +1,7 @@
1
1
  from numba.core.lowering import Lower
2
2
  from llvmlite import ir
3
+ from numba.core import ir as numba_ir
4
+ from numba.core import types
3
5
 
4
6
 
5
7
  class CUDALower(Lower):
@@ -14,10 +16,7 @@ class CUDALower(Lower):
14
16
  if (
15
17
  self.context.enable_debuginfo
16
18
  # Conditions used to elide stores in parent method
17
- and (
18
- name not in self._singly_assigned_vars
19
- or self._disable_sroa_like_opt
20
- )
19
+ and self.store_var_needed(name)
21
20
  # No emission of debuginfo for internal names
22
21
  and not name.startswith("$")
23
22
  ):
@@ -27,6 +26,11 @@ class CUDALower(Lower):
27
26
  int_type = (ir.IntType,)
28
27
  real_type = ir.FloatType, ir.DoubleType
29
28
  if isinstance(lltype, int_type + real_type):
29
+ index = name.find(".")
30
+ src_name = name[:index] if index > 0 else name
31
+ if src_name in self.poly_var_typ_map:
32
+ # Do not emit debug value on polymorphic type var
33
+ return
30
34
  # Emit debug value for scalar variable
31
35
  sizeof = self.context.get_abi_sizeof(lltype)
32
36
  datamodel = self.context.data_model_manager[fetype]
@@ -41,3 +45,78 @@ class CUDALower(Lower):
41
45
  datamodel,
42
46
  argidx,
43
47
  )
48
+
49
+ def pre_lower(self):
50
+ """
51
+ Called before lowering all blocks.
52
+ """
53
+ super().pre_lower()
54
+
55
+ self.poly_var_typ_map = {}
56
+ self.poly_var_loc_map = {}
57
+
58
+ # When debug info is enabled, walk through function body and mark
59
+ # variables with polymorphic types.
60
+ if self.context.enable_debuginfo and self._disable_sroa_like_opt:
61
+ poly_map = {}
62
+ # pre-scan all blocks
63
+ for block in self.blocks.values():
64
+ for x in block.find_insts(numba_ir.Assign):
65
+ if x.target.name.startswith("$"):
66
+ continue
67
+ ssa_name = x.target.name
68
+ index = ssa_name.find(".")
69
+ src_name = ssa_name[:index] if index > 0 else ssa_name
70
+ # Check all the multi-versioned targets
71
+ if len(x.target.versioned_names) > 0:
72
+ fetype = self.typeof(ssa_name)
73
+ if src_name not in poly_map:
74
+ poly_map[src_name] = set()
75
+ # deduplicate polymorphic types
76
+ if isinstance(fetype, types.Literal):
77
+ fetype = fetype.literal_type
78
+ poly_map[src_name].add(fetype)
79
+ # Filter out multi-versioned but single typed variables
80
+ self.poly_var_typ_map = {
81
+ k: v for k, v in poly_map.items() if len(v) > 1
82
+ }
83
+
84
+ def _alloca_var(self, name, fetype):
85
+ """
86
+ Ensure the given variable has an allocated stack slot (if needed).
87
+ """
88
+ # If the name is not handled yet and a store is needed
89
+ if name not in self.varmap and self.store_var_needed(name):
90
+ index = name.find(".")
91
+ src_name = name[:index] if index > 0 else name
92
+ if src_name in self.poly_var_typ_map:
93
+ dtype = types.UnionType(self.poly_var_typ_map[src_name])
94
+ datamodel = self.context.data_model_manager[dtype]
95
+ if src_name not in self.poly_var_loc_map:
96
+ # UnionType has sorted set of types, max at last index
97
+ maxsizetype = dtype.types[-1]
98
+ # Create a single element aggregate type
99
+ aggr_type = types.UniTuple(maxsizetype, 1)
100
+ lltype = self.context.get_value_type(aggr_type)
101
+ ptr = self.alloca_lltype(src_name, lltype, datamodel)
102
+ # save the location of the union type for polymorphic var
103
+ self.poly_var_loc_map[src_name] = ptr
104
+ # Any member of this union type shoud type cast ptr to fetype
105
+ lltype = self.context.get_value_type(fetype)
106
+ castptr = self.builder.bitcast(
107
+ self.poly_var_loc_map[src_name], ir.PointerType(lltype)
108
+ )
109
+ # Remember the pointer
110
+ self.varmap[name] = castptr
111
+
112
+ super()._alloca_var(name, fetype)
113
+
114
+ def store_var_needed(self, name):
115
+ # Check the conditions used to elide stores in parent class,
116
+ # e.g. in method storevar() and _alloca_var()
117
+ return (
118
+ # used in multiple blocks
119
+ name not in self._singly_assigned_vars
120
+ # lowering with debuginfo
121
+ or self._disable_sroa_like_opt
122
+ )
@@ -299,12 +299,12 @@ class TestLinkerUsage(CUDATestCase):
299
299
 
300
300
  def test_linker_enabled_envvar(self):
301
301
  env = os.environ.copy()
302
- env["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "1"
302
+ env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
303
303
  run_in_subprocess(self.src.format(config=""), env=env)
304
304
 
305
305
  def test_linker_disabled_envvar(self):
306
306
  env = os.environ.copy()
307
- env.pop("NUMBA_CUDA_ENABLE_PYNVJITLINK", None)
307
+ env["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "0"
308
308
  with self.assertRaisesRegex(
309
309
  AssertionError, "LTO and additional flags require PyNvJitLinker"
310
310
  ):
@@ -30,7 +30,8 @@ class TestNvvmDriver(unittest.TestCase):
30
30
  self.skipTest("-gen-lto unavailable in this toolkit version")
31
31
 
32
32
  nvvmir = self.get_nvvmir()
33
- ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch="compute_52")
33
+ arch = "compute_%d%d" % nvvm.LOWEST_CURRENT_CC
34
+ ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch=arch)
34
35
 
35
36
  # Verify we correctly passed the option by checking if we got LTOIR
36
37
  # from NVVM (by looking for the expected magic number for LTOIR)
@@ -138,9 +139,9 @@ class TestNvvmDriver(unittest.TestCase):
138
139
  class TestArchOption(unittest.TestCase):
139
140
  def test_get_arch_option(self):
140
141
  # Test returning the nearest lowest arch.
141
- self.assertEqual(nvvm.get_arch_option(5, 3), "compute_53")
142
142
  self.assertEqual(nvvm.get_arch_option(7, 5), "compute_75")
143
143
  self.assertEqual(nvvm.get_arch_option(7, 7), "compute_75")
144
+ self.assertEqual(nvvm.get_arch_option(8, 8), "compute_87")
144
145
  # Test known arch.
145
146
  supported_cc = nvvm.get_supported_ccs()
146
147
  for arch in supported_cc:
@@ -1,5 +1,5 @@
1
1
  from math import sqrt
2
- from numba import cuda, float32, int16, int32, int64, uint32, void
2
+ from numba import cuda, float32, int16, int32, int64, types, uint32, void
3
3
  from numba.cuda import (
4
4
  compile,
5
5
  compile_for_current_device,
@@ -288,7 +288,7 @@ class TestCompileOnlyTests(unittest.TestCase):
288
288
  # Sleep for a variable time
289
289
  cuda.nanosleep(x)
290
290
 
291
- ptx, resty = compile_ptx(use_nanosleep, (uint32,), cc=(7, 0))
291
+ ptx, resty = compile_ptx(use_nanosleep, (uint32,))
292
292
 
293
293
  nanosleep_count = 0
294
294
  for line in ptx.split("\n"):
@@ -306,5 +306,65 @@ class TestCompileOnlyTests(unittest.TestCase):
306
306
  )
307
307
 
308
308
 
309
+ @skip_on_cudasim("Compilation unsupported in the simulator")
310
+ class TestCompileWithLaunchBounds(unittest.TestCase):
311
+ def _test_launch_bounds_common(self, launch_bounds):
312
+ def f():
313
+ pass
314
+
315
+ sig = "void()"
316
+ ptx, resty = cuda.compile_ptx(f, sig, launch_bounds=launch_bounds)
317
+ self.assertIsInstance(resty, types.NoneType)
318
+ self.assertRegex(ptx, r".maxntid\s+128,\s+1,\s+1")
319
+ return ptx
320
+
321
+ def test_launch_bounds_scalar(self):
322
+ launch_bounds = 128
323
+ ptx = self._test_launch_bounds_common(launch_bounds)
324
+
325
+ self.assertNotIn(".minnctapersm", ptx)
326
+ self.assertNotIn(".maxclusterrank", ptx)
327
+
328
+ def test_launch_bounds_tuple(self):
329
+ launch_bounds = (128,)
330
+ ptx = self._test_launch_bounds_common(launch_bounds)
331
+
332
+ self.assertNotIn(".minnctapersm", ptx)
333
+ self.assertNotIn(".maxclusterrank", ptx)
334
+
335
+ def test_launch_bounds_with_min_cta(self):
336
+ launch_bounds = (128, 2)
337
+ ptx = self._test_launch_bounds_common(launch_bounds)
338
+
339
+ self.assertRegex(ptx, r".minnctapersm\s+2")
340
+ self.assertNotIn(".maxclusterrank", ptx)
341
+
342
+ def test_launch_bounds_with_max_cluster_rank(self):
343
+ def f():
344
+ pass
345
+
346
+ launch_bounds = (128, 2, 4)
347
+ cc = (9, 0)
348
+ sig = "void()"
349
+ ptx, resty = cuda.compile_ptx(
350
+ f, sig, launch_bounds=launch_bounds, cc=cc
351
+ )
352
+ self.assertIsInstance(resty, types.NoneType)
353
+ self.assertRegex(ptx, r".maxntid\s+128,\s+1,\s+1")
354
+
355
+ self.assertRegex(ptx, r".minnctapersm\s+2")
356
+ self.assertRegex(ptx, r".maxclusterrank\s+4")
357
+
358
+ def test_too_many_launch_bounds(self):
359
+ def f():
360
+ pass
361
+
362
+ sig = "void()"
363
+ launch_bounds = (128, 2, 4, 8)
364
+
365
+ with self.assertRaisesRegex(ValueError, "Got 4 launch bounds:"):
366
+ cuda.compile_ptx(f, sig, launch_bounds=launch_bounds)
367
+
368
+
309
369
  if __name__ == "__main__":
310
370
  unittest.main()
@@ -332,10 +332,10 @@ class TestCudaDebugInfo(CUDATestCase):
332
332
 
333
333
  @cuda.jit("void(int32, int32)", debug=True, opt=False)
334
334
  def f(x, y):
335
- z = x # noqa: F841
336
- z = 100 # noqa: F841
337
- z = y # noqa: F841
338
- z = True # noqa: F841
335
+ z1 = x # noqa: F841
336
+ z2 = 100 # noqa: F841
337
+ z3 = y # noqa: F841
338
+ z4 = True # noqa: F841
339
339
 
340
340
  llvm_ir = f.inspect_llvm(sig)
341
341
  # Verify the call to llvm.dbg.declare is replaced by llvm.dbg.value
@@ -373,6 +373,45 @@ class TestCudaDebugInfo(CUDATestCase):
373
373
  match = re.compile(pat).search(llvm_ir)
374
374
  self.assertIsNone(match, msg=llvm_ir)
375
375
 
376
+ def test_union_poly_types(self):
377
+ sig = (types.int32, types.int32)
378
+
379
+ @cuda.jit("void(int32, int32)", debug=True, opt=False)
380
+ def f(x, y):
381
+ foo = 100 # noqa: F841
382
+ foo = 2.34 # noqa: F841
383
+ foo = True # noqa: F841
384
+ foo = 200 # noqa: F841
385
+
386
+ llvm_ir = f.inspect_llvm(sig)
387
+ # Extract the type node id
388
+ pat1 = r'!DILocalVariable\(.*name: "foo".*type: !(\d+)\)'
389
+ match = re.compile(pat1).search(llvm_ir)
390
+ self.assertIsNotNone(match, msg=llvm_ir)
391
+ mdnode_id = match.group(1)
392
+ # Verify the union type and extract the elements node id
393
+ pat2 = rf"!{mdnode_id} = distinct !DICompositeType\(elements: !(\d+),.*size: 64, tag: DW_TAG_union_type\)" # noqa: E501
394
+ match = re.compile(pat2).search(llvm_ir)
395
+ self.assertIsNotNone(match, msg=llvm_ir)
396
+ mdnode_id = match.group(1)
397
+ # Extract the member node ids
398
+ pat3 = r"!{ !(\d+), !(\d+), !(\d+) }"
399
+ match = re.compile(pat3).search(llvm_ir)
400
+ self.assertIsNotNone(match, msg=llvm_ir)
401
+ mdnode_id1 = match.group(1)
402
+ mdnode_id2 = match.group(2)
403
+ mdnode_id3 = match.group(3)
404
+ # Verify the member nodes
405
+ pat4 = rf'!{mdnode_id1} = !DIDerivedType(.*name: "_bool", size: 8, tag: DW_TAG_member)' # noqa: E501
406
+ match = re.compile(pat4).search(llvm_ir)
407
+ self.assertIsNotNone(match, msg=llvm_ir)
408
+ pat5 = rf'!{mdnode_id2} = !DIDerivedType(.*name: "_float64", size: 64, tag: DW_TAG_member)' # noqa: E501
409
+ match = re.compile(pat5).search(llvm_ir)
410
+ self.assertIsNotNone(match, msg=llvm_ir)
411
+ pat6 = rf'!{mdnode_id3} = !DIDerivedType(.*name: "_int64", size: 64, tag: DW_TAG_member)' # noqa: E501
412
+ match = re.compile(pat6).search(llvm_ir)
413
+ self.assertIsNotNone(match, msg=llvm_ir)
414
+
376
415
 
377
416
  if __name__ == "__main__":
378
417
  unittest.main()
@@ -1,9 +1,26 @@
1
+ from numba.cuda.cudadrv.driver import CudaAPIError
1
2
  import numpy as np
2
3
  import threading
3
4
 
4
- from numba import boolean, config, cuda, float32, float64, int32, int64, void
5
+ from numba import (
6
+ boolean,
7
+ config,
8
+ cuda,
9
+ float32,
10
+ float64,
11
+ int32,
12
+ int64,
13
+ types,
14
+ uint32,
15
+ void,
16
+ )
5
17
  from numba.core.errors import TypingError
6
- from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
18
+ from numba.cuda.testing import (
19
+ cc_X_or_above,
20
+ skip_on_cudasim,
21
+ unittest,
22
+ CUDATestCase,
23
+ )
7
24
  import math
8
25
 
9
26
 
@@ -466,6 +483,35 @@ class TestDispatcher(CUDATestCase):
466
483
  self.assertEqual("Add two integers, kernel version", add_kernel.__doc__)
467
484
  self.assertEqual("Add two integers, device version", add_device.__doc__)
468
485
 
486
+ @skip_on_cudasim("Cudasim does not have device pointers")
487
+ def test_dispatcher_cpointer_arguments(self):
488
+ ptr = types.CPointer(types.int32)
489
+ sig = void(ptr, int32, ptr, ptr, uint32)
490
+
491
+ @cuda.jit(sig)
492
+ def axpy(r, a, x, y, n):
493
+ i = cuda.grid(1)
494
+ if i < n:
495
+ r[i] = a * x[i] + y[i]
496
+
497
+ N = 16
498
+ a = 5
499
+ hx = np.arange(10, dtype=np.int32)
500
+ hy = np.arange(10, dtype=np.int32) * 2
501
+ dx = cuda.to_device(hx)
502
+ dy = cuda.to_device(hy)
503
+ dr = cuda.device_array_like(dx)
504
+
505
+ r_ptr = dr.__cuda_array_interface__["data"][0]
506
+ x_ptr = dx.__cuda_array_interface__["data"][0]
507
+ y_ptr = dy.__cuda_array_interface__["data"][0]
508
+
509
+ axpy[1, 32](r_ptr, a, x_ptr, y_ptr, N)
510
+
511
+ expected = a * hx + hy
512
+ actual = dr.copy_to_host()
513
+ np.testing.assert_equal(expected, actual)
514
+
469
515
 
470
516
  @skip_on_cudasim("CUDA simulator doesn't implement kernel properties")
471
517
  class TestDispatcherKernelProperties(CUDATestCase):
@@ -708,5 +754,63 @@ class TestDispatcherKernelProperties(CUDATestCase):
708
754
  self.assertGreaterEqual(local_mem_per_thread, N * 4)
709
755
 
710
756
 
757
+ @skip_on_cudasim("Simulator does not support launch bounds")
758
+ class TestLaunchBounds(CUDATestCase):
759
+ def _test_launch_bounds_common(self, launch_bounds):
760
+ @cuda.jit(launch_bounds=launch_bounds)
761
+ def f():
762
+ pass
763
+
764
+ # Test successful launch
765
+ f[1, 128]()
766
+
767
+ # Test launch bound exceeded
768
+ msg = "Call to cuLaunchKernel results in CUDA_ERROR_INVALID_VALUE"
769
+ with self.assertRaisesRegex(CudaAPIError, msg):
770
+ f[1, 256]()
771
+
772
+ sig = f.signatures[0]
773
+ ptx = f.inspect_asm(sig)
774
+ self.assertRegex(ptx, r".maxntid\s+128,\s+1,\s+1")
775
+
776
+ return ptx
777
+
778
+ def test_launch_bounds_scalar(self):
779
+ launch_bounds = 128
780
+ ptx = self._test_launch_bounds_common(launch_bounds)
781
+
782
+ self.assertNotIn(".minnctapersm", ptx)
783
+ self.assertNotIn(".maxclusterrank", ptx)
784
+
785
+ def test_launch_bounds_tuple(self):
786
+ launch_bounds = (128,)
787
+ ptx = self._test_launch_bounds_common(launch_bounds)
788
+
789
+ self.assertNotIn(".minnctapersm", ptx)
790
+ self.assertNotIn(".maxclusterrank", ptx)
791
+
792
+ def test_launch_bounds_with_min_cta(self):
793
+ launch_bounds = (128, 2)
794
+ ptx = self._test_launch_bounds_common(launch_bounds)
795
+
796
+ self.assertRegex(ptx, r".minnctapersm\s+2")
797
+ self.assertNotIn(".maxclusterrank", ptx)
798
+
799
+ @unittest.skipUnless(
800
+ cc_X_or_above(9, 0), "CC 9.0 needed for max cluster rank"
801
+ )
802
+ def test_launch_bounds_with_max_cluster_rank(self):
803
+ launch_bounds = (128, 2, 4)
804
+ ptx = self._test_launch_bounds_common(launch_bounds)
805
+
806
+ self.assertRegex(ptx, r".minnctapersm\s+2")
807
+ self.assertRegex(ptx, r".maxclusterrank\s+4")
808
+
809
+ def test_too_many_launch_bounds(self):
810
+ launch_bounds = (128, 2, 4, 8)
811
+ with self.assertRaisesRegex(ValueError, "Got 4 launch bounds:"):
812
+ cuda.jit("void()", launch_bounds=launch_bounds)(lambda: None)
813
+
814
+
711
815
  if __name__ == "__main__":
712
816
  unittest.main()
@@ -118,31 +118,18 @@ class TestFastMathOption(CUDATestCase):
118
118
  def tanh_kernel(r, x):
119
119
  r[0] = tanh(x)
120
120
 
121
- def tanh_common_test(cc, criterion):
122
- fastptx, _ = compile_ptx(
123
- tanh_kernel, (float32[::1], float32), fastmath=True, cc=cc
124
- )
125
- precptx, _ = compile_ptx(
126
- tanh_kernel, (float32[::1], float32), cc=cc
127
- )
128
- criterion.check(self, fastptx, precptx)
129
-
130
- tanh_common_test(
131
- cc=(7, 5),
132
- criterion=FastMathCriterion(
133
- fast_expected=["tanh.approx.f32 "],
134
- prec_unexpected=["tanh.approx.f32 "],
135
- ),
121
+ fastptx, _ = compile_ptx(
122
+ tanh_kernel, (float32[::1], float32), fastmath=True
136
123
  )
124
+ precptx, _ = compile_ptx(tanh_kernel, (float32[::1], float32))
137
125
 
138
- tanh_common_test(
139
- cc=(7, 0),
140
- criterion=FastMathCriterion(
141
- fast_expected=["ex2.approx.ftz.f32 ", "rcp.approx.ftz.f32 "],
142
- prec_unexpected=["tanh.approx.f32 "],
143
- ),
126
+ criterion = FastMathCriterion(
127
+ fast_expected=["tanh.approx.f32 "],
128
+ prec_unexpected=["tanh.approx.f32 "],
144
129
  )
145
130
 
131
+ criterion.check(self, fastptx, precptx)
132
+
146
133
  def test_expf(self):
147
134
  self._test_fast_math_unary(
148
135
  exp,
@@ -641,7 +641,7 @@ class TestCudaIntrinsic(CUDATestCase):
641
641
  @skip_on_cudasim("Compilation unsupported in the simulator")
642
642
  def test_hadd_ptx(self):
643
643
  args = (f2[:], f2, f2)
644
- ptx, _ = compile_ptx(simple_hadd_scalar, args, cc=(5, 3))
644
+ ptx, _ = compile_ptx(simple_hadd_scalar, args)
645
645
  self.assertIn("add.f16", ptx)
646
646
 
647
647
  @skip_unless_cc_53
@@ -668,7 +668,7 @@ class TestCudaIntrinsic(CUDATestCase):
668
668
  @skip_on_cudasim("Compilation unsupported in the simulator")
669
669
  def test_hfma_ptx(self):
670
670
  args = (f2[:], f2, f2, f2)
671
- ptx, _ = compile_ptx(simple_hfma_scalar, args, cc=(5, 3))
671
+ ptx, _ = compile_ptx(simple_hfma_scalar, args)
672
672
  self.assertIn("fma.rn.f16", ptx)
673
673
 
674
674
  @skip_unless_cc_53
@@ -693,7 +693,7 @@ class TestCudaIntrinsic(CUDATestCase):
693
693
  @skip_on_cudasim("Compilation unsupported in the simulator")
694
694
  def test_hsub_ptx(self):
695
695
  args = (f2[:], f2, f2)
696
- ptx, _ = compile_ptx(simple_hsub_scalar, args, cc=(5, 3))
696
+ ptx, _ = compile_ptx(simple_hsub_scalar, args)
697
697
  self.assertIn("sub.f16", ptx)
698
698
 
699
699
  @skip_unless_cc_53
@@ -718,7 +718,7 @@ class TestCudaIntrinsic(CUDATestCase):
718
718
  @skip_on_cudasim("Compilation unsupported in the simulator")
719
719
  def test_hmul_ptx(self):
720
720
  args = (f2[:], f2, f2)
721
- ptx, _ = compile_ptx(simple_hmul_scalar, args, cc=(5, 3))
721
+ ptx, _ = compile_ptx(simple_hmul_scalar, args)
722
722
  self.assertIn("mul.f16", ptx)
723
723
 
724
724
  @skip_unless_cc_53
@@ -763,7 +763,7 @@ class TestCudaIntrinsic(CUDATestCase):
763
763
  @skip_on_cudasim("Compilation unsupported in the simulator")
764
764
  def test_hneg_ptx(self):
765
765
  args = (f2[:], f2)
766
- ptx, _ = compile_ptx(simple_hneg_scalar, args, cc=(5, 3))
766
+ ptx, _ = compile_ptx(simple_hneg_scalar, args)
767
767
  self.assertIn("neg.f16", ptx)
768
768
 
769
769
  @skip_unless_cc_53
@@ -786,7 +786,7 @@ class TestCudaIntrinsic(CUDATestCase):
786
786
  @skip_on_cudasim("Compilation unsupported in the simulator")
787
787
  def test_habs_ptx(self):
788
788
  args = (f2[:], f2)
789
- ptx, _ = compile_ptx(simple_habs_scalar, args, cc=(5, 3))
789
+ ptx, _ = compile_ptx(simple_habs_scalar, args)
790
790
  self.assertIn("abs.f16", ptx)
791
791
 
792
792
  @skip_unless_cc_53
@@ -178,7 +178,7 @@ class TestOperatorModule(CUDATestCase):
178
178
  args = (f2[:], f2, f2)
179
179
  for fn, instr in zip(functions, instrs):
180
180
  with self.subTest(instr=instr):
181
- ptx, _ = compile_ptx(fn, args, cc=(5, 3))
181
+ ptx, _ = compile_ptx(fn, args)
182
182
  self.assertIn(instr, ptx)
183
183
 
184
184
  @skip_unless_cc_53
@@ -212,7 +212,7 @@ class TestOperatorModule(CUDATestCase):
212
212
 
213
213
  for fn, instr in zip(functions, instrs):
214
214
  with self.subTest(instr=instr):
215
- ptx, _ = compile_ptx(fn, args, cc=(5, 3))
215
+ ptx, _ = compile_ptx(fn, args)
216
216
  self.assertIn(instr, ptx)
217
217
 
218
218
  @skip_unless_cc_53
@@ -255,13 +255,13 @@ class TestOperatorModule(CUDATestCase):
255
255
  @skip_on_cudasim("Compilation unsupported in the simulator")
256
256
  def test_fp16_neg_ptx(self):
257
257
  args = (f2[:], f2)
258
- ptx, _ = compile_ptx(simple_fp16neg, args, cc=(5, 3))
258
+ ptx, _ = compile_ptx(simple_fp16neg, args)
259
259
  self.assertIn("neg.f16", ptx)
260
260
 
261
261
  @skip_on_cudasim("Compilation unsupported in the simulator")
262
262
  def test_fp16_abs_ptx(self):
263
263
  args = (f2[:], f2)
264
- ptx, _ = compile_ptx(simple_fp16abs, args, cc=(5, 3))
264
+ ptx, _ = compile_ptx(simple_fp16abs, args)
265
265
 
266
266
  self.assertIn("abs.f16", ptx)
267
267
 
@@ -396,7 +396,7 @@ class TestOperatorModule(CUDATestCase):
396
396
 
397
397
  for fn, op, s in zip(functions, ops, opstring):
398
398
  with self.subTest(op=op):
399
- ptx, _ = compile_ptx(fn, args, cc=(5, 3))
399
+ ptx, _ = compile_ptx(fn, args)
400
400
  self.assertIn(s, ptx)
401
401
 
402
402
  @skip_on_cudasim("Compilation unsupported in the simulator")
@@ -431,7 +431,7 @@ class TestOperatorModule(CUDATestCase):
431
431
  for fn, op in zip(functions, ops):
432
432
  with self.subTest(op=op):
433
433
  args = (b1[:], f2, from_dtype(np.int8))
434
- ptx, _ = compile_ptx(fn, args, cc=(5, 3))
434
+ ptx, _ = compile_ptx(fn, args)
435
435
  self.assertIn(opstring[op], ptx)
436
436
 
437
437
  @skip_on_cudasim("Compilation unsupported in the simulator")
@@ -475,7 +475,7 @@ class TestOperatorModule(CUDATestCase):
475
475
  with self.subTest(op=op, ty=ty):
476
476
  arg2_ty = np.result_type(np.float16, ty)
477
477
  args = (b1[:], f2, from_dtype(arg2_ty))
478
- ptx, _ = compile_ptx(fn, args, cc=(5, 3))
478
+ ptx, _ = compile_ptx(fn, args)
479
479
 
480
480
  ops = opstring[op] + opsuffix[arg2_ty]
481
481
  self.assertIn(ops, ptx)
@@ -0,0 +1,64 @@
1
+ import unittest
2
+
3
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
4
+ from numba.tests.support import captured_stdout
5
+
6
+
7
+ @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
8
+ class TestCPointer(CUDATestCase):
9
+ """
10
+ Test simple vector addition
11
+ """
12
+
13
+ def setUp(self):
14
+ # Prevent output from this test showing
15
+ # up when running the test suite
16
+ self._captured_stdout = captured_stdout()
17
+ self._captured_stdout.__enter__()
18
+ super().setUp()
19
+
20
+ def tearDown(self):
21
+ # No exception type, value, or traceback
22
+ self._captured_stdout.__exit__(None, None, None)
23
+ super().tearDown()
24
+
25
+ def test_ex_cpointer(self):
26
+ # ex_cpointer.sig.begin
27
+ import numpy as np
28
+ from numba import cuda, types
29
+
30
+ # The first kernel argument is a pointer to a uint8 array.
31
+ # The second argument holds the length as a uint32.
32
+ # The return type of a kernel is always void.
33
+ sig = types.void(types.CPointer(types.uint8), types.uint32)
34
+ # ex_cpointer.sig.end
35
+
36
+ # ex_cpointer.kernel.begin
37
+ @cuda.jit(sig)
38
+ def add_one(x, n):
39
+ i = cuda.grid(1)
40
+ if i < n:
41
+ x[i] += 1
42
+
43
+ # ex_cpointer.kernel.end
44
+
45
+ # ex_cpointer.launch.begin
46
+ x = cuda.to_device(np.arange(10, dtype=np.uint8))
47
+
48
+ # Print initial values of x
49
+ print(x.copy_to_host()) # [0 1 2 3 4 5 6 7 8 9]
50
+
51
+ # Obtain a pointer to the data from from the CUDA Array Interface
52
+ x_ptr = x.__cuda_array_interface__["data"][0]
53
+ x_len = len(x)
54
+
55
+ # Launch the kernel with the pointer and length
56
+ add_one[1, 32](x_ptr, x_len)
57
+
58
+ # Demonstrate that the data was updated by the kernel
59
+ print(x.copy_to_host()) # [ 1 2 3 4 5 6 7 8 9 10]
60
+ # ex_cpointer.launch.end
61
+
62
+
63
+ if __name__ == "__main__":
64
+ unittest.main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numba-cuda
3
- Version: 0.13.0
3
+ Version: 0.14.0
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -12,6 +12,27 @@ Requires-Python: >=3.9
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  Requires-Dist: numba>=0.59.1
15
+ Provides-Extra: cu11
16
+ Requires-Dist: cuda-python==11.8.*; extra == "cu11"
17
+ Requires-Dist: nvidia-cuda-nvcc-cu11; extra == "cu11"
18
+ Requires-Dist: nvidia-cuda-runtime-cu11; extra == "cu11"
19
+ Requires-Dist: nvidia-cuda-nvrtc-cu11; extra == "cu11"
20
+ Provides-Extra: cu12
21
+ Requires-Dist: cuda-python==12.9.*; extra == "cu12"
22
+ Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
23
+ Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
24
+ Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "cu12"
25
+ Provides-Extra: test
26
+ Requires-Dist: psutil; extra == "test"
27
+ Requires-Dist: cffi; extra == "test"
28
+ Requires-Dist: pytest; extra == "test"
29
+ Provides-Extra: test-cu11
30
+ Requires-Dist: numba-cuda[test]; extra == "test-cu11"
31
+ Requires-Dist: nvidia-curand-cu11; extra == "test-cu11"
32
+ Provides-Extra: test-cu12
33
+ Requires-Dist: numba-cuda[test]; extra == "test-cu12"
34
+ Requires-Dist: nvidia-curand-cu12; extra == "test-cu12"
35
+ Requires-Dist: pynvjitlink-cu12; extra == "test-cu12"
15
36
  Dynamic: license-file
16
37
 
17
38
  <div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
@@ -1,6 +1,6 @@
1
1
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
2
2
  _numba_cuda_redirector.py,sha256=n_r8MYbu5-vcXMnLJW147k8DnFXXvgb7nPIXnlXwTyQ,2659
3
- numba_cuda/VERSION,sha256=2EyeWWx9apTl90V5742JEqgHsNKFgkdJAK0137Pt_PQ,7
3
+ numba_cuda/VERSION,sha256=BlWCZVqs1vyD_3QqVxXAS7Slc5W_PuRVl5j6QsLORYk,7
4
4
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
5
5
  numba_cuda/_version.py,sha256=nzrrJXi85d18m6SPdsPsetJNClDETkmF1MrEhGLYDBs,734
6
6
  numba_cuda/numba/cuda/__init__.py,sha256=3siqMXEKqa9ezQ8RxPC3KMdebUjgJt-EKxxV4CX9818,607
@@ -10,18 +10,18 @@ numba_cuda/numba/cuda/args.py,sha256=UlTHTJpwPeCtnW0Bb-Wetm5UO9TPR-PCgIt5ys8b8tQ
10
10
  numba_cuda/numba/cuda/bf16.py,sha256=PXuitxHhPMjnti3g9IOSoL90ofGgVRcDfqFg7AqCXpU,1778
11
11
  numba_cuda/numba/cuda/cg.py,sha256=n-sBj05ut6U_GgFIq-PTCjPad4nXWAc0GVg_J9xD_Pc,1602
12
12
  numba_cuda/numba/cuda/codegen.py,sha256=u2J0mRRDBiPceB1G5WR4KQ0KUFGGawaDaaoUf9zLQzE,16719
13
- numba_cuda/numba/cuda/compiler.py,sha256=aZwEVP8KXCIyccSw4vJyG6Qaai9oXsFuBAo_Ghwwai4,25607
13
+ numba_cuda/numba/cuda/compiler.py,sha256=JeF0PXoIOlL4wCHPkcQN48KTl_Ll90TQ3ZO150Isaa0,26681
14
14
  numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=8lUPmU6FURxphzEqkPLZRPYBCEK_wmDtHq2voPkckfs,950
15
15
  numba_cuda/numba/cuda/cuda_paths.py,sha256=kMIJ_1yV2qtcKEM5rCgSDJ3Gz7bgxbfAWh54E5cDndg,15872
16
16
  numba_cuda/numba/cuda/cudadecl.py,sha256=_TXMu8SIT2hIhsPI0n05wuShtzp8NcPX88NH5y7xauU,22909
17
17
  numba_cuda/numba/cuda/cudaimpl.py,sha256=q6CPqD8ZtJvY8JlpMEN--d6003_FIHoHLBqNP2McNyM,39274
18
18
  numba_cuda/numba/cuda/cudamath.py,sha256=wbGjlyGVwcUAoQjgXIaAaasLdVuDSKHkf6KyID5IYBw,3979
19
- numba_cuda/numba/cuda/debuginfo.py,sha256=5tCw_IEeZfoD6CtFpA_yUGdrq25Q9mFjfxxrudH_VFg,5476
20
- numba_cuda/numba/cuda/decorators.py,sha256=bR8yOAIC68lhm8mSMU-DUt1qFrEogbmSAtzAI4MoToc,9608
19
+ numba_cuda/numba/cuda/debuginfo.py,sha256=br4Ce9Q8AA7FlX8sjpXj0-mUWgs5ttQCP0ma-qayWUE,7812
20
+ numba_cuda/numba/cuda/decorators.py,sha256=NeSHxaiUZyAVJf79UFTctU-7AKLm8dDPERIHbERZPI0,10347
21
21
  numba_cuda/numba/cuda/descriptor.py,sha256=t1rSVJSCAlVACC5_Un3FQ7iubdTTBe-euqz88cvs2tI,985
22
22
  numba_cuda/numba/cuda/device_init.py,sha256=Rtwd6hQMHMLMkj6MXtndbWYFJfkIaRe0MwOIJF2nzhU,3449
23
23
  numba_cuda/numba/cuda/deviceufunc.py,sha256=zj9BbLiZD-dPttHew4olw8ANgR2nXnXEE9qjCeGLrQI,30731
24
- numba_cuda/numba/cuda/dispatcher.py,sha256=m8kXKk08ldcW7Cl3KpFxsKMTxVgZeRJke9bKzO6_JjE,43172
24
+ numba_cuda/numba/cuda/dispatcher.py,sha256=_uaS7jxpquTiG4En2u5eNbOBXYvOIrJebVS-vk9voVU,43467
25
25
  numba_cuda/numba/cuda/errors.py,sha256=WRso1Q_jCoWP5yrDBMhihRhhVtVo1-7KdN8QVE9j46o,1712
26
26
  numba_cuda/numba/cuda/extending.py,sha256=VwuU5F0AQFlJsqaiwoWk-6Itihew1FsjVT_BVjhY8Us,2278
27
27
  numba_cuda/numba/cuda/initialize.py,sha256=0SnpjccQEYiWITIyfAJx833H1yhYFFDY42EpnwYyMn8,487
@@ -32,7 +32,7 @@ numba_cuda/numba/cuda/libdevicedecl.py,sha256=xdZbb_rCaftMf8Pbw63g_Lr230N-1QoaYz
32
32
  numba_cuda/numba/cuda/libdevicefuncs.py,sha256=c80lGpGoFIYkAdgr4fzbxzdNCyJYrLdss64bwa0Mc6w,37471
33
33
  numba_cuda/numba/cuda/libdeviceimpl.py,sha256=m4Fog_OPPEg2RkOk7LEeqF26MK4aEFlKxITlSCZKMAo,2798
34
34
  numba_cuda/numba/cuda/locks.py,sha256=yF6WcwMyzauJ9H7JuCRq2Ynx7kFVAnlkkvmWp7UdZ5w,388
35
- numba_cuda/numba/cuda/lowering.py,sha256=6XXpTRfTBTVHPh1M4jVAL9APvKk1UWSb-A5WJTEMsqQ,1602
35
+ numba_cuda/numba/cuda/lowering.py,sha256=DSco9CZiYcKyL2U22yzg9Z7eW7VA7YA-TZ55ZyZ5wIo,5240
36
36
  numba_cuda/numba/cuda/mathimpl.py,sha256=-8IOkhorbMg8iPBMIdgjk3qJZSyRWYJDwPAWrTMkODI,14356
37
37
  numba_cuda/numba/cuda/models.py,sha256=jbvmbL51mt0Z1nZTSiniBJTFhnOfPzzcVD6xCEpXDMA,1282
38
38
  numba_cuda/numba/cuda/nvvmutils.py,sha256=x-0nCqwkoB8DzX7bSrvTH0h-aKSDx0rVWKR7Eqx4ldA,7993
@@ -52,7 +52,7 @@ numba_cuda/numba/cuda/_internal/cuda_bf16.py,sha256=QYck6s_D85HBEsc__SAl_UZxf7Sp
52
52
  numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=inat2K8K1OVrgDe64FK7CyRmyFyNKcNO4p2_L79yRZ0,201
53
53
  numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=6tF2TYnmjMbKk2fho1ONoD_QsRD9QVTT2kHP7x1u1J0,31556
54
54
  numba_cuda/numba/cuda/cudadrv/devices.py,sha256=k87EDIRhj1ncM9PxJCjZGPFfEks99vzmHlTc55GK5X0,8062
55
- numba_cuda/numba/cuda/cudadrv/driver.py,sha256=63NDga5RLrk6JEiHW1aJDubqCbbHA5uumK3mSYy7SEY,119091
55
+ numba_cuda/numba/cuda/cudadrv/driver.py,sha256=ypF1plUmtHo7pFVI_JsIAJkOAYerj_1eW3rsXmawXJM,119641
56
56
  numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=OnjYWnmy8ZlSfYouhzyYIpW-AJ3x1YHj32YcBY2xet4,16790
57
57
  numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=2jycZhniMy3ncoVWQG9D8dBehTEeocBZTW43gKHL5Tc,14291
58
58
  numba_cuda/numba/cuda/cudadrv/enums.py,sha256=raWKryxamWQZ5A8ivMpyYVhhwbSpaD9lu7l1_wl2W9M,23742
@@ -62,7 +62,7 @@ numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=IZ13laEG_altDQyi9HkdMcwW-Y
62
62
  numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=9uEs1KepeVGRbEpVhLjtxSsvZpZsbrHnPywmx--y88A,804
63
63
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
64
64
  numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=UD8kASyGUU896tNWAtVxmbzDTP5jDbiOAZjCsELOg6U,14986
65
- numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=7tTy6-VEbMBpDUmuSMnUwqPFfBndTh3aPq_n7nxhEA0,26344
65
+ numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=2vq00bifcNvQQGbp0IUaStlFLM5faU9weQ2poWSB0a4,29637
66
66
  numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=J6PRGGK07XSLRzgCw5xs8VU5xVoqavvhojk1mxiQsi4,226
67
67
  numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=CFumwg4iblWap_E7l7GM_hMYz1PsbH81-N0tZwFFooA,4372
68
68
  numba_cuda/numba/cuda/include/11/cuda_bf16.h,sha256=Z7HGJEOhMjQzD0Gs0eq0qdzD-Wr8Zbty-FeeLtahN-s,138713
@@ -130,8 +130,8 @@ numba_cuda/numba/cuda/tests/cudadrv/test_linker.py,sha256=ymv2ujRLLIIURikNEdC0Ss
130
130
  numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py,sha256=2tkf766GjIta_wL5NGlMIqmrDMFN2rZmnP_c9A8cWA8,5084
131
131
  numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py,sha256=176Ma2ZVLnc4w4bfYwbF1eeRq3x3rbOvDieRJLSuNpI,8413
132
132
  numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py,sha256=9MLFEXn7DnLkuuXK_qjilA1jxQwC-AeSBOcRYzZogRY,1513
133
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=8SSSAotk8rhGClwxQCnwL_JhoD9NbvXxEa7KfjaZO3M,11551
134
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=1r817QeIrIEs8BcK0XKBR9g_mkO3e7WI-oW-sNsO_Ho,7353
133
+ numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=2BpJ-m3Ue9ZN-NNVkVgPyPyWsffADj_eCtYdiLVJ528,11551
134
+ numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=71-Hlng6-HyhfK3i3ITUzHQIHyL3hCv1ubkkJOGt0R4,7400
135
135
  numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py,sha256=PGuv4bt9qiIGlkLhyQCOXFIf1SK5Nj-RjcpWqeO1TMM,943
136
136
  numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py,sha256=xbSFmvqOIcWY-TI9p1MDcGwE-24iaK4j-_UenMvTnR4,508
137
137
  numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py,sha256=bpM9AvL39hUM2kv01lUy3UdlnCmv1BGyzh4rByaUMns,4978
@@ -159,7 +159,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_boolean.py,sha256=j4mIOv4rJTLjJzpKk1O9UF
159
159
  numba_cuda/numba/cuda/tests/cudapy/test_caching.py,sha256=obUSTJSP2Lh-YNElq8PZpVnRJOeq-uqV_VyLHtsXwAw,18427
160
160
  numba_cuda/numba/cuda/tests/cudapy/test_casting.py,sha256=3LaN3ZsSuOZXAZXCV85wYyhh0ih7JqABnjGTa7Y2YBE,8748
161
161
  numba_cuda/numba/cuda/tests/cudapy/test_cffi.py,sha256=tC7ZCA4dkzehS33iz2l35rX6OxE3BTQd9ivV4r74YXs,926
162
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py,sha256=OkCavTZAAcdffdUBYGEmlP_BN7zAH-rWlhr-LqSUUs8,10997
162
+ numba_cuda/numba/cuda/tests/cudapy/test_compiler.py,sha256=4BB1pEC_2XQ9EWixiLXeLTDcP-5H2sAZCPt2_p-njQ4,12908
163
163
  numba_cuda/numba/cuda/tests/cudapy/test_complex.py,sha256=hmAcyZim46yueXZDqDSJYqxXuBGm7wRiZo_q9-SbMlg,10129
164
164
  numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py,sha256=KIuXQ0ihgQQXM-eH7s3xAxhKe35YL1qDTHCVTWA4ut8,497
165
165
  numba_cuda/numba/cuda/tests/cudapy/test_const_string.py,sha256=li1UsV5vc2M01cJ7k6_526VPtuAOAKr8e7kb1CDUXi4,4323
@@ -169,14 +169,14 @@ numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=RXCNHAZM3
169
169
  numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=8prL2FTiaajW-UHSL9al-nBniygOfpdAOT_Dkej4PWI,2138
170
170
  numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=MnOeDWMz-rL3-07FsswM06Laxmm0KjTmTwhrP3rmchQ,3526
171
171
  numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=1P369s02AvGu7fSIEe_YxSgh3c6S72Aw1gRgmepDbQY,3383
172
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=AE8D4U4dAv4nYP9oatDwROW6knpJ0-iggP4BaHymo6g,13170
172
+ numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=5TVEbo5DAfF5Z-kDLU6cShgNy18-A1fp0vssE8Gs7D8,15038
173
173
  numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=LNGBZfqFGUtVVQeC6FcHo8T3DbG-j6AjeBwJmwp9HH4,13157
174
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=Oc6CdI1j9Ad_wklHdIYSMytrzUpzK6oXD0BGe45sTwg,26636
174
+ numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=mu35VClyXQK8tqF9IBc5909FVgtqfHmPUSwQNufJD6A,29609
175
175
  numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=VQGPLcTbT1nhS1BE4VALK-TaQEsPec5zu-XVlWV0sHA,4593
176
176
  numba_cuda/numba/cuda/tests/cudapy/test_errors.py,sha256=w6ipW9UIvUD_ZIt_6fQ-uJsHyKLyHVqv2bym-9vyGyY,2757
177
177
  numba_cuda/numba/cuda/tests/cudapy/test_exception.py,sha256=W5NF022DOOTaEjFmhfr8BnfhRXvYyXHiGwznQrm_9T4,5507
178
178
  numba_cuda/numba/cuda/tests/cudapy/test_extending.py,sha256=G6KcFAiJnDEfa5f7HW72Ocqxrv6xRvGMRTbwttTsuec,8678
179
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py,sha256=fiUoOiwWjctZNFN-DGw1A8eGfHLqNulo2OQ7v1DFS9o,8552
179
+ numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py,sha256=2May_6jJVWlYMvkAjns6UROv6GbK9wu8z2AJC2clJiE,8122
180
180
  numba_cuda/numba/cuda/tests/cudapy/test_forall.py,sha256=Ory5s-_9MauSCP2RuWUEmcGFvP0kS7ytV-3iYPFYR6o,1470
181
181
  numba_cuda/numba/cuda/tests/cudapy/test_freevar.py,sha256=JvWn7Lw137HI61mouKnPvDxZIqLppiCF_351osxQQYE,753
182
182
  numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py,sha256=nm3dK4SEIj_Wmg5iIxgFkFBHc-hLwcFtqu-8rcV7w68,2024
@@ -187,7 +187,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py,sha256=1USofSlavYFa
187
187
  numba_cuda/numba/cuda/tests/cudapy/test_idiv.py,sha256=tTy7hN2LJ4897UzO3EUxjuUzbBcs9QITHJu3s_eknq0,1054
188
188
  numba_cuda/numba/cuda/tests/cudapy/test_inline.py,sha256=T7DHquV_4HuX5fFQQS3kcZzgifTzwYbMFiY7SgQzoLA,4584
189
189
  numba_cuda/numba/cuda/tests/cudapy/test_inspect.py,sha256=L9-62nPmiWC90PST5EZrnGdAcrsbhMS_mbEkwdDkFQ0,4901
190
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=uQ0S_XXds-F9Z5GhuFYzRVXu5XYD1ULa-y55Wi92i5I,36726
190
+ numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=-RGl-0vVFbCMOJFXIc_f2kvtoO6al3wRmh8f24roBpU,36660
191
191
  numba_cuda/numba/cuda/tests/cudapy/test_ipc.py,sha256=bNT6UZgsgeVWyzBrlKXucQW6IKcD6NEmbwV5cFhf-7I,10553
192
192
  numba_cuda/numba/cuda/tests/cudapy/test_iterators.py,sha256=WCRkQfkEnB0d9aj55dVvyQzD4QxrOLubnlKO0xTiNto,2343
193
193
  numba_cuda/numba/cuda/tests/cudapy/test_lang.py,sha256=TP1spLeJfmBKKrU7G3bvkhNPvVm-oQX134taQsZeNbE,1693
@@ -204,7 +204,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py,sha256=rZNVEwf7FqFwFd_O433D9
204
204
  numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py,sha256=9jkdHiaHAFbs7DzrOIDKYsbByB-8B6ucLQUvV9dWJcE,1225
205
205
  numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py,sha256=B6g46b9Ky8G0PlJhoGUf44D_Ayvs1otQ0DoCFPwhBWw,2843
206
206
  numba_cuda/numba/cuda/tests/cudapy/test_nondet.py,sha256=E5hu6MD7FV9JJOK1t9ggVP37EQzpDaCdVd5TjNcmOqU,1378
207
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=H9108Y72VNQ3pfJU2ViZcCGeNtDVEeHkseTikV8cWmI,13813
207
+ numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=HKbXyFAGRgkWmtCQRCo0vSnO2TcM4BYDUmxs4jSC7Gs,13736
208
208
  numba_cuda/numba/cuda/tests/cudapy/test_optimization.py,sha256=-sY0U9aQUYTVFQFd8hXuypv2oH6dRY3N8cNSixCMykE,2924
209
209
  numba_cuda/numba/cuda/tests/cudapy/test_overload.py,sha256=BtBI4DxVKbg5i6ftQEmWjtITU25OTbn35WA2pyLWoI8,9107
210
210
  numba_cuda/numba/cuda/tests/cudapy/test_powi.py,sha256=ydwUtozuZlaLqSl440BkYbrUP3p_x6U1boXXcaDbU8c,3264
@@ -245,6 +245,7 @@ numba_cuda/numba/cuda/tests/data/warn.cu,sha256=6L-qsXJIxAr_n3hVMAz_EZ5j0skcJAfg
245
245
  numba_cuda/numba/cuda/tests/data/include/add.cuh,sha256=yv61Ilqge_kjj-_BPO5YWAx3sqJD73gEh66gxYwE8wc,107
246
246
  numba_cuda/numba/cuda/tests/doc_examples/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0yg4-2gNar_0Rz0buUpM,157
247
247
  numba_cuda/numba/cuda/tests/doc_examples/test_cg.py,sha256=VLWd5_v744Z5QKa4i3JVDLUwA1sxJFQzV5cRG6EkyOI,2888
248
+ numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py,sha256=eMWfbi-dj1uyE6lXfTeSmFYDsZkgQeAEu4vmDg_4AOU,1921
248
249
  numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py,sha256=I4hWDF4DzTTtt3-XmQsP5RzPAO_pWUGsKjVO0hhPOCM,2251
249
250
  numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py,sha256=AtjAzFgZWm1nwOokQyO7D8NVMYGd1QDD3EaUT_RQruQ,4403
250
251
  numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py,sha256=4C_drWYNZq_qGIt-N0fJ9r8DZBaJdO_5h7mxRZ6RcO8,5133
@@ -273,8 +274,8 @@ numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=
273
274
  numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu,sha256=T9ubst3fFUK7EXyXXMi73wAban3VFFQ986cY5OcKfvI,157
274
275
  numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=IB5t-dVhrKVoue3AbUx3yVMxPG0hBF_yZbzb4642sf0,538
275
276
  numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
276
- numba_cuda-0.13.0.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
277
- numba_cuda-0.13.0.dist-info/METADATA,sha256=clEe3q5Jb4S4sixwT6RAgkGqLieoRYtWoyWEqBvSyZk,1859
278
- numba_cuda-0.13.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
279
- numba_cuda-0.13.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
280
- numba_cuda-0.13.0.dist-info/RECORD,,
277
+ numba_cuda-0.14.0.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
278
+ numba_cuda-0.14.0.dist-info/METADATA,sha256=eq4qxmqY97oT9f9_0tBT4EFxrMBsD1Bvj5Ix3he40HM,2799
279
+ numba_cuda-0.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
280
+ numba_cuda-0.14.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
281
+ numba_cuda-0.14.0.dist-info/RECORD,,