numba-cuda 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
- 0.0.16
+ 0.0.18
numba_cuda/numba/cuda/codegen.py CHANGED
@@ -59,8 +59,15 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
      get_cufunc), which may be of different compute capabilities.
      """

-     def __init__(self, codegen, name, entry_name=None, max_registers=None,
-                  nvvm_options=None):
+     def __init__(
+         self,
+         codegen,
+         name,
+         entry_name=None,
+         max_registers=None,
+         lto=False,
+         nvvm_options=None
+     ):
          """
          codegen:
              Codegen object.
@@ -71,6 +78,8 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
              kernel and not a device function.
          max_registers:
              The maximum register usage to aim for when linking.
+         lto:
+             Whether to enable link-time optimization.
          nvvm_options:
              Dict of options to pass to NVVM.
          """
@@ -103,6 +112,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          self._cufunc_cache = {}

          self._max_registers = max_registers
+         self._lto = lto
          if nvvm_options is None:
              nvvm_options = {}
          self._nvvm_options = nvvm_options
@@ -178,7 +188,9 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
          if cubin:
              return cubin

-         linker = driver.Linker.new(max_registers=self._max_registers, cc=cc)
+         linker = driver.Linker.new(
+             max_registers=self._max_registers, cc=cc, lto=self._lto
+         )

          if linker.lto:
              ltoir = self.get_ltoir(cc=cc)
numba_cuda/numba/cuda/cudadrv/driver.py CHANGED
@@ -10,7 +10,6 @@ subsequent deallocation could further corrupt the CUDA context and causes the
  system to freeze in some cases.

  """
-
  import sys
  import os
  import ctypes
@@ -19,6 +18,7 @@ import functools
  import warnings
  import logging
  import threading
+ import traceback
  import asyncio
  import pathlib
  from itertools import product
@@ -35,6 +35,8 @@ from numba.core import utils, serialize, config
  from .error import CudaSupportError, CudaDriverError
  from .drvapi import API_PROTOTYPES
  from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
+ from .mappings import FILE_EXTENSION_MAP
+ from .linkable_code import LinkableCode
  from numba.cuda.cudadrv import enums, drvapi, nvrtc

  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
@@ -56,6 +58,52 @@ _py_decref.argtypes = [ctypes.py_object]
  _py_incref.argtypes = [ctypes.py_object]


+ def _readenv(name, ctor, default):
+     value = os.environ.get(name)
+     if value is None:
+         return default() if callable(default) else default
+     try:
+         if ctor is bool:
+             return value.lower() in {'1', "true"}
+         return ctor(value)
+     except Exception:
+         warnings.warn(
+             f"Environment variable '{name}' is defined but its associated "
+             f"value '{value}' could not be parsed.\n"
+             "The parse failed with exception:\n"
+             f"{traceback.format_exc()}",
+             RuntimeWarning
+         )
+         return default
+
+
+ _MVC_ERROR_MESSAGE = (
+     "Minor version compatibility requires ptxcompiler and cubinlinker packages "
+     "to be available"
+ )
+
+ ENABLE_PYNVJITLINK = (
+     _readenv("NUMBA_CUDA_ENABLE_PYNVJITLINK", bool, False)
+     or getattr(config, "CUDA_ENABLE_PYNVJITLINK", False)
+ )
+ if not hasattr(config, "CUDA_ENABLE_PYNVJITLINK"):
+     config.CUDA_ENABLE_PYNVJITLINK = ENABLE_PYNVJITLINK
+
+ if ENABLE_PYNVJITLINK:
+     try:
+         from pynvjitlink.api import NvJitLinker, NvJitLinkError
+     except ImportError:
+         raise ImportError(
+             "Using pynvjitlink requires the pynvjitlink package to be available"
+         )
+
+     if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
+         raise ValueError(
+             "Can't set CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY and "
+             "CUDA_ENABLE_PYNVJITLINK at the same time"
+         )
+
+
  def make_logger():
      logger = logging.getLogger(__name__)
      # is logging configured?
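Note (not part of the diff): a minimal sketch of opting in to the new pynvjitlink-backed linker. It assumes the pynvjitlink package is installed and a CUDA 12 driver is present; the environment variable is read once, at numba.cuda import time, by the code above.

    import os

    # Must be set before numba.cuda is imported; "1" or "true" enables it.
    os.environ["NUMBA_CUDA_ENABLE_PYNVJITLINK"] = "1"

    from numba import cuda  # driver.py picks the setting up on import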
@@ -432,7 +480,7 @@ class Driver(object):

      def get_version(self):
          """
-         Returns the CUDA Runtime version as a tuple (major, minor).
+         Returns the CUDA Driver version as a tuple (major, minor).
          """
          if USE_NV_BINDING:
              version = driver.cuDriverGetVersion()
@@ -2546,38 +2594,47 @@ def launch_kernel(cufunc_handle,
                            extra)


- if USE_NV_BINDING:
-     jitty = binding.CUjitInputType
-     FILE_EXTENSION_MAP = {
-         'o': jitty.CU_JIT_INPUT_OBJECT,
-         'ptx': jitty.CU_JIT_INPUT_PTX,
-         'a': jitty.CU_JIT_INPUT_LIBRARY,
-         'lib': jitty.CU_JIT_INPUT_LIBRARY,
-         'cubin': jitty.CU_JIT_INPUT_CUBIN,
-         'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
-     }
- else:
-     FILE_EXTENSION_MAP = {
-         'o': enums.CU_JIT_INPUT_OBJECT,
-         'ptx': enums.CU_JIT_INPUT_PTX,
-         'a': enums.CU_JIT_INPUT_LIBRARY,
-         'lib': enums.CU_JIT_INPUT_LIBRARY,
-         'cubin': enums.CU_JIT_INPUT_CUBIN,
-         'fatbin': enums.CU_JIT_INPUT_FATBINARY,
-     }
-
-
  class Linker(metaclass=ABCMeta):
      """Abstract base class for linkers"""

      @classmethod
-     def new(cls, max_registers=0, lineinfo=False, cc=None):
-         if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
-             return MVCLinker(max_registers, lineinfo, cc)
-         elif USE_NV_BINDING:
-             return CudaPythonLinker(max_registers, lineinfo, cc)
+     def new(cls,
+             max_registers=0,
+             lineinfo=False,
+             cc=None,
+             lto=None,
+             additional_flags=None
+             ):
+
+         driver_ver = driver.get_version()
+         if (
+             config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY
+             and driver_ver >= (12, 0)
+         ):
+             raise ValueError(
+                 "Use CUDA_ENABLE_PYNVJITLINK for CUDA >= 12.0 MVC"
+             )
+         if config.CUDA_ENABLE_PYNVJITLINK and driver_ver < (12, 0):
+             raise ValueError(
+                 "Enabling pynvjitlink requires CUDA 12."
+             )
+         if config.CUDA_ENABLE_PYNVJITLINK:
+             linker = PyNvJitLinker
+
+         elif config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
+             linker = MVCLinker
+         else:
+             if USE_NV_BINDING:
+                 linker = CudaPythonLinker
+             else:
+                 linker = CtypesLinker
+
+         if linker is PyNvJitLinker:
+             return linker(max_registers, lineinfo, cc, lto, additional_flags)
+         elif additional_flags or lto:
+             raise ValueError("LTO and additional flags require PyNvJitLinker")
          else:
-             return CtypesLinker(max_registers, lineinfo, cc)
+             return linker(max_registers, lineinfo, cc)

      @abstractmethod
      def __init__(self, max_registers, lineinfo, cc):
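Note (not part of the diff): a sketch of how the new selection logic behaves, assuming a CUDA 12 driver and CUDA_ENABLE_PYNVJITLINK set as above; the values are illustrative.

    from numba.cuda.cudadrv import driver

    # With pynvjitlink enabled this returns a PyNvJitLinker; without it,
    # passing lto or additional_flags raises
    # ValueError("LTO and additional flags require PyNvJitLinker").
    linker = driver.Linker.new(cc=(7, 5), lto=True)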
@@ -2626,19 +2683,42 @@ class Linker(metaclass=ABCMeta):
              cu = f.read()
          self.add_cu(cu, os.path.basename(path))

-     def add_file_guess_ext(self, path):
-         """Add a file to the link, guessing its type from its extension."""
-         ext = os.path.splitext(path)[1][1:]
-         if ext == '':
-             raise RuntimeError("Don't know how to link file with no extension")
-         elif ext == 'cu':
-             self.add_cu_file(path)
+     def add_file_guess_ext(self, path_or_code):
+         """
+         Add a file or LinkableCode object to the link. If a file is
+         passed, the type will be inferred from the extension. A LinkableCode
+         object represents a file already in memory.
+         """
+         if isinstance(path_or_code, str):
+             ext = pathlib.Path(path_or_code).suffix
+             if ext == '':
+                 raise RuntimeError(
+                     "Don't know how to link file with no extension"
+                 )
+             elif ext == '.cu':
+                 self.add_cu_file(path_or_code)
+             else:
+                 kind = FILE_EXTENSION_MAP.get(ext.lstrip('.'), None)
+                 if kind is None:
+                     raise RuntimeError(
+                         "Don't know how to link file with extension "
+                         f"{ext}"
+                     )
+                 self.add_file(path_or_code, kind)
+             return
          else:
-             kind = FILE_EXTENSION_MAP.get(ext, None)
-             if kind is None:
-                 raise RuntimeError("Don't know how to link file with extension "
-                                    f".{ext}")
-             self.add_file(path, kind)
+             # Otherwise, we should have been given a LinkableCode object
+             if not isinstance(path_or_code, LinkableCode):
+                 raise TypeError(
+                     "Expected path to file or a LinkableCode object"
+                 )
+
+             if path_or_code.kind == "cu":
+                 self.add_cu(path_or_code.data, path_or_code.name)
+             else:
+                 self.add_data(
+                     path_or_code.data, path_or_code.kind, path_or_code.name
+                 )

      @abstractmethod
      def complete(self):
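Note (not part of the diff): add_file_guess_ext() now accepts either a path, dispatching on the file extension, or an in-memory LinkableCode object, dispatching on its kind. A rough sketch, with `linker` and `ptx_bytes` as placeholder names:

    from numba.cuda.cudadrv.linkable_code import PTXSource

    linker.add_file_guess_ext("my_functions.cu")      # inferred from ".cu"
    linker.add_file_guess_ext(PTXSource(ptx_bytes))   # inferred from .kind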
@@ -2649,12 +2729,6 @@ class Linker(metaclass=ABCMeta):
          """


- _MVC_ERROR_MESSAGE = (
-     "Minor version compatibility requires ptxcompiler and cubinlinker packages "
-     "to be available"
- )
-
-
  class MVCLinker(Linker):
      """
      Linker supporting Minor Version Compatibility, backed by the cubinlinker
@@ -2930,6 +3004,94 @@ class CudaPythonLinker(Linker):
          return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))


+ class PyNvJitLinker(Linker):
+     def __init__(
+         self,
+         max_registers=None,
+         lineinfo=False,
+         cc=None,
+         lto=False,
+         additional_flags=None,
+     ):
+
+         if cc is None:
+             raise RuntimeError("PyNvJitLinker requires CC to be specified")
+         if not any(isinstance(cc, t) for t in [list, tuple]):
+             raise TypeError("`cc` must be a list or tuple of length 2")
+
+         sm_ver = f"{cc[0] * 10 + cc[1]}"
+         arch = f"-arch=sm_{sm_ver}"
+         options = [arch]
+         if max_registers:
+             options.append(f"-maxrregcount={max_registers}")
+         if lineinfo:
+             options.append("-lineinfo")
+         if lto:
+             options.append("-lto")
+         if additional_flags is not None:
+             options.extend(additional_flags)
+
+         self._linker = NvJitLinker(*options)
+         self.lto = lto
+         self.options = options
+
+     @property
+     def info_log(self):
+         return self._linker.info_log
+
+     @property
+     def error_log(self):
+         return self._linker.error_log
+
+     def add_ptx(self, ptx, name="<cudapy-ptx>"):
+         self._linker.add_ptx(ptx, name)
+
+     def add_fatbin(self, fatbin, name="<external-fatbin>"):
+         self._linker.add_fatbin(fatbin, name)
+
+     def add_ltoir(self, ltoir, name="<external-ltoir>"):
+         self._linker.add_ltoir(ltoir, name)
+
+     def add_object(self, obj, name="<external-object>"):
+         self._linker.add_object(obj, name)
+
+     def add_file(self, path, kind):
+         try:
+             with open(path, "rb") as f:
+                 data = f.read()
+         except FileNotFoundError:
+             raise LinkerError(f"{path} not found")
+
+         name = pathlib.Path(path).name
+         self.add_data(data, kind, name)
+
+     def add_data(self, data, kind, name):
+         if kind == FILE_EXTENSION_MAP["cubin"]:
+             fn = self._linker.add_cubin
+         elif kind == FILE_EXTENSION_MAP["fatbin"]:
+             fn = self._linker.add_fatbin
+         elif kind == FILE_EXTENSION_MAP["a"]:
+             fn = self._linker.add_library
+         elif kind == FILE_EXTENSION_MAP["ptx"]:
+             return self.add_ptx(data, name)
+         elif kind == FILE_EXTENSION_MAP["o"]:
+             fn = self._linker.add_object
+         elif kind == FILE_EXTENSION_MAP["ltoir"]:
+             fn = self._linker.add_ltoir
+         else:
+             raise LinkerError(f"Don't know how to link {kind}")
+
+         try:
+             fn(data, name)
+         except NvJitLinkError as e:
+             raise LinkerError from e
+
+     def complete(self):
+         try:
+             return self._linker.get_linked_cubin()
+         except NvJitLinkError as e:
+             raise LinkerError from e
+
  # -----------------------------------------------------------------------------
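Note (not part of the diff): a minimal, hypothetical sketch of driving PyNvJitLinker directly; it assumes pynvjitlink is enabled and that "my_functions.cubin" exists on disk.

    from numba.cuda.cudadrv.driver import PyNvJitLinker

    linker = PyNvJitLinker(cc=(7, 5))                 # -arch=sm_75
    linker.add_file_guess_ext("my_functions.cubin")   # routed to add_data via the cubin kind
    cubin = linker.complete()                         # bytes of the linked cubin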


numba_cuda/numba/cuda/cudadrv/enums.py CHANGED
@@ -309,6 +309,9 @@ CU_JIT_INPUT_OBJECT = 3
  # Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
  CU_JIT_INPUT_LIBRARY = 4

+ # LTO IR
+ CU_JIT_INPUT_NVVM = 5
+
  CU_JIT_NUM_INPUT_TYPES = 6

numba_cuda/numba/cuda/cudadrv/linkable_code.py ADDED
@@ -0,0 +1,63 @@
+ from .mappings import FILE_EXTENSION_MAP
+
+
+ class LinkableCode:
+     """An object that can be passed in the `link` list argument to `@cuda.jit`
+     kernels to supply code to be linked from memory."""
+
+     def __init__(self, data, name=None):
+         self.data = data
+         self._name = name
+
+     @property
+     def name(self):
+         return self._name or self.default_name
+
+
+ class PTXSource(LinkableCode):
+     """PTX Source code in memory"""
+
+     kind = FILE_EXTENSION_MAP["ptx"]
+     default_name = "<unnamed-ptx>"
+
+
+ class CUSource(LinkableCode):
+     """CUDA C/C++ Source code in memory"""
+
+     kind = "cu"
+     default_name = "<unnamed-cu>"
+
+
+ class Fatbin(LinkableCode):
+     """A fatbin ELF in memory"""
+
+     kind = FILE_EXTENSION_MAP["fatbin"]
+     default_name = "<unnamed-fatbin>"
+
+
+ class Cubin(LinkableCode):
+     """A cubin ELF in memory"""
+
+     kind = FILE_EXTENSION_MAP["cubin"]
+     default_name = "<unnamed-cubin>"
+
+
+ class Archive(LinkableCode):
+     """An archive of objects in memory"""
+
+     kind = FILE_EXTENSION_MAP["a"]
+     default_name = "<unnamed-archive>"
+
+
+ class Object(LinkableCode):
+     """An object file in memory"""
+
+     kind = FILE_EXTENSION_MAP["o"]
+     default_name = "<unnamed-object>"
+
+
+ class LTOIR(LinkableCode):
+     """An LTOIR file in memory"""
+
+     kind = "ltoir"
+     default_name = "<unnamed-ltoir>"
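Note (not part of the diff): a minimal sketch of linking in-memory CUDA C++ with the new CUSource class. The source string and function names are illustrative only, and follow the usual declare_device convention (an int status return, with the result written through the first pointer argument).

    from numba import cuda
    from numba.cuda.cudadrv.linkable_code import CUSource

    saxpy_cu = CUSource('''
    extern "C" __device__ int saxpy(float *out, float a, float x, float y) {
        *out = a * x + y;
        return 0;
    }
    ''')

    saxpy = cuda.declare_device("saxpy", "float32(float32, float32, float32)")

    @cuda.jit(link=[saxpy_cu])
    def kernel(out, a, x, y):
        out[0] = saxpy(a, x, y)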
numba_cuda/numba/cuda/cudadrv/mappings.py ADDED
@@ -0,0 +1,24 @@
+ from numba import config
+ from . import enums
+ if config.CUDA_USE_NVIDIA_BINDING:
+     from cuda import cuda
+     jitty = cuda.CUjitInputType
+     FILE_EXTENSION_MAP = {
+         'o': jitty.CU_JIT_INPUT_OBJECT,
+         'ptx': jitty.CU_JIT_INPUT_PTX,
+         'a': jitty.CU_JIT_INPUT_LIBRARY,
+         'lib': jitty.CU_JIT_INPUT_LIBRARY,
+         'cubin': jitty.CU_JIT_INPUT_CUBIN,
+         'fatbin': jitty.CU_JIT_INPUT_FATBINARY,
+         'ltoir': jitty.CU_JIT_INPUT_NVVM,
+     }
+ else:
+     FILE_EXTENSION_MAP = {
+         'o': enums.CU_JIT_INPUT_OBJECT,
+         'ptx': enums.CU_JIT_INPUT_PTX,
+         'a': enums.CU_JIT_INPUT_LIBRARY,
+         'lib': enums.CU_JIT_INPUT_LIBRARY,
+         'cubin': enums.CU_JIT_INPUT_CUBIN,
+         'fatbin': enums.CU_JIT_INPUT_FATBINARY,
+         'ltoir': enums.CU_JIT_INPUT_NVVM,
+     }
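Note (not part of the diff): FILE_EXTENSION_MAP resolves a file extension to the jit input type expected by the linker's add_file()/add_data() methods, under either binding. A trivial sketch:

    from numba.cuda.cudadrv.mappings import FILE_EXTENSION_MAP

    kind = FILE_EXTENSION_MAP["ltoir"]   # CU_JIT_INPUT_NVVM, the new LTO-IR kind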
numba_cuda/numba/cuda/device_init.py CHANGED
@@ -31,6 +31,9 @@ from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
                                  shfl_xor_sync)

  from .kernels import reduction
+ from numba.cuda.cudadrv.linkable_code import (
+     Archive, CUSource, Cubin, Fatbin, LinkableCode, LTOIR, Object, PTXSource
+ )

  reduce = Reduce = reduction.Reduce

numba_cuda/numba/cuda/dispatcher.py CHANGED
@@ -46,7 +46,7 @@ class _Kernel(serialize.ReduceMixin):
      @global_compiler_lock
      def __init__(self, py_func, argtypes, link=None, debug=False,
                   lineinfo=False, inline=False, fastmath=False, extensions=None,
-                  max_registers=None, opt=True, device=False):
+                  max_registers=None, lto=False, opt=True, device=False):

          if device:
              raise RuntimeError('Cannot compile a device function as a kernel')
@@ -94,7 +94,7 @@ class _Kernel(serialize.ReduceMixin):
          lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
                                                    debug, lineinfo, nvvm_options,
                                                    filename, linenum,
-                                                   max_registers)
+                                                   max_registers, lto)

          if not link:
              link = []
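Note (not part of the diff): the new lto flag as exercised by the test_nvjitlink tests further down; it requires the pynvjitlink-backed linker, and "functions.ltoir" is a placeholder for an LTO-IR input built ahead of time.

    from numba import cuda

    add = cuda.declare_device("add_from_numba", "uint32(uint32, uint32)")

    @cuda.jit(link=["functions.ltoir"], lto=True)
    def kernel(result):
        result[0] = add(1, 2)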
numba_cuda/numba/cuda/intrinsics.py CHANGED
@@ -4,7 +4,7 @@ from numba import cuda, types
  from numba.core import cgutils
  from numba.core.errors import RequireLiteralValue
  from numba.core.typing import signature
- from numba.core.extending import overload_attribute
+ from numba.core.extending import overload_attribute, overload_method
  from numba.cuda import nvvmutils
  from numba.cuda.extending import intrinsic

@@ -196,3 +196,8 @@ def syncthreads_or(typingctx, predicate):
      '''
      fname = 'llvm.nvvm.barrier0.or'
      return _syncthreads_predicate(typingctx, predicate, fname)
+
+
+ @overload_method(types.Integer, 'bit_count', target='cuda')
+ def integer_bit_count(i):
+     return lambda i: cuda.popc(i)
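Note (not part of the diff): a minimal sketch of the new int.bit_count() support in device code, which lowers to cuda.popc().

    import numpy as np
    from numba import cuda

    @cuda.jit
    def count_bits(out, value):
        out[0] = value.bit_count()

    out = np.zeros(1, dtype=np.int32)
    count_bits[1, 1](out, np.uint32(0b1011))   # out[0] == 3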
numba_cuda/numba/cuda/printimpl.py CHANGED
@@ -63,6 +63,17 @@ def dim3_print_impl(ty, context, builder, val):
      return rawfmt, [x, y, z]


+ @print_item.register(types.Boolean)
+ def bool_print_impl(ty, context, builder, val):
+     true_string = context.insert_string_const_addrspace(builder, "True")
+     false_string = context.insert_string_const_addrspace(builder, "False")
+     res_ptr = cgutils.alloca_once_value(builder, false_string)
+     with builder.if_then(val):
+         builder.store(true_string, res_ptr)
+     rawfmt = "%s"
+     return rawfmt, [builder.load(res_ptr)]
+
+
  @lower(print, types.VarArg(types.Any))
  def print_varargs(context, builder, sig, args):
      """This function is a generic 'print' wrapper for arbitrary types.
numba_cuda/numba/cuda/target.py CHANGED
@@ -148,7 +148,7 @@ class CUDATargetContext(BaseContext):

      def prepare_cuda_kernel(self, codelib, fndesc, debug, lineinfo,
                              nvvm_options, filename, linenum,
-                             max_registers=None):
+                             max_registers=None, lto=False):
          """
          Adapt a code library ``codelib`` with the numba compiled CUDA kernel
          with name ``fname`` and arguments ``argtypes`` for NVVM.
@@ -175,7 +175,9 @@ class CUDATargetContext(BaseContext):
          library = self.codegen().create_library(f'{codelib.name}_kernel_',
                                                  entry_name=kernel_name,
                                                  nvvm_options=nvvm_options,
-                                                 max_registers=max_registers)
+                                                 max_registers=max_registers,
+                                                 lto=lto
+                                                 )
          library.add_linking_library(codelib)
          wrapper = self.generate_kernel_wrapper(library, fndesc, kernel_name,
                                                 debug, lineinfo, filename,
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py ADDED
@@ -0,0 +1,199 @@
+ from numba.cuda.testing import unittest
+ from numba.cuda.testing import skip_on_cudasim
+ from numba.cuda.testing import CUDATestCase
+ from numba.cuda.cudadrv.driver import PyNvJitLinker
+
+ import itertools
+ import os
+ from numba.cuda import get_current_device
+ from numba import cuda
+ from numba import config
+
+ TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
+ if TEST_BIN_DIR:
+     test_device_functions_a = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.a"
+     )
+     test_device_functions_cubin = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.cubin"
+     )
+     test_device_functions_cu = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.cu"
+     )
+     test_device_functions_fatbin = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.fatbin"
+     )
+     test_device_functions_o = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.o"
+     )
+     test_device_functions_ptx = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.ptx"
+     )
+     test_device_functions_ltoir = os.path.join(
+         TEST_BIN_DIR, "test_device_functions.ltoir"
+     )
+
+
+ @unittest.skipIf(
+     not config.CUDA_ENABLE_PYNVJITLINK or not TEST_BIN_DIR,
+     "pynvjitlink not enabled"
+ )
+ @skip_on_cudasim("Linking unsupported in the simulator")
+ class TestLinker(CUDATestCase):
+     _NUMBA_NVIDIA_BINDING_0_ENV = {"NUMBA_CUDA_USE_NVIDIA_BINDING": "0"}
+
+     def test_nvjitlink_create(self):
+         patched_linker = PyNvJitLinker(cc=(7, 5))
+         assert "-arch=sm_75" in patched_linker.options
+
+     def test_nvjitlink_create_no_cc_error(self):
+         # nvJitLink expects at least the architecture to be specified.
+         with self.assertRaisesRegex(
+             RuntimeError, "PyNvJitLinker requires CC to be specified"
+         ):
+             PyNvJitLinker()
+
+     def test_nvjitlink_invalid_arch_error(self):
+         from pynvjitlink.api import NvJitLinkError
+
+         # CC 0.0 is not a valid compute capability
+         with self.assertRaisesRegex(
+             NvJitLinkError, "NVJITLINK_ERROR_UNRECOGNIZED_OPTION error"
+         ):
+             PyNvJitLinker(cc=(0, 0))
+
+     def test_nvjitlink_invalid_cc_type_error(self):
+         with self.assertRaisesRegex(
+             TypeError, "`cc` must be a list or tuple of length 2"
+         ):
+             PyNvJitLinker(cc=0)
+
+     def test_nvjitlink_ptx_compile_options(self):
+
+         max_registers = (None, 32)
+         lineinfo = (False, True)
+         lto = (False, True)
+         additional_flags = (None, ("-g",), ("-g", "-time"))
+         for (
+             max_registers_i,
+             line_info_i,
+             lto_i,
+             additional_flags_i,
+         ) in itertools.product(max_registers, lineinfo, lto, additional_flags):
+             with self.subTest(
+                 max_registers=max_registers_i,
+                 lineinfo=line_info_i,
+                 lto=lto_i,
+                 additional_flags=additional_flags_i,
+             ):
+                 patched_linker = PyNvJitLinker(
+                     cc=(7, 5),
+                     max_registers=max_registers_i,
+                     lineinfo=line_info_i,
+                     lto=lto_i,
+                     additional_flags=additional_flags_i,
+                 )
+                 assert "-arch=sm_75" in patched_linker.options
+
+                 if max_registers_i:
+                     assert (
+                         f"-maxrregcount={max_registers_i}"
+                         in patched_linker.options
+                     )
+                 else:
+                     assert "-maxrregcount" not in patched_linker.options
+
+                 if line_info_i:
+                     assert "-lineinfo" in patched_linker.options
+                 else:
+                     assert "-lineinfo" not in patched_linker.options
+
+                 if lto_i:
+                     assert "-lto" in patched_linker.options
+                 else:
+                     assert "-lto" not in patched_linker.options
+
+                 if additional_flags_i:
+                     for flag in additional_flags_i:
+                         assert flag in patched_linker.options
+
+     def test_nvjitlink_add_file_guess_ext_linkable_code(self):
+         files = (
+             test_device_functions_a,
+             test_device_functions_cubin,
+             test_device_functions_cu,
+             test_device_functions_fatbin,
+             test_device_functions_o,
+             test_device_functions_ptx,
+         )
+         for file in files:
+             with self.subTest(file=file):
+                 patched_linker = PyNvJitLinker(
+                     cc=get_current_device().compute_capability
+                 )
+                 patched_linker.add_file_guess_ext(file)
+
+     def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
+         with open(test_device_functions_cubin, "rb") as f:
+             content = f.read()
+
+         patched_linker = PyNvJitLinker(
+             cc=get_current_device().compute_capability
+         )
+         with self.assertRaisesRegex(
+             TypeError, "Expected path to file or a LinkableCode"
+         ):
+             # Feeding raw data as bytes to add_file_guess_ext should raise,
+             # because there's no way to know what kind of file to treat it as
+             patched_linker.add_file_guess_ext(content)
+
+     def test_nvjitlink_jit_with_linkable_code(self):
+         files = (
+             test_device_functions_a,
+             test_device_functions_cubin,
+             test_device_functions_cu,
+             test_device_functions_fatbin,
+             test_device_functions_o,
+             test_device_functions_ptx,
+         )
+         for file in files:
+             with self.subTest(file=file):
+                 sig = "uint32(uint32, uint32)"
+                 add_from_numba = cuda.declare_device("add_from_numba", sig)
+
+                 @cuda.jit(link=[file])
+                 def kernel(result):
+                     result[0] = add_from_numba(1, 2)
+
+                 result = cuda.device_array(1)
+                 kernel[1, 1](result)
+                 assert result[0] == 3
+
+     def test_nvjitlink_jit_with_linkable_code_lto(self):
+         file = test_device_functions_ltoir
+
+         sig = "uint32(uint32, uint32)"
+         add_from_numba = cuda.declare_device("add_from_numba", sig)
+
+         @cuda.jit(link=[file], lto=True)
+         def kernel(result):
+             result[0] = add_from_numba(1, 2)
+
+         result = cuda.device_array(1)
+         kernel[1, 1](result)
+         assert result[0] == 3
+
+     def test_nvjitlink_jit_with_invalid_linkable_code(self):
+         with open(test_device_functions_cubin, "rb") as f:
+             content = f.read()
+         with self.assertRaisesRegex(
+             TypeError, "Expected path to file or a LinkableCode"
+         ):
+
+             @cuda.jit("void()", link=[content])
+             def kernel():
+                 pass
+
+
+ if __name__ == "__main__":
+     unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py CHANGED
@@ -68,6 +68,10 @@ def simple_popc(ary, c):
      ary[0] = cuda.popc(c)


+ def simple_bit_count(ary, c):
+     ary[0] = c.bit_count()
+
+
  def simple_fma(ary, a, b, c):
      ary[0] = cuda.fma(a, b, c)

@@ -550,17 +554,53 @@ class TestCudaIntrinsic(CUDATestCase):

          self.assertTrue(np.all(arr))

+     def test_popc_u1(self):
+         compiled = cuda.jit("void(int32[:], uint8)")(simple_popc)
+         ary = np.zeros(1, dtype=np.int8)
+         compiled[1, 1](ary, np.uint8(0xFF))
+         self.assertEqual(ary[0], 8)
+
+     def test_popc_u2(self):
+         compiled = cuda.jit("void(int32[:], uint16)")(simple_popc)
+         ary = np.zeros(1, dtype=np.int16)
+         compiled[1, 1](ary, np.uint16(0xFFFF))
+         self.assertEqual(ary[0], 16)
+
      def test_popc_u4(self):
          compiled = cuda.jit("void(int32[:], uint32)")(simple_popc)
          ary = np.zeros(1, dtype=np.int32)
-         compiled[1, 1](ary, 0xF0)
-         self.assertEqual(ary[0], 4)
+         compiled[1, 1](ary, np.uint32(0xFFFFFFFF))
+         self.assertEqual(ary[0], 32)

      def test_popc_u8(self):
          compiled = cuda.jit("void(int32[:], uint64)")(simple_popc)
          ary = np.zeros(1, dtype=np.int32)
-         compiled[1, 1](ary, 0xF00000000000)
-         self.assertEqual(ary[0], 4)
+         compiled[1, 1](ary, np.uint64(0xFFFFFFFFFFFFFFFF))
+         self.assertEqual(ary[0], 64)
+
+     def test_bit_count_u1(self):
+         compiled = cuda.jit("void(int32[:], uint8)")(simple_bit_count)
+         ary = np.zeros(1, dtype=np.int8)
+         compiled[1, 1](ary, np.uint8(0xFF))
+         self.assertEqual(ary[0], 8)
+
+     def test_bit_count_u2(self):
+         compiled = cuda.jit("void(int32[:], uint16)")(simple_bit_count)
+         ary = np.zeros(1, dtype=np.int16)
+         compiled[1, 1](ary, np.uint16(0xFFFF))
+         self.assertEqual(ary[0], 16)
+
+     def test_bit_count_u4(self):
+         compiled = cuda.jit("void(int32[:], uint32)")(simple_bit_count)
+         ary = np.zeros(1, dtype=np.int32)
+         compiled[1, 1](ary, np.uint32(0xFFFFFFFF))
+         self.assertEqual(ary[0], 32)
+
+     def test_bit_count_u8(self):
+         compiled = cuda.jit("void(int32[:], uint64)")(simple_bit_count)
+         ary = np.zeros(1, dtype=np.int32)
+         compiled[1, 1](ary, np.uint64(0xFFFFFFFFFFFFFFFF))
+         self.assertEqual(ary[0], 64)

      def test_fma_f4(self):
          compiled = cuda.jit("void(f4[:], f4, f4, f4)")(simple_fma)
numba_cuda/numba/cuda/tests/cudapy/test_print.py CHANGED
@@ -32,6 +32,21 @@ cuda.synchronize()
  """


+ printbool_usecase = """\
+ from numba import cuda
+
+ @cuda.jit
+ def printbool(x):
+     print(True)
+     print(False)
+     print(x == 0)
+
+ printbool[1, 1](0)
+ printbool[1, 1](1)
+ cuda.synchronize()
+ """
+
+
  printstring_usecase = """\
  from numba import cuda

@@ -109,6 +124,11 @@ class TestPrint(CUDATestCase):
          expected_cases = ["0 23 34.750000 321", "0 23 34.75 321"]
          self.assertIn(output.strip(), expected_cases)

+     def test_bool(self):
+         output, _ = self.run_code(printbool_usecase)
+         expected = "True\nFalse\nTrue\nTrue\nFalse\nFalse"
+         self.assertEqual(output.strip(), expected)
+
      def test_printempty(self):
          output, _ = self.run_code(printempty_usecase)
          self.assertEqual(output.strip(), "")
numba_cuda/numba/cuda/tests/test_binary_generation/Makefile ADDED
@@ -0,0 +1,51 @@
+ # Generates the input files used by the pynvjitlink binding test suite
+
+ # Test binaries are built taking into account the CC of the GPU in the test machine
+ GPU_CC := $(shell nvidia-smi --query-gpu=compute_cap --format=csv | grep -v compute_cap | head -n 1 | sed 's/\.//')
+ GPU_CC ?= 75
+
+ # Use CC 7.0 as an alternative in fatbin testing, unless CC is 7.x
+ ifeq ($(shell echo "$(GPU_CC)" | cut -c1),7)
+ ALT_CC := 80
+ else
+ ALT_CC := 70
+ endif
+
+ # Gencode flags suitable for most tests
+ GENCODE := -gencode arch=compute_$(GPU_CC),code=sm_$(GPU_CC)
+
+ # Fatbin tests need to generate code for an additional compute capability
+ FATBIN_GENCODE := $(GENCODE) -gencode arch=compute_$(ALT_CC),code=sm_$(ALT_CC)
+
+ # LTO-IR tests need to generate for the LTO "architecture" instead
+ LTOIR_GENCODE := -gencode arch=lto_$(GPU_CC),code=lto_$(GPU_CC)
+
+ # Compile with optimization; use relocatable device code to preserve device
+ # functions in the final output
+ NVCC_FLAGS := -O3 -rdc true
+
+ # Flags specific to output type
+ CUBIN_FLAGS := $(GENCODE) --cubin
+ PTX_FLAGS := $(GENCODE) -ptx
+ OBJECT_FLAGS := $(GENCODE) -dc
+ LIBRARY_FLAGS := $(GENCODE) -lib
+ FATBIN_FLAGS := $(FATBIN_GENCODE) --fatbin
+ LTOIR_FLAGS := $(LTOIR_GENCODE) -dc
+
+ OUTPUT_DIR := ./
+
+ all:
+ 	@echo "GPU CC: $(GPU_CC)"
+ 	@echo "Alternative CC: $(ALT_CC)"
+ 	# Compile all test objects
+ 	nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/undefined_extern.cubin undefined_extern.cu
+ 	nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.cubin test_device_functions.cu
+ 	nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.fatbin test_device_functions.cu
+ 	nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ptx test_device_functions.cu
+ 	nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.o test_device_functions.cu
+ 	nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.a test_device_functions.cu
+
+ 	# Generate LTO-IR wrapped in a fatbin
+ 	nvcc $(NVCC_FLAGS) $(LTOIR_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ltoir.o test_device_functions.cu
+ 	# Generate LTO-IR in a "raw" LTO-IR container
+ 	python generate_raw_ltoir.py --arch sm_$(GPU_CC) -o $(OUTPUT_DIR)/test_device_functions.ltoir test_device_functions.cu
numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py ADDED
@@ -0,0 +1,163 @@
+ # Copyright (c) 2024, NVIDIA CORPORATION.
+
+ import argparse
+ import pathlib
+ import subprocess
+ import sys
+
+ from cuda import nvrtc
+
+ # Magic number found at the start of an LTO-IR file
+ LTOIR_MAGIC = 0x7F4E43ED
+
+
+ def check(args):
+     """
+     Abort and print an error message in the presence of an error result.
+
+     Otherwise:
+     - Return None if there were no more arguments,
+     - Return the singular argument if there was only one further argument,
+     - Return the tuple of arguments if multiple followed.
+     """
+
+     result, *args = args
+     value = result.value
+
+     if value:
+         error_string = check(nvrtc.nvrtcGetErrorString(result)).decode()
+         msg = f"NVRTC error, code {value}: {error_string}"
+         print(msg, file=sys.stderr)
+         sys.exit(1)
+
+     if len(args) == 0:
+         return None
+     elif len(args) == 1:
+         return args[0]
+     else:
+         return args
+
+
+ def determine_include_flags():
+     # Inspired by the logic in FindCUDAToolkit.cmake. We need the CUDA include
+     # paths because NVRTC doesn't add them by default, and we can compile a
+     # much broader set of test files if the CUDA includes are available.
+
+     # We invoke NVCC in verbose mode ("-v") and give a dummy filename, without
+     # which it won't produce output.
+
+     cmd = ["nvcc", "-v", "__dummy"]
+     cp = subprocess.run(cmd, capture_output=True)
+
+     # Since the dummy file doesn't actually exist, NVCC is expected to exit
+     # with an error code of 1.
+     rc = cp.returncode
+     if rc != 1:
+         print(f"Unexpected return code ({rc}) from `nvcc -v`. Expected 1.")
+         return None
+
+     output = cp.stderr.decode()
+     lines = output.splitlines()
+
+     includes_lines = [line for line in lines if line.startswith("#$ INCLUDES=")]
+     if len(includes_lines) != 1:
+         print(f"Expected exactly one INCLUDES line. Got {len(includes_lines)}.")
+         return None
+
+     # Parse out the arguments following "INCLUDES=" - these are a space
+     # separated list of strings that are potentially quoted.
+
+     quoted_flags = includes_lines[0].split("INCLUDES=")[1].strip().split()
+     include_flags = [flag.strip('"') for flag in quoted_flags]
+     print(f"Using CUDA include flags: {include_flags}")
+
+     return include_flags
+
+
+ def get_ltoir(source, name, arch):
+     """Given a CUDA C/C++ source, compile it and return the LTO-IR."""
+
+     program = check(
+         nvrtc.nvrtcCreateProgram(source.encode(), name.encode(), 0, [], [])
+     )
+
+     cuda_include_flags = determine_include_flags()
+     if cuda_include_flags is None:
+         print("Error determining CUDA include flags. Exiting.", file=sys.stderr)
+         sys.exit(1)
+
+     options = [
+         f"--gpu-architecture={arch}",
+         "-dlto",
+         "-rdc",
+         "true",
+         *cuda_include_flags,
+     ]
+     options = [o.encode() for o in options]
+
+     result = nvrtc.nvrtcCompileProgram(program, len(options), options)
+
+     # Report compilation errors back to the user
+     if result[0] == nvrtc.nvrtcResult.NVRTC_ERROR_COMPILATION:
+         log_size = check(nvrtc.nvrtcGetProgramLogSize(program))
+         log = b" " * log_size
+         check(nvrtc.nvrtcGetProgramLog(program, log))
+         print("NVRTC compilation error:\n", file=sys.stderr)
+         print(log.decode(), file=sys.stderr)
+         sys.exit(1)
+
+     # Handle other errors in the standard way
+     check(result)
+
+     ltoir_size = check(nvrtc.nvrtcGetLTOIRSize(program))
+     ltoir = b" " * ltoir_size
+     check(nvrtc.nvrtcGetLTOIR(program, ltoir))
+
+     # Check that the output looks like an LTO-IR container
+     header = int.from_bytes(ltoir[:4], byteorder="little")
+     if header != LTOIR_MAGIC:
+         print(
+             f"Unexpected header value 0x{header:X}.\n"
+             f"Expected LTO-IR magic number 0x{LTOIR_MAGIC:X}."
+             "\nExiting.",
+             file=sys.stderr,
+         )
+         sys.exit(1)
+
+     return ltoir
+
+
+ def main(sourcepath, outputpath, arch):
+     with open(sourcepath) as f:
+         source = f.read()
+
+     name = pathlib.Path(sourcepath).name
+     ltoir = get_ltoir(source, name, arch)
+
+     print(f"Writing {outputpath}...")
+
+     with open(outputpath, "wb") as f:
+         f.write(ltoir)
+
+
+ if __name__ == "__main__":
+     description = "Compiles CUDA C/C++ to LTO-IR using NVRTC."
+     parser = argparse.ArgumentParser(description=description)
+     parser.add_argument("sourcepath", help="path to source file")
+     parser.add_argument(
+         "-o", "--output", help="path to output file", default=None
+     )
+     parser.add_argument(
+         "-a",
+         "--arch",
+         help="compute arch to target (e.g. sm_87). " "Defaults to sm_50.",
+         default="sm_50",
+     )
+
+     args = parser.parse_args()
+     outputpath = args.output
+
+     if outputpath is None:
+         outputpath = pathlib.Path(args.sourcepath).with_suffix(".ltoir")
+
+     main(args.sourcepath, outputpath, args.arch)
numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu ADDED
@@ -0,0 +1,19 @@
+ #include <cuda_fp16.h>
+
+ extern __device__ bool __heq(__half arg1, __half arg2);
+
+ __device__ __half test_add_fp16(__half arg1, __half arg2) {
+   return __hadd(arg1, arg2);
+ }
+
+ __device__ bool test_cmp_fp16(__half arg1, __half arg2) {
+   return __heq(arg1, arg2);
+ }
+
+ typedef unsigned int uint32_t;
+
+ extern "C" __device__ int add_from_numba(uint32_t *result, uint32_t a,
+                                          uint32_t b) {
+   *result = a + b;
+   return 0;
+ }
numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu ADDED
@@ -0,0 +1,3 @@
+ extern __device__ float undef(float a, float b);
+
+ __global__ void f(float *r, float *a, float *b) { r[0] = undef(a[0], b[0]); }
numba_cuda-0.0.16.dist-info/METADATA → numba_cuda-0.0.18.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: numba-cuda
- Version: 0.0.16
+ Version: 0.0.18
  Summary: CUDA target for Numba
  Author: Anaconda Inc., NVIDIA Corporation
  License: BSD 2-clause
numba_cuda-0.0.16.dist-info/RECORD → numba_cuda-0.0.18.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
  _numba_cuda_redirector.py,sha256=rc56rnb40w3AtrqnhS66JSgYTSTsi3iTn8yP3NuoQV8,2401
- numba_cuda/VERSION,sha256=MrKpp1z4ZK4wXVG-XDLWh_uokdSUmX_-o7BTj-ugar4,7
+ numba_cuda/VERSION,sha256=9p4BNLUELS6P4gQF_geoXDc4ldjt9TTmnJlhGbwWsO0,7
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
  numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
  numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -8,7 +8,7 @@ numba_cuda/numba/cuda/api.py,sha256=shLu7NEZHRMcaZAMEXSoyA5Gi5m0tm6ZRymxKLEKCSg,
  numba_cuda/numba/cuda/api_util.py,sha256=aQfUV2-4RM_oGVvckMjbMr5e3effOQNX04v1T0O2EfQ,861
  numba_cuda/numba/cuda/args.py,sha256=HloHkw_PQal2DT-I70Xf_XbnGObS1jiUgcRrQ85Gq28,1978
  numba_cuda/numba/cuda/cg.py,sha256=9V1uZqyGOJX1aFd9c6GAPbLSqq83lE8LoP-vxxrKENY,1490
- numba_cuda/numba/cuda/codegen.py,sha256=raBoCDNt_qkDgB12yU0tbJQlA5_eTlUMemgcRHen1Vk,12174
+ numba_cuda/numba/cuda/codegen.py,sha256=9LnTlei-4JK7iq3Rg-H2Y19Oh_u5ZXMC_CPfattANjw,12358
  numba_cuda/numba/cuda/compiler.py,sha256=47SjuI5p4yWCujAglIq0Cb0ARO8QxRp4fOZropkNMtQ,16001
  numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
  numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
@@ -19,14 +19,14 @@ numba_cuda/numba/cuda/cudaimpl.py,sha256=3YMxQSCv2KClBrpuXGchrTNICV1F6NIjjL2rie5
  numba_cuda/numba/cuda/cudamath.py,sha256=EFNtdzEytAZuwijdRoFGzVKCeal76UzzaNy7wUFQx8I,3978
  numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZpwJocM,7823
  numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
- numba_cuda/numba/cuda/device_init.py,sha256=orQK7anhnmEkYPRjHEs5I9uhdBwaHeXbaSD4ViX2_14,3460
+ numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
  numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
- numba_cuda/numba/cuda/dispatcher.py,sha256=glLglJw4D03ZAK1B0N1K93M93yHfn7ZZZm7gLeue6Jk,40190
+ numba_cuda/numba/cuda/dispatcher.py,sha256=CwFksBBcjNg9dLSTgC4GgqOy2sLeZYX8mvZvdzscGBw,40206
  numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
  numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
  numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
  numba_cuda/numba/cuda/intrinsic_wrapper.py,sha256=zbcUbegbfF3GdnC2Rl-z26-gozE8xBtaMxpS8LpOhfo,2239
- numba_cuda/numba/cuda/intrinsics.py,sha256=PazoJEYpomsMRZsnXGJWDbCwUM9eJKV16if_AEAz-HY,5961
+ numba_cuda/numba/cuda/intrinsics.py,sha256=k0mQYAt0FTlJeghE5V8lSBtO4fgKH1jSRRLwHHcH4M0,6100
  numba_cuda/numba/cuda/libdevice.py,sha256=476LeIEaAth409m-0OO1SMMmY5AHzN2AotXI__k_yYE,60065
  numba_cuda/numba/cuda/libdevicedecl.py,sha256=xdZbb_rCaftMf8Pbw63g_Lr230N-1QoaYzBxq8udKTg,532
  numba_cuda/numba/cuda/libdevicefuncs.py,sha256=c80lGpGoFIYkAdgr4fzbxzdNCyJYrLdss64bwa0Mc6w,37471
@@ -34,11 +34,11 @@ numba_cuda/numba/cuda/libdeviceimpl.py,sha256=a9BmJ5kRtZ_mB7KjbDWW-PEpRuNiO_SMOx
  numba_cuda/numba/cuda/mathimpl.py,sha256=d_gCoQ4hJzNBFNc2hvRON5h1F052epgQ8zh_RKTlLlI,14416
  numba_cuda/numba/cuda/models.py,sha256=2c_seT-cWX-VyWYmcapaqOEl1M4FX6_kdIOusj4s5aE,1328
  numba_cuda/numba/cuda/nvvmutils.py,sha256=W1zr1TpnmFjTkHF0qeu5wnBHub6gzrnpzsvgmu2OLcU,8295
- numba_cuda/numba/cuda/printimpl.py,sha256=gyXZ3q0O4yECY3zmv83wIJBSCwVlXBUmRAMRDp7wqlI,3071
+ numba_cuda/numba/cuda/printimpl.py,sha256=Y1BCQ7EgO2wQ7O6LibNVYBG3tmjVTvmURATW403rLao,3504
  numba_cuda/numba/cuda/random.py,sha256=khX8iDdde_RTUPWhAqrxZacHRQAorFr7BokPuxRWzrg,10456
  numba_cuda/numba/cuda/simulator_init.py,sha256=W_bPRtmPGOQVuiprbgt7ENnnnELv_LPCeLDIsfsvFZ8,460
  numba_cuda/numba/cuda/stubs.py,sha256=W3tozv4ganMnfbdFqyPjgQXYeX8GQhwx_xXgv8jk6iM,22270
- numba_cuda/numba/cuda/target.py,sha256=EI6XuKQeqvng0uSx_V9jDoxbgFivqSz-4jczFzAbs5o,16837
+ numba_cuda/numba/cuda/target.py,sha256=LUOJRvGrX7Ch3-vLbZcjti21RAwUctdodVVcl82wYJ0,16954
  numba_cuda/numba/cuda/testing.py,sha256=E0wP2vfno1yWsl0v1zg31kpbU8FrKxTF-5y9Iv4WjA4,6412
  numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
  numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
@@ -47,12 +47,14 @@ numba_cuda/numba/cuda/vectorizers.py,sha256=u_0EzaD5tqVH8uOz4Gmqn3FgPC1rckwDAQuR
  numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=0TL4MZcJXUoo9qA7uu0vLv7eHrXRerVmyfi7O149ITw,199
  numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=B3ItYQywTnwTWjltxVRx6oaKRq7rxTtvOaiqTWsMQ2w,31123
  numba_cuda/numba/cuda/cudadrv/devices.py,sha256=6SneNmoq83gue0txFWWx4A65vViAa8xA06FzkApoqAk,7992
- numba_cuda/numba/cuda/cudadrv/driver.py,sha256=MfNwvOpCzjW1ctL_VZZZgBDIQhH8h0PfN3Vx54JrlJ8,105700
+ numba_cuda/numba/cuda/cudadrv/driver.py,sha256=uPjKugdtSJfIwVSAo3KgkvQhctbABkQphHAfcq6Q7ec,110892
  numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=52ms3X6hfPaQB8E1jb6g7QKqRvHzBMlDQ-V2DM1rXxQ,17178
  numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWxqEDJedpwDPEZ44,14209
- numba_cuda/numba/cuda/cudadrv/enums.py,sha256=E0lnh17jO4EvZ_hSIq3ZtfsE5bObmINtKb_lbK7rmMg,23708
+ numba_cuda/numba/cuda/cudadrv/enums.py,sha256=37zZmyrLvT-7R8wWtwKJkQhN8siLMxsDGiA3_NQ-yx8,23740
  numba_cuda/numba/cuda/cudadrv/error.py,sha256=zEIryW6aIy8GG4ypmTliB6RgY4Gy2n8ckz7I6W99LUM,524
  numba_cuda/numba/cuda/cudadrv/libs.py,sha256=PRyxal4bz9jVZmuLpKiYw-VaR59LekfwJgWKo7R5uRY,6005
+ numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
+ numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
  numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=CLpuD9VzPcYoXj8dZ2meSoqbWXHOOC5V5D6dFNdXqmg,9693
  numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
@@ -100,6 +102,7 @@ numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py,sha256=0KPe4E9wOZsSV_0QI0Lmj
  numba_cuda/numba/cuda/tests/cudadrv/test_linker.py,sha256=_l2_EQEko2Jet5ooj4XMT0L4BjOuqLjbONGj1_MVI50,10161
  numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py,sha256=kYXYMkx_3GPAITKp4reLeM8KSzKkpxiC8nxnBvXpaTA,4979
  numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py,sha256=984jATSa01SRoSrVqxPeO6ujJ7w2jsnZa39ABInFLVI,1529
+ numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=m5zv6K6PHLnm-AqHKo5x9f_ZBrn3rmvPX_ZGjjrkPfI,6807
  numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=DF7KV5uh-yMztks0f47NhpalV64dvsNy-f8HY6GhAhE,7373
  numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py,sha256=u_TthSS2N-2J4eBIuF4PGg33AjD-wxly7MKpz0vRAKc,944
  numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py,sha256=MQWZx1j3lbEpWmIpQ1bV9szrGOV3VHN0QrEnJRjAhW4,508
@@ -151,7 +154,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py,sha256=Uhe8Q0u42jySrpwA
  numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py,sha256=luDtBxFS_5ZbVemXe1Z7gfqMliaU_EAOR4SuLsU5rhw,2677
  numba_cuda/numba/cuda/tests/cudapy/test_idiv.py,sha256=HLJ_f2lX8m_NNJjUbl_8zZ0-8GsBlRdBP2CUo_yWb0Y,1056
  numba_cuda/numba/cuda/tests/cudapy/test_inspect.py,sha256=lP9-8SbWFn2Xc-qmF6UNhcY6LreKTnveaK5CGW2pu8E,5196
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=e6lABWy8YBgYheYYGfD75_y8vMbPP71GHb95A4hlLmA,34931
+ numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=M6-pad8nVM0fuL18uFxvE6tmHw0spLNhnMBLVlO0FKU,36400
  numba_cuda/numba/cuda/tests/cudapy/test_ipc.py,sha256=fggyy-kmsOkCb906_q3kXPGRziccWu7Co7ir83zBMwM,10536
  numba_cuda/numba/cuda/tests/cudapy/test_iterators.py,sha256=daQW3kSkp7icCmlTn9pCvnaauz60k_eBf4x1UQF-XVY,2344
  numba_cuda/numba/cuda/tests/cudapy/test_lang.py,sha256=U1BCVZMjU1AZ4wDSmjsRIPPcAReiq4dB77Cz7GmrdmA,1691
@@ -172,7 +175,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_operator.py,sha256=0nJej4D898_JU-jhlif44
  numba_cuda/numba/cuda/tests/cudapy/test_optimization.py,sha256=SvqRsSFgcGxkFDZS-kul5B-mi8GxINTS98uUzAy4dhw,2647
  numba_cuda/numba/cuda/tests/cudapy/test_overload.py,sha256=u4yUDVFcV9E3NWMlNjM81e3IW4KaIkcDtXig8JYevsw,8538
  numba_cuda/numba/cuda/tests/cudapy/test_powi.py,sha256=TI82rYRnkSnwv9VN6PMpBnr9JqMJ_F3HhH4cKY6O8tw,3276
- numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256=-hYmtwvVUjk6raNGHSP_qHAqVK7xbip8eCbYo1AwQU0,4070
+ numba_cuda/numba/cuda/tests/cudapy/test_print.py,sha256=QXhhhnEz1d5BlldLINQVnmuHeM_dT3aLvfGS7jm24nE,4451
  numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py,sha256=R88Vfgg3mSAZ0Jy6WT6dJNmkFTsxnVnEmO7XqpqyxuU,986
  numba_cuda/numba/cuda/tests/cudapy/test_random.py,sha256=rLw7_8a7BBhD_8GNqMal0l_AbWXzLs_Q0hC6_X8gdjA,3467
  numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py,sha256=grR64kdRlsLcR0K3IxSfI2VKsTrrqxsXuROOpvj-6nw,18769
@@ -224,8 +227,12 @@ numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py,sha256=o4DYocyHK7
  numba_cuda/numba/cuda/tests/nocuda/test_import.py,sha256=teiL8rpFGQOh41kyBSSNHHFYAJYgpdStXkTcpK4_fxo,1641
  numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA9Ym-iT_B972bgFRu3UkRtwIgWtuI,7948
  numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
- numba_cuda-0.0.16.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
- numba_cuda-0.0.16.dist-info/METADATA,sha256=n01sYKdJ2lX4fsQ8MDAixZnIl6D69fQFkUboKBvC5OY,1393
- numba_cuda-0.0.16.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- numba_cuda-0.0.16.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
- numba_cuda-0.0.16.dist-info/RECORD,,
+ numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=OFC_6irwscCNGAyJJKq7fTchzWosCUuiVWU02m0bcUQ,2248
+ numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=OqqmFhDk3c0Edt4AvAGm0MQRCXb9jLSO2wpQ72oiXXI,4838
+ numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
+ numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
+ numba_cuda-0.0.18.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+ numba_cuda-0.0.18.dist-info/METADATA,sha256=kJletXn1FHyLocorf4n5QLO1TH0v6G_8uNkbqBAwiWY,1393
+ numba_cuda-0.0.18.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+ numba_cuda-0.0.18.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+ numba_cuda-0.0.18.dist-info/RECORD,,
numba_cuda-0.0.16.dist-info/WHEEL → numba_cuda-0.0.18.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.1.0)
+ Generator: setuptools (75.3.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
