numba-cuda 0.0.20__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
_numba_cuda_redirector.py CHANGED
@@ -67,9 +67,15 @@ class NumbaCudaFinder(importlib.abc.MetaPathFinder):
67
67
  oot_path = [p.replace(self.numba_path, self.numba_cuda_path)
68
68
  for p in path]
69
69
  for finder in sys.meta_path:
70
- spec = finder.find_spec(name, oot_path, target)
71
- if spec is not None:
72
- return spec
70
+ try:
71
+ spec = finder.find_spec(name, oot_path, target)
72
+ except AttributeError:
73
+ # Finders written to a pre-Python 3.4 spec for finders will
74
+ # not implement find_spec. We can skip those altogether.
75
+ continue
76
+ else:
77
+ if spec is not None:
78
+ return spec
73
79
 
74
80
 
75
81
  finder = NumbaCudaFinder()
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.20
1
+ 0.1.0
@@ -9,7 +9,6 @@ import os
9
9
  import subprocess
10
10
  import tempfile
11
11
 
12
-
13
12
  CUDA_TRIPLE = 'nvptx64-nvidia-cuda'
14
13
 
15
14
 
@@ -181,17 +180,7 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
181
180
 
182
181
  return ltoir
183
182
 
184
- def get_cubin(self, cc=None):
185
- cc = self._ensure_cc(cc)
186
-
187
- cubin = self._cubin_cache.get(cc, None)
188
- if cubin:
189
- return cubin
190
-
191
- linker = driver.Linker.new(
192
- max_registers=self._max_registers, cc=cc, lto=self._lto
193
- )
194
-
183
+ def _link_all(self, linker, cc, ignore_nonlto=False):
195
184
  if linker.lto:
196
185
  ltoir = self.get_ltoir(cc=cc)
197
186
  linker.add_ltoir(ltoir)
@@ -200,11 +189,44 @@ class CUDACodeLibrary(serialize.ReduceMixin, CodeLibrary):
200
189
  linker.add_ptx(ptx.encode())
201
190
 
202
191
  for path in self._linking_files:
203
- linker.add_file_guess_ext(path)
192
+ linker.add_file_guess_ext(path, ignore_nonlto)
204
193
  if self.needs_cudadevrt:
205
- linker.add_file_guess_ext(get_cudalib('cudadevrt', static=True))
194
+ linker.add_file_guess_ext(
195
+ get_cudalib('cudadevrt', static=True), ignore_nonlto
196
+ )
197
+
198
+ def get_cubin(self, cc=None):
199
+ cc = self._ensure_cc(cc)
206
200
 
201
+ cubin = self._cubin_cache.get(cc, None)
202
+ if cubin:
203
+ return cubin
204
+
205
+ if self._lto and config.DUMP_ASSEMBLY:
206
+ linker = driver.Linker.new(
207
+ max_registers=self._max_registers,
208
+ cc=cc,
209
+ additional_flags=["-ptx"],
210
+ lto=self._lto
211
+ )
212
+ # `-ptx` flag is meant to view the optimized PTX for LTO objects.
213
+ # Non-LTO objects are not passed to linker.
214
+ self._link_all(linker, cc, ignore_nonlto=True)
215
+
216
+ ptx = linker.get_linked_ptx().decode('utf-8')
217
+
218
+ print(("ASSEMBLY (AFTER LTO) %s" % self._name).center(80, '-'))
219
+ print(ptx)
220
+ print('=' * 80)
221
+
222
+ linker = driver.Linker.new(
223
+ max_registers=self._max_registers,
224
+ cc=cc,
225
+ lto=self._lto
226
+ )
227
+ self._link_all(linker, cc, ignore_nonlto=False)
207
228
  cubin = linker.complete()
229
+
208
230
  self._cubin_cache[cc] = cubin
209
231
  self._linkerinfo_cache[cc] = linker.info_log
210
232
 
@@ -206,6 +206,7 @@ def compile_cuda(pyfunc, return_type, args, debug=False, lineinfo=False,
206
206
 
207
207
  if debug:
208
208
  flags.error_model = 'python'
209
+ flags.dbg_extend_lifetimes = True
209
210
  else:
210
211
  flags.error_model = 'numpy'
211
212
 
@@ -497,6 +498,9 @@ def compile(pyfunc, sig, debug=None, lineinfo=False, device=True,
497
498
  'opt': 3 if opt else 0
498
499
  }
499
500
 
501
+ if debug:
502
+ nvvm_options['g'] = None
503
+
500
504
  if lto:
501
505
  nvvm_options['gen-lto'] = None
502
506
 
@@ -21,6 +21,9 @@ import threading
21
21
  import traceback
22
22
  import asyncio
23
23
  import pathlib
24
+ import subprocess
25
+ import tempfile
26
+ import re
24
27
  from itertools import product
25
28
  from abc import ABCMeta, abstractmethod
26
29
  from ctypes import (c_int, byref, c_size_t, c_char, c_char_p, addressof,
@@ -36,7 +39,7 @@ from .error import CudaSupportError, CudaDriverError
36
39
  from .drvapi import API_PROTOTYPES
37
40
  from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
38
41
  from .mappings import FILE_EXTENSION_MAP
39
- from .linkable_code import LinkableCode
42
+ from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
40
43
  from numba.cuda.cudadrv import enums, drvapi, nvrtc
41
44
 
42
45
  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
@@ -2683,12 +2686,18 @@ class Linker(metaclass=ABCMeta):
2683
2686
  cu = f.read()
2684
2687
  self.add_cu(cu, os.path.basename(path))
2685
2688
 
2686
- def add_file_guess_ext(self, path_or_code):
2689
+ def add_file_guess_ext(self, path_or_code, ignore_nonlto=False):
2687
2690
  """
2688
2691
  Add a file or LinkableCode object to the link. If a file is
2689
2692
  passed, the type will be inferred from the extension. A LinkableCode
2690
2693
  object represents a file already in memory.
2694
+
2695
+ When `ignore_nonlto` is set to true, do not add code that will not
2696
+ be LTO-ed in the linking process. This is useful in inspecting the
2697
+ LTO-ed portion of the PTX when linker is added with objects that can be
2698
+ both LTO-ed and not LTO-ed.
2691
2699
  """
2700
+
2692
2701
  if isinstance(path_or_code, str):
2693
2702
  ext = pathlib.Path(path_or_code).suffix
2694
2703
  if ext == '':
@@ -2704,6 +2713,26 @@ class Linker(metaclass=ABCMeta):
2704
2713
  "Don't know how to link file with extension "
2705
2714
  f"{ext}"
2706
2715
  )
2716
+
2717
+ if ignore_nonlto:
2718
+ warn_and_return = False
2719
+ if kind in (
2720
+ FILE_EXTENSION_MAP["fatbin"], FILE_EXTENSION_MAP["o"]
2721
+ ):
2722
+ entry_types = inspect_obj_content(path_or_code)
2723
+ if "nvvm" not in entry_types:
2724
+ warn_and_return = True
2725
+ elif kind != FILE_EXTENSION_MAP["ltoir"]:
2726
+ warn_and_return = True
2727
+
2728
+ if warn_and_return:
2729
+ warnings.warn(
2730
+ f"Not adding {path_or_code} as it is not "
2731
+ "optimizable at link time, and `ignore_nonlto == "
2732
+ "True`."
2733
+ )
2734
+ return
2735
+
2707
2736
  self.add_file(path_or_code, kind)
2708
2737
  return
2709
2738
  else:
@@ -2716,6 +2745,25 @@ class Linker(metaclass=ABCMeta):
2716
2745
  if path_or_code.kind == "cu":
2717
2746
  self.add_cu(path_or_code.data, path_or_code.name)
2718
2747
  else:
2748
+ if ignore_nonlto:
2749
+ warn_and_return = False
2750
+ if isinstance(path_or_code, (Fatbin, Object)):
2751
+ with tempfile.NamedTemporaryFile("w") as fp:
2752
+ fp.write(path_or_code.data)
2753
+ entry_types = inspect_obj_content(fp.name)
2754
+ if "nvvm" not in entry_types:
2755
+ warn_and_return = True
2756
+ elif not isinstance(path_or_code, LTOIR):
2757
+ warn_and_return = True
2758
+
2759
+ if warn_and_return:
2760
+ warnings.warn(
2761
+ f"Not adding {path_or_code.name} as it is not "
2762
+ "optimizable at link time, and `ignore_nonlto == "
2763
+ "True`."
2764
+ )
2765
+ return
2766
+
2719
2767
  self.add_data(
2720
2768
  path_or_code.data, path_or_code.kind, path_or_code.name
2721
2769
  )
@@ -3065,6 +3113,28 @@ class PyNvJitLinker(Linker):
3065
3113
  name = pathlib.Path(path).name
3066
3114
  self.add_data(data, kind, name)
3067
3115
 
3116
+ def add_cu(self, cu, name):
3117
+ """Add CUDA source in a string to the link. The name of the source
3118
+ file should be specified in `name`."""
3119
+ with driver.get_active_context() as ac:
3120
+ dev = driver.get_device(ac.devnum)
3121
+ cc = dev.compute_capability
3122
+
3123
+ program, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
3124
+
3125
+ if not self.lto and config.DUMP_ASSEMBLY:
3126
+ print(("ASSEMBLY %s" % name).center(80, "-"))
3127
+ print(program)
3128
+ print("=" * 80)
3129
+
3130
+ suffix = ".ltoir" if self.lto else ".ptx"
3131
+ program_name = os.path.splitext(name)[0] + suffix
3132
+ # Link the program's PTX or LTOIR using the normal linker mechanism
3133
+ if self.lto:
3134
+ self.add_ltoir(program, program_name)
3135
+ else:
3136
+ self.add_ptx(program.encode(), program_name)
3137
+
3068
3138
  def add_data(self, data, kind, name):
3069
3139
  if kind == FILE_EXTENSION_MAP["cubin"]:
3070
3140
  fn = self._linker.add_cubin
@@ -3086,6 +3156,12 @@ class PyNvJitLinker(Linker):
3086
3156
  except NvJitLinkError as e:
3087
3157
  raise LinkerError from e
3088
3158
 
3159
+ def get_linked_ptx(self):
3160
+ try:
3161
+ return self._linker.get_linked_ptx()
3162
+ except NvJitLinkError as e:
3163
+ raise LinkerError from e
3164
+
3089
3165
  def complete(self):
3090
3166
  try:
3091
3167
  return self._linker.get_linked_cubin()
@@ -3361,3 +3437,28 @@ def get_version():
3361
3437
  Return the driver version as a tuple of (major, minor)
3362
3438
  """
3363
3439
  return driver.get_version()
3440
+
3441
+
3442
+ def inspect_obj_content(objpath: str):
3443
+ """
3444
+ Given path to a fatbin or object, use `cuobjdump` to examine its content
3445
+ Return the set of entries in the object.
3446
+ """
3447
+ code_types :set[str] = set()
3448
+
3449
+ try:
3450
+ out = subprocess.run(["cuobjdump", objpath], check=True,
3451
+ capture_output=True)
3452
+ except FileNotFoundError as e:
3453
+ msg = ("cuobjdump has not been found. You may need "
3454
+ "to install the CUDA toolkit and ensure that "
3455
+ "it is available on your PATH.\n")
3456
+ raise RuntimeError(msg) from e
3457
+
3458
+ objtable = out.stdout.decode('utf-8')
3459
+ entry_pattern = r"Fatbin (.*) code"
3460
+ for line in objtable.split("\n"):
3461
+ if match := re.match(entry_pattern, line):
3462
+ code_types.add(match.group(1))
3463
+
3464
+ return code_types
@@ -61,6 +61,14 @@ class NVRTC:
61
61
  NVVM interface. Initialization is protected by a lock and uses the standard
62
62
  (for Numba) open_cudalib function to load the NVRTC library.
63
63
  """
64
+
65
+ _CU12ONLY_PROTOTYPES = {
66
+ # nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *ltoSizeRet);
67
+ "nvrtcGetLTOIRSize": (nvrtc_result, nvrtc_program, POINTER(c_size_t)),
68
+ # nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *lto);
69
+ "nvrtcGetLTOIR": (nvrtc_result, nvrtc_program, c_char_p)
70
+ }
71
+
64
72
  _PROTOTYPES = {
65
73
  # nvrtcResult nvrtcVersion(int *major, int *minor)
66
74
  'nvrtcVersion': (nvrtc_result, POINTER(c_int), POINTER(c_int)),
@@ -110,6 +118,10 @@ class NVRTC:
110
118
  cls.__INSTANCE = None
111
119
  raise NvrtcSupportError("NVRTC cannot be loaded") from e
112
120
 
121
+ from numba.cuda.cudadrv.runtime import get_version
122
+ if get_version() >= (12, 0):
123
+ inst._PROTOTYPES |= inst._CU12ONLY_PROTOTYPES
124
+
113
125
  # Find & populate functions
114
126
  for name, proto in inst._PROTOTYPES.items():
115
127
  func = getattr(lib, name)
@@ -208,10 +220,22 @@ class NVRTC:
208
220
 
209
221
  return ptx.value.decode()
210
222
 
223
+ def get_lto(self, program):
224
+ """
225
+ Get the compiled LTOIR as a Python bytes object.
226
+ """
227
+ lto_size = c_size_t()
228
+ self.nvrtcGetLTOIRSize(program.handle, byref(lto_size))
229
+
230
+ lto = b" " * lto_size.value
231
+ self.nvrtcGetLTOIR(program.handle, lto)
232
+
233
+ return lto
211
234
 
212
- def compile(src, name, cc):
235
+
236
+ def compile(src, name, cc, ltoir=False):
213
237
  """
214
- Compile a CUDA C/C++ source to PTX for a given compute capability.
238
+ Compile a CUDA C/C++ source to PTX or LTOIR for a given compute capability.
215
239
 
216
240
  :param src: The source code to compile
217
241
  :type src: str
@@ -219,6 +243,8 @@ def compile(src, name, cc):
219
243
  :type name: str
220
244
  :param cc: A tuple ``(major, minor)`` of the compute capability
221
245
  :type cc: tuple
246
+ :param ltoir: Compile into LTOIR if True, otherwise into PTX
247
+ :type ltoir: bool
222
248
  :return: The compiled PTX and compilation log
223
249
  :rtype: tuple
224
250
  """
@@ -242,6 +268,9 @@ def compile(src, name, cc):
242
268
  numba_include = f'-I{numba_cuda_path}'
243
269
  options = [arch, *cuda_include, numba_include, '-rdc', 'true']
244
270
 
271
+ if ltoir:
272
+ options.append("-dlto")
273
+
245
274
  if nvrtc.get_version() < (12, 0):
246
275
  options += ["-std=c++17"]
247
276
 
@@ -261,5 +290,9 @@ def compile(src, name, cc):
261
290
  msg = (f"NVRTC log messages whilst compiling {name}:\n\n{log}")
262
291
  warnings.warn(msg)
263
292
 
264
- ptx = nvrtc.get_ptx(program)
265
- return ptx, log
293
+ if ltoir:
294
+ ltoir = nvrtc.get_lto(program)
295
+ return ltoir, log
296
+ else:
297
+ ptx = nvrtc.get_ptx(program)
298
+ return ptx, log
@@ -95,6 +95,9 @@ class _Kernel(serialize.ReduceMixin):
95
95
  'opt': 3 if opt else 0
96
96
  }
97
97
 
98
+ if debug:
99
+ nvvm_options['g'] = None
100
+
98
101
  cc = get_current_device().compute_capability
99
102
  cres = compile_cuda(self.py_func, types.void, self.argtypes,
100
103
  debug=self.debug,
@@ -918,6 +921,9 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
918
921
  'fastmath': fastmath
919
922
  }
920
923
 
924
+ if debug:
925
+ nvvm_options['g'] = None
926
+
921
927
  cc = get_current_device().compute_capability
922
928
  cres = compile_cuda(self.py_func, return_type, args,
923
929
  debug=debug,
@@ -5,6 +5,10 @@ from numba.cuda.cudadrv.driver import PyNvJitLinker
5
5
 
6
6
  import itertools
7
7
  import os
8
+ import io
9
+ import contextlib
10
+ import warnings
11
+
8
12
  from numba.cuda import get_current_device
9
13
  from numba import cuda
10
14
  from numba import config
@@ -23,6 +27,9 @@ if TEST_BIN_DIR:
23
27
  test_device_functions_fatbin = os.path.join(
24
28
  TEST_BIN_DIR, "test_device_functions.fatbin"
25
29
  )
30
+ test_device_functions_fatbin_multi = os.path.join(
31
+ TEST_BIN_DIR, "test_device_functions_multi.fatbin"
32
+ )
26
33
  test_device_functions_o = os.path.join(
27
34
  TEST_BIN_DIR, "test_device_functions.o"
28
35
  )
@@ -156,32 +163,81 @@ class TestLinker(CUDATestCase):
156
163
  test_device_functions_o,
157
164
  test_device_functions_ptx,
158
165
  )
166
+ for lto in [True, False]:
167
+ for file in files:
168
+ with self.subTest(file=file):
169
+ sig = "uint32(uint32, uint32)"
170
+ add_from_numba = cuda.declare_device("add_from_numba", sig)
171
+
172
+ @cuda.jit(link=[file], lto=lto)
173
+ def kernel(result):
174
+ result[0] = add_from_numba(1, 2)
175
+
176
+ result = cuda.device_array(1)
177
+ kernel[1, 1](result)
178
+ assert result[0] == 3
179
+
180
+ def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self):
181
+ files = [
182
+ test_device_functions_cu,
183
+ test_device_functions_ltoir,
184
+ test_device_functions_fatbin_multi
185
+ ]
186
+
187
+ config.DUMP_ASSEMBLY = True
188
+
159
189
  for file in files:
160
190
  with self.subTest(file=file):
161
- sig = "uint32(uint32, uint32)"
162
- add_from_numba = cuda.declare_device("add_from_numba", sig)
191
+ f = io.StringIO()
192
+ with contextlib.redirect_stdout(f):
193
+ sig = "uint32(uint32, uint32)"
194
+ add_from_numba = cuda.declare_device("add_from_numba", sig)
163
195
 
164
- @cuda.jit(link=[file])
165
- def kernel(result):
166
- result[0] = add_from_numba(1, 2)
196
+ @cuda.jit(link=[file], lto=True)
197
+ def kernel(result):
198
+ result[0] = add_from_numba(1, 2)
167
199
 
168
- result = cuda.device_array(1)
169
- kernel[1, 1](result)
170
- assert result[0] == 3
200
+ result = cuda.device_array(1)
201
+ kernel[1, 1](result)
202
+ assert result[0] == 3
171
203
 
172
- def test_nvjitlink_jit_with_linkable_code_lto(self):
173
- file = test_device_functions_ltoir
204
+ self.assertTrue("ASSEMBLY (AFTER LTO)" in f.getvalue())
174
205
 
175
- sig = "uint32(uint32, uint32)"
176
- add_from_numba = cuda.declare_device("add_from_numba", sig)
206
+ config.DUMP_ASSEMBLY = False
177
207
 
178
- @cuda.jit(link=[file], lto=True)
179
- def kernel(result):
180
- result[0] = add_from_numba(1, 2)
208
+ def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self):
209
+ files = [
210
+ test_device_functions_a,
211
+ test_device_functions_cubin,
212
+ test_device_functions_fatbin,
213
+ test_device_functions_o,
214
+ test_device_functions_ptx,
215
+ ]
181
216
 
182
- result = cuda.device_array(1)
183
- kernel[1, 1](result)
184
- assert result[0] == 3
217
+ config.DUMP_ASSEMBLY = True
218
+
219
+ for file in files:
220
+ with self.subTest(file=file):
221
+ with warnings.catch_warnings(record=True) as w:
222
+ with contextlib.redirect_stdout(None): # suppress other PTX
223
+ sig = "uint32(uint32, uint32)"
224
+ add_from_numba = cuda.declare_device(
225
+ "add_from_numba", sig
226
+ )
227
+
228
+ @cuda.jit(link=[file], lto=True)
229
+ def kernel(result):
230
+ result[0] = add_from_numba(1, 2)
231
+
232
+ result = cuda.device_array(1)
233
+ kernel[1, 1](result)
234
+ assert result[0] == 3
235
+
236
+ assert len(w) == 1
237
+ self.assertIn("it is not optimizable at link time, and "
238
+ "`ignore_nonlto == True`", str(w[0].message))
239
+
240
+ config.DUMP_ASSEMBLY = False
185
241
 
186
242
  def test_nvjitlink_jit_with_invalid_linkable_code(self):
187
243
  with open(test_device_functions_cubin, "rb") as f:
@@ -14,9 +14,14 @@ endif
14
14
  # Gencode flags suitable for most tests
15
15
  GENCODE := -gencode arch=compute_$(GPU_CC),code=sm_$(GPU_CC)
16
16
 
17
+ MULTI_GENCODE := -gencode arch=compute_$(GPU_CC),code=[sm_$(GPU_CC),lto_$(GPU_CC)]
18
+
17
19
  # Fatbin tests need to generate code for an additional compute capability
18
20
  FATBIN_GENCODE := $(GENCODE) -gencode arch=compute_$(ALT_CC),code=sm_$(ALT_CC)
19
21
 
22
+ # Fatbin that contains both LTO, SASS for multiple architectures
23
+ MULTI_FATBIN_GENCODE := $(MULTI_GENCODE) -gencode arch=compute_$(ALT_CC),code=[sm_$(ALT_CC),lto_$(ALT_CC)]
24
+
20
25
  # LTO-IR tests need to generate for the LTO "architecture" instead
21
26
  LTOIR_GENCODE := -gencode arch=lto_$(GPU_CC),code=lto_$(GPU_CC)
22
27
 
@@ -30,6 +35,7 @@ PTX_FLAGS := $(GENCODE) -ptx
30
35
  OBJECT_FLAGS := $(GENCODE) -dc
31
36
  LIBRARY_FLAGS := $(GENCODE) -lib
32
37
  FATBIN_FLAGS := $(FATBIN_GENCODE) --fatbin
38
+ MULTI_FATBIN_FLAGS := $(MULTI_FATBIN_GENCODE) --fatbin
33
39
  LTOIR_FLAGS := $(LTOIR_GENCODE) -dc
34
40
 
35
41
  OUTPUT_DIR := ./
@@ -41,6 +47,7 @@ all:
41
47
  nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/undefined_extern.cubin undefined_extern.cu
42
48
  nvcc $(NVCC_FLAGS) $(CUBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.cubin test_device_functions.cu
43
49
  nvcc $(NVCC_FLAGS) $(FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.fatbin test_device_functions.cu
50
+ nvcc $(NVCC_FLAGS) $(MULTI_FATBIN_FLAGS) -o $(OUTPUT_DIR)/test_device_functions_multi.fatbin test_device_functions.cu
44
51
  nvcc $(NVCC_FLAGS) $(PTX_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.ptx test_device_functions.cu
45
52
  nvcc $(NVCC_FLAGS) $(OBJECT_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.o test_device_functions.cu
46
53
  nvcc $(NVCC_FLAGS) $(LIBRARY_FLAGS) -o $(OUTPUT_DIR)/test_device_functions.a test_device_functions.cu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: numba-cuda
3
- Version: 0.0.20
3
+ Version: 0.1.0
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -1,6 +1,6 @@
1
1
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
2
- _numba_cuda_redirector.py,sha256=rc56rnb40w3AtrqnhS66JSgYTSTsi3iTn8yP3NuoQV8,2401
3
- numba_cuda/VERSION,sha256=cPZ8vtzWinVd7EOHmw_3zZqy5pfWSDm-eY6LV2Q5TPI,7
2
+ _numba_cuda_redirector.py,sha256=QKJmYICSQvjvph0Zw9OW015MsuKxIF28GPFjR35AXLM,2681
3
+ numba_cuda/VERSION,sha256=6d2FB_S_DG9CRY5BrqgzrQvT9hJycjNe7pv01YVB7Wc,6
4
4
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
5
5
  numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
6
6
  numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -8,8 +8,8 @@ numba_cuda/numba/cuda/api.py,sha256=shLu7NEZHRMcaZAMEXSoyA5Gi5m0tm6ZRymxKLEKCSg,
8
8
  numba_cuda/numba/cuda/api_util.py,sha256=aQfUV2-4RM_oGVvckMjbMr5e3effOQNX04v1T0O2EfQ,861
9
9
  numba_cuda/numba/cuda/args.py,sha256=HloHkw_PQal2DT-I70Xf_XbnGObS1jiUgcRrQ85Gq28,1978
10
10
  numba_cuda/numba/cuda/cg.py,sha256=9V1uZqyGOJX1aFd9c6GAPbLSqq83lE8LoP-vxxrKENY,1490
11
- numba_cuda/numba/cuda/codegen.py,sha256=9LnTlei-4JK7iq3Rg-H2Y19Oh_u5ZXMC_CPfattANjw,12358
12
- numba_cuda/numba/cuda/compiler.py,sha256=XQHzUCuXl6WCtWWxv1X3Y9ebcVQVJEkzOuckNwKa4Gg,21249
11
+ numba_cuda/numba/cuda/codegen.py,sha256=ghdYBKZ3Mzk2UlLE64HkrAjb60PN9fibSNkWFRQuj4M,13184
12
+ numba_cuda/numba/cuda/compiler.py,sha256=_0qfSjnLnF29B-t8NQRJt4FBUIKxZJE6xN47_G7oRio,21339
13
13
  numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=iv84_F6Q9kFjV_kclrQz1msh6Dud8mI3qNkswTid7Qc,953
14
14
  numba_cuda/numba/cuda/cuda_fp16.h,sha256=1IC0mdNdkvKbvAe0-f4uYVS7WFrVqOyI1nRUbBiqr6A,126844
15
15
  numba_cuda/numba/cuda/cuda_fp16.hpp,sha256=vJ7NUr2X2tKhAP7ojydAiCoOjVO6n4QGoXD6m9Srrlw,89130
@@ -21,7 +21,7 @@ numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZ
21
21
  numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
22
22
  numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
23
23
  numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
24
- numba_cuda/numba/cuda/dispatcher.py,sha256=JuUr0-6xQtDkyaZv7CirWaU5_sSNX4BKCTDgQG5c1xc,41116
24
+ numba_cuda/numba/cuda/dispatcher.py,sha256=nDfPCzxJ7UwA4uiz-fsMMgQb2WXByvzHLtkLMXW9JXk,41244
25
25
  numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
26
26
  numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
27
27
  numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
@@ -47,7 +47,7 @@ numba_cuda/numba/cuda/vectorizers.py,sha256=u_0EzaD5tqVH8uOz4Gmqn3FgPC1rckwDAQuR
47
47
  numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=0TL4MZcJXUoo9qA7uu0vLv7eHrXRerVmyfi7O149ITw,199
48
48
  numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=06kM7iFcx1TYiFhs1o9r1kyoA3k5yS7mFAdZDf6nrxA,31215
49
49
  numba_cuda/numba/cuda/cudadrv/devices.py,sha256=6SneNmoq83gue0txFWWx4A65vViAa8xA06FzkApoqAk,7992
50
- numba_cuda/numba/cuda/cudadrv/driver.py,sha256=uPjKugdtSJfIwVSAo3KgkvQhctbABkQphHAfcq6Q7ec,110892
50
+ numba_cuda/numba/cuda/cudadrv/driver.py,sha256=bjlGcJvyjwMjRCNkNqmBIAA0HO_fzbrW2afXsp-YiCg,114794
51
51
  numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=52ms3X6hfPaQB8E1jb6g7QKqRvHzBMlDQ-V2DM1rXxQ,17178
52
52
  numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWxqEDJedpwDPEZ44,14209
53
53
  numba_cuda/numba/cuda/cudadrv/enums.py,sha256=Wy5dzukTk4TnWCowg_PLceET_v2xEyiWLu9TyH8pXr8,23742
@@ -56,7 +56,7 @@ numba_cuda/numba/cuda/cudadrv/libs.py,sha256=Gk9zQ1CKcsZsWl-_9QneXeP9VH5q5R1I3Cx
56
56
  numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
57
57
  numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
58
58
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
59
- numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=rv-XQo0snJj4xyEbfeBqivziIxCwMOQzIIEOnvLQaJI,9825
59
+ numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=RR096Ic2_Zu96C-GGh8x8WTOyxnmDkwtcwag8a_npkQ,10898
60
60
  numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
61
61
  numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=WdeUoWzsYNYodx8kMRLVIjnNs0QzwpCihd2Q0AaqItE,226
62
62
  numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=Tj9ACrzQqNmDSO6xfpzw12EsQknSywQ-ZGuWMbDdHnQ,4255
@@ -103,7 +103,7 @@ numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py,sha256=0KPe4E9wOZsSV_0QI0Lmj
103
103
  numba_cuda/numba/cuda/tests/cudadrv/test_linker.py,sha256=_l2_EQEko2Jet5ooj4XMT0L4BjOuqLjbONGj1_MVI50,10161
104
104
  numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py,sha256=kYXYMkx_3GPAITKp4reLeM8KSzKkpxiC8nxnBvXpaTA,4979
105
105
  numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py,sha256=984jATSa01SRoSrVqxPeO6ujJ7w2jsnZa39ABInFLVI,1529
106
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=m5zv6K6PHLnm-AqHKo5x9f_ZBrn3rmvPX_ZGjjrkPfI,6807
106
+ numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py,sha256=VOOl5fLxQL5IKHEi8hL47hAH0BUf_D8NyIxptLxIwus,8856
107
107
  numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py,sha256=DF7KV5uh-yMztks0f47NhpalV64dvsNy-f8HY6GhAhE,7373
108
108
  numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py,sha256=u_TthSS2N-2J4eBIuF4PGg33AjD-wxly7MKpz0vRAKc,944
109
109
  numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py,sha256=MQWZx1j3lbEpWmIpQ1bV9szrGOV3VHN0QrEnJRjAhW4,508
@@ -232,12 +232,12 @@ numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEe
232
232
  numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
233
233
  numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Qtn52GoKZ_ydre3oqkLWVdImC37tuPClUy4uHSutaJo,1568
234
234
  numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=Ox6ei2DldvSSS-CndTXRxLnsvWdteOQNgn6GvKHB244,2789
235
- numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=OFC_6irwscCNGAyJJKq7fTchzWosCUuiVWU02m0bcUQ,2248
235
+ numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq6pJwHEwmKVmJOJxPBtsMTbnuzqYkik,2679
236
236
  numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
237
237
  numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
238
238
  numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
239
- numba_cuda-0.0.20.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
240
- numba_cuda-0.0.20.dist-info/METADATA,sha256=ObRlGZEdodWEKekrWciM8x9qa1VyfLUbFrIX8pWhyME,1497
241
- numba_cuda-0.0.20.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
242
- numba_cuda-0.0.20.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
243
- numba_cuda-0.0.20.dist-info/RECORD,,
239
+ numba_cuda-0.1.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
240
+ numba_cuda-0.1.0.dist-info/METADATA,sha256=5g2_KSGZGRHn8D-JbY7mnZREDqGMIRzVoqLAGNrmRIQ,1496
241
+ numba_cuda-0.1.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
242
+ numba_cuda-0.1.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
243
+ numba_cuda-0.1.0.dist-info/RECORD,,