numba-cuda 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to their public registry. It is provided for informational purposes only.
numba_cuda/VERSION CHANGED
@@ -1 +1 @@
- 0.3.0
+ 0.4.0

numba_cuda/numba/cuda/cudadrv/devicearray.py CHANGED
@@ -570,10 +570,13 @@ class DeviceNDArray(DeviceNDArrayBase):
          '''
          return self._dummy.is_c_contig

-     def __array__(self, dtype=None):
+     def __array__(self, dtype=None, copy=None):
          """
          :return: an `numpy.ndarray`, so copies to the host.
          """
+         if copy is False:
+             msg = "`copy=False` is not supported. A copy is always created."
+             raise ValueError(msg)
          if dtype:
              return self.copy_to_host().__array__(dtype)
          else:
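
Note: NumPy 2 forwards the `copy` keyword of `np.array` to `__array__`, which NumPy 1.x never passes. Converting a device array always materializes a host copy, so `copy=False` cannot be honored and now raises. A minimal sketch of the resulting behavior (illustrative session, assuming a CUDA device is available):

    import numpy as np
    from numba import cuda

    d_arr = cuda.to_device(np.arange(3))
    host = np.array(d_arr)        # implicit device-to-host copy
    np.array(d_arr, copy=False)   # raises ValueError under NumPy 2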

numba_cuda/numba/cuda/cudadrv/driver.py CHANGED
@@ -18,7 +18,6 @@ import functools
  import warnings
  import logging
  import threading
- import traceback
  import asyncio
  import pathlib
  import subprocess
@@ -40,6 +39,7 @@ from .drvapi import API_PROTOTYPES
  from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj, cu_uuid
  from .mappings import FILE_EXTENSION_MAP
  from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
+ from numba.cuda.utils import _readenv
  from numba.cuda.cudadrv import enums, drvapi, nvrtc

  try:
@@ -66,25 +66,6 @@ _py_decref.argtypes = [ctypes.py_object]
  _py_incref.argtypes = [ctypes.py_object]


- def _readenv(name, ctor, default):
-     value = os.environ.get(name)
-     if value is None:
-         return default() if callable(default) else default
-     try:
-         if ctor is bool:
-             return value.lower() in {'1', "true"}
-         return ctor(value)
-     except Exception:
-         warnings.warn(
-             f"Environment variable '{name}' is defined but its associated "
-             f"value '{value}' could not be parsed.\n"
-             "The parse failed with exception:\n"
-             f"{traceback.format_exc()}",
-             RuntimeWarning
-         )
-         return default
-
-
  _MVC_ERROR_MESSAGE = (
      "Minor version compatibility requires ptxcompiler and cubinlinker packages "
      "to be available"

numba_cuda/numba/cuda/cudadrv/nvrtc.py CHANGED
@@ -266,7 +266,11 @@ def compile(src, name, cc, ltoir=False):
      cudadrv_path = os.path.dirname(os.path.abspath(__file__))
      numba_cuda_path = os.path.dirname(cudadrv_path)
      numba_include = f'-I{numba_cuda_path}'
-     options = [arch, *cuda_include, numba_include, '-rdc', 'true']
+
+     nrt_path = os.path.join(numba_cuda_path, "runtime")
+     nrt_include = f'-I{nrt_path}'
+
+     options = [arch, *cuda_include, numba_include, nrt_include, '-rdc', 'true']

      if ltoir:
          options.append("-dlto")
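
The new `-I` flag points NVRTC at the `numba/cuda/runtime` directory so that runtime-compiled device sources can resolve `#include "memsys.cuh"`. As a rough sketch, for a compute capability 8.0 device the option list would look something like the following (paths abbreviated and illustrative, not taken from the package):

    options = ['--gpu-architecture=compute_80',
               '-I/usr/local/cuda/include',
               '-I/.../numba_cuda/numba/cuda',
               '-I/.../numba_cuda/numba/cuda/runtime',
               '-rdc', 'true']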

numba_cuda/numba/cuda/dispatcher.py CHANGED
@@ -21,6 +21,7 @@ from numba.cuda.descriptor import cuda_target
  from numba.cuda.errors import (missing_launch_config_msg,
                                 normalize_kernel_dimensions)
  from numba.cuda import types as cuda_types
+ from numba.cuda.runtime.nrt import rtsys

  from numba import cuda
  from numba import _dispatcher
@@ -253,7 +254,14 @@ class _Kernel(serialize.ReduceMixin):
          """
          Force binding to current CUDA context
          """
-         self._codelibrary.get_cufunc()
+         cufunc = self._codelibrary.get_cufunc()
+
+         if hasattr(self, "target_context") and self.target_context.enable_nrt:
+             rtsys.ensure_initialized()
+             rtsys.set_memsys_to_module(cufunc.module)
+             # We don't know which stream the kernel will be launched on, so
+             # we force synchronize here.
+             cuda.synchronize()

      @property
      def regs_per_thread(self):
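
When NRT is enabled, binding a kernel now also ensures the memory-system singleton is allocated and initialized, and writes its device pointer into the kernel's own module so that `NRT_Allocate`/`NRT_Free` see a valid `TheMSys`. A hedged usage sketch (the `override_config` pattern mirrors the tests later in this diff):

    from numba import cuda
    from numba.tests.support import override_config

    @cuda.jit
    def kernel():
        pass

    with override_config("CUDA_ENABLE_NRT", True):
        kernel[1, 1]()   # bind() initializes rtsys and sets the memsys pointer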

numba_cuda/numba/cuda/runtime/__init__.py ADDED
@@ -0,0 +1 @@
+ from numba.cuda.runtime.nrt import rtsys  # noqa: F401

numba_cuda/numba/cuda/runtime/memsys.cu ADDED
@@ -0,0 +1,94 @@
+ #include "memsys.cuh"
+
+ __device__ size_t memsys_size = sizeof(NRT_MemSys);
+
+ namespace detail
+ {
+ void __device__ check_memsys()
+ {
+     if (TheMSys == nullptr)
+     {
+         assert(false && "TheMSys pointer is null. Please use NRT_MemSys_set to set pointer first.");
+     }
+ }
+ }
+
+ extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr)
+ {
+     TheMSys = memsys_ptr;
+ }
+
+ extern "C" __global__ void NRT_MemSys_read(uint64_t *managed_memsys)
+ {
+     detail::check_memsys();
+     managed_memsys[0] = TheMSys->stats.alloc;
+     managed_memsys[1] = TheMSys->stats.free;
+     managed_memsys[2] = TheMSys->stats.mi_alloc;
+     managed_memsys[3] = TheMSys->stats.mi_free;
+ }
+
+ extern "C" __global__ void NRT_MemSys_read_alloc(uint64_t *managed_result)
+ {
+     detail::check_memsys();
+     managed_result[0] = TheMSys->stats.alloc;
+ }
+
+ extern "C" __global__ void NRT_MemSys_read_free(uint64_t *managed_result)
+ {
+     detail::check_memsys();
+     managed_result[0] = TheMSys->stats.free;
+ }
+
+ extern "C" __global__ void NRT_MemSys_read_mi_alloc(uint64_t *managed_result)
+ {
+     detail::check_memsys();
+     managed_result[0] = TheMSys->stats.mi_alloc;
+ }
+
+ extern "C" __global__ void NRT_MemSys_read_mi_free(uint64_t *managed_result)
+ {
+     detail::check_memsys();
+     managed_result[0] = TheMSys->stats.mi_free;
+ }
+
+ extern "C" __global__ void NRT_MemSys_init(void)
+ {
+     detail::check_memsys();
+     TheMSys->stats.enabled = false;
+     TheMSys->stats.alloc = 0;
+     TheMSys->stats.free = 0;
+     TheMSys->stats.mi_alloc = 0;
+     TheMSys->stats.mi_free = 0;
+ }
+
+ extern "C" __global__ void NRT_MemSys_enable_stats(void)
+ {
+     detail::check_memsys();
+     TheMSys->stats.enabled = true;
+ }
+
+ extern "C" __global__ void NRT_MemSys_disable_stats(void)
+ {
+     detail::check_memsys();
+     TheMSys->stats.enabled = false;
+ }
+
+ extern "C" __global__ void NRT_MemSys_stats_enabled(uint8_t *enabled)
+ {
+     detail::check_memsys();
+     *enabled = static_cast<uint8_t>(TheMSys->stats.enabled);
+ }
+
+ extern "C" __global__ void NRT_MemSys_print(void)
+ {
+     if (TheMSys != nullptr)
+     {
+         printf("TheMSys->stats.enabled %d\n", TheMSys->stats.enabled);
+         printf("TheMSys->stats.alloc %lu\n", TheMSys->stats.alloc.load());
+         printf("TheMSys->stats.free %lu\n", TheMSys->stats.free.load());
+         printf("TheMSys->stats.mi_alloc %lu\n", TheMSys->stats.mi_alloc.load());
+         printf("TheMSys->stats.mi_free %lu\n", TheMSys->stats.mi_free.load());
+     } else {
+         printf("TheMsys is null.\n");
+     }
+ }
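
These single-thread helper kernels are the device half of the host-side API added in `nrt.py` below: the host launches each one with a single thread to set the `TheMSys` pointer, toggle or reset the counters, and read them back through managed buffers. A hedged sketch of the read pattern (mirroring `_get_single_stat` in `nrt.py`; the kernel launch itself goes through the driver API and is elided here):

    import numpy as np
    from numba import cuda

    buf = cuda.managed_array(1, np.uint64)   # visible to host and device
    # ... launch NRT_MemSys_read_alloc with a 1x1 grid, passing buf's pointer ...
    cuda.synchronize()                       # make the device write visible
    value = buf[0]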

numba_cuda/numba/cuda/runtime/memsys.cuh ADDED
@@ -0,0 +1,17 @@
+ #include <cuda/atomic>
+
+ // Globally needed variables
+ struct NRT_MemSys {
+     struct {
+         bool enabled;
+         cuda::atomic<size_t, cuda::thread_scope_device> alloc;
+         cuda::atomic<size_t, cuda::thread_scope_device> free;
+         cuda::atomic<size_t, cuda::thread_scope_device> mi_alloc;
+         cuda::atomic<size_t, cuda::thread_scope_device> mi_free;
+     } stats;
+ };
+
+ /* The Memory System object */
+ __device__ NRT_MemSys* TheMSys;
+
+ extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr);

numba_cuda/numba/cuda/runtime/nrt.cu CHANGED
@@ -3,6 +3,8 @@

  #include <cuda/atomic>

+ #include "memsys.cuh"
+
  typedef void (*NRT_dtor_function)(void* ptr, size_t size, void* info);
  typedef void (*NRT_dealloc_func)(void* ptr, void* dealloc_info);

@@ -18,29 +20,21 @@ struct MemInfo {
  };
  }

- // Globally needed variables
- struct NRT_MemSys {
-     struct {
-         bool enabled;
-         cuda::atomic<size_t, cuda::thread_scope_device> alloc;
-         cuda::atomic<size_t, cuda::thread_scope_device> free;
-         cuda::atomic<size_t, cuda::thread_scope_device> mi_alloc;
-         cuda::atomic<size_t, cuda::thread_scope_device> mi_free;
-     } stats;
- };
+ extern "C" __global__ void NRT_MemSys_set(NRT_MemSys *memsys_ptr)
+ {
+     TheMSys = memsys_ptr;
+ }

  static __device__ void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, NRT_MemInfo **mi);
  static __device__ void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out);
  extern "C" __device__ void* NRT_Allocate_External(size_t size);

- /* The Memory System object */
- __device__ NRT_MemSys* TheMSys;
-
  extern "C" __device__ void* NRT_Allocate(size_t size)
  {
      void* ptr = NULL;
      ptr = malloc(size);
-     // if (TheMSys->stats.enabled) { TheMSys->stats.alloc++; }
+     if (TheMSys && TheMSys->stats.enabled) {
+         TheMSys->stats.alloc.fetch_add(1, cuda::memory_order_relaxed); }
      return ptr;
  }

@@ -49,14 +43,14 @@ extern "C" __device__ void NRT_MemInfo_init(NRT_MemInfo* mi,
49
43
  size_t size,
50
44
  NRT_dtor_function dtor,
51
45
  void* dtor_info)
52
- // NRT_MemSys* TheMSys)
53
46
  {
54
47
  mi->refct = 1; /* starts with 1 refct */
55
48
  mi->dtor = dtor;
56
49
  mi->dtor_info = dtor_info;
57
50
  mi->data = data;
58
51
  mi->size = size;
59
- // if (TheMSys->stats.enabled) { TheMSys->stats.mi_alloc++; }
52
+ if (TheMSys && TheMSys->stats.enabled) {
53
+ TheMSys->stats.mi_alloc.fetch_add(1, cuda::memory_order_relaxed); }
60
54
  }
61
55
 
62
56
  extern "C"
@@ -71,7 +65,8 @@ __device__ NRT_MemInfo* NRT_MemInfo_new(
  extern "C" __device__ void NRT_Free(void* ptr)
  {
      free(ptr);
-     //if (TheMSys->stats.enabled) { TheMSys->stats.free++; }
+     if (TheMSys && TheMSys->stats.enabled) {
+         TheMSys->stats.free.fetch_add(1, cuda::memory_order_relaxed); }
  }

  extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi)
@@ -82,8 +77,10 @@ extern "C" __device__ void NRT_dealloc(NRT_MemInfo* mi)
82
77
  extern "C" __device__ void NRT_MemInfo_destroy(NRT_MemInfo* mi)
83
78
  {
84
79
  NRT_dealloc(mi);
85
- //if (TheMSys->stats.enabled) { TheMSys->stats.mi_free++; }
80
+ if (TheMSys && TheMSys->stats.enabled) {
81
+ TheMSys->stats.mi_free.fetch_add(1, cuda::memory_order_relaxed); }
86
82
  }
83
+
87
84
  extern "C" __device__ void NRT_MemInfo_call_dtor(NRT_MemInfo* mi)
88
85
  {
89
86
  if (mi->dtor) /* We have a destructor */
@@ -158,10 +155,10 @@ extern "C" __device__ void* NRT_Allocate_External(size_t size) {
158
155
  ptr = malloc(size);
159
156
  //NRT_Debug(nrt_debug_print("NRT_Allocate_External bytes=%zu ptr=%p\n", size, ptr));
160
157
 
161
- //if (TheMSys.stats.enabled)
162
- //{
163
- // TheMSys.stats.alloc++;
164
- //}
158
+ if (TheMSys && TheMSys->stats.enabled)
159
+ {
160
+ TheMSys->stats.alloc.fetch_add(1, cuda::memory_order_relaxed);
161
+ }
165
162
  return ptr;
166
163
  }
167
164
 

numba_cuda/numba/cuda/runtime/nrt.py ADDED
@@ -0,0 +1,318 @@
+ import ctypes
+ import os
+ from functools import wraps
+ import numpy as np
+
+ from numba import cuda, config
+ from numba.core.runtime.nrt import _nrt_mstats
+ from numba.cuda.cudadrv.driver import Linker, driver, launch_kernel
+ from numba.cuda.cudadrv import devices
+ from numba.cuda.api import get_current_device
+ from numba.cuda.utils import _readenv
+
+
+ # Check environment variable or config for NRT statistics enablement
+ NRT_STATS = (
+     _readenv("NUMBA_CUDA_NRT_STATS", bool, False) or
+     getattr(config, "NUMBA_CUDA_NRT_STATS", False)
+ )
+ if not hasattr(config, "NUMBA_CUDA_NRT_STATS"):
+     config.CUDA_NRT_STATS = NRT_STATS
+
+
+ # Check environment variable or config for NRT enablement
+ ENABLE_NRT = (
+     _readenv("NUMBA_CUDA_ENABLE_NRT", bool, False) or
+     getattr(config, "NUMBA_CUDA_ENABLE_NRT", False)
+ )
+ if not hasattr(config, "NUMBA_CUDA_ENABLE_NRT"):
+     config.CUDA_ENABLE_NRT = ENABLE_NRT
+
+
+ # Protect method to ensure NRT memory allocation and initialization
+ def _alloc_init_guard(method):
+     """
+     Ensure NRT memory allocation and initialization before running the method
+     """
+     @wraps(method)
+     def wrapper(self, *args, **kwargs):
+         self.ensure_allocated()
+         self.ensure_initialized()
+         return method(self, *args, **kwargs)
+     return wrapper
+
+
+ class _Runtime:
+     """Singleton class for Numba CUDA runtime"""
+     _instance = None
+
+     def __new__(cls, *args, **kwargs):
+         if cls._instance is None:
+             cls._instance = super(_Runtime, cls).__new__(cls, *args, **kwargs)
+         return cls._instance
+
+     def __init__(self):
+         """Initialize memsys module and variable"""
+         self._memsys_module = None
+         self._memsys = None
+         self._initialized = False
+
+     def _compile_memsys_module(self):
+         """
+         Compile memsys.cu and create a module from it in the current context
+         """
+         # Define the path for memsys.cu
+         memsys_mod = os.path.join(
+             os.path.dirname(os.path.abspath(__file__)),
+             "memsys.cu"
+         )
+         cc = get_current_device().compute_capability
+
+         # Create a new linker instance and add the cu file
+         linker = Linker.new(cc=cc)
+         linker.add_cu_file(memsys_mod)
+
+         # Complete the linker and create a module from it
+         cubin = linker.complete()
+         ctx = devices.get_context()
+         module = ctx.create_module_image(cubin)
+
+         # Set the memsys module
+         self._memsys_module = module
+
+     def ensure_allocated(self, stream=None):
+         """
+         If memsys is not allocated, allocate it; otherwise, perform a no-op
+         """
+         if self._memsys is not None:
+             return
+
+         # Allocate the memsys
+         self.allocate(stream)
+
+     def allocate(self, stream=None):
+         """
+         Allocate memsys on global memory
+         """
+         from numba.cuda import device_array
+
+         # Check if memsys module is defined
+         if self._memsys_module is None:
+             # Compile the memsys module if not defined
+             self._compile_memsys_module()
+
+         # Allocate space for NRT_MemSys
+         ptr, nbytes = self._memsys_module.get_global_symbol("memsys_size")
+         memsys_size = ctypes.c_uint64()
+         driver.cuMemcpyDtoH(ctypes.addressof(memsys_size),
+                             ptr.device_ctypes_pointer, nbytes)
+         self._memsys = device_array(
+             (memsys_size.value,), dtype="i1", stream=stream)
+         self.set_memsys_to_module(self._memsys_module, stream=stream)
+
+     def _single_thread_launch(self, module, stream, name, params=()):
+         """
+         Launch the specified kernel with only 1 thread
+         """
+         if stream is None:
+             stream = cuda.default_stream()
+
+         func = module.get_function(name)
+         launch_kernel(
+             func.handle,
+             1, 1, 1,
+             1, 1, 1,
+             0,
+             stream.handle,
+             params,
+             cooperative=False
+         )
+
+     def ensure_initialized(self, stream=None):
+         """
+         If memsys is not initialized, initialize memsys
+         """
+         if self._initialized:
+             return
+
+         # Initialize the memsys
+         self.initialize(stream)
+
+     def initialize(self, stream=None):
+         """
+         Launch memsys initialization kernel
+         """
+         self.ensure_allocated()
+
+         self._single_thread_launch(
+             self._memsys_module, stream, "NRT_MemSys_init")
+         self._initialized = True
+
+         if config.CUDA_NRT_STATS:
+             self.memsys_enable_stats()
+
+     @_alloc_init_guard
+     def memsys_enable_stats(self, stream=None):
+         """
+         Enable memsys statistics
+         """
+         self._single_thread_launch(
+             self._memsys_module, stream, "NRT_MemSys_enable_stats")
+
+     @_alloc_init_guard
+     def memsys_disable_stats(self, stream=None):
+         """
+         Disable memsys statistics
+         """
+         self._single_thread_launch(
+             self._memsys_module, stream, "NRT_MemSys_disable_stats")
+
+     @_alloc_init_guard
+     def memsys_stats_enabled(self, stream=None):
+         """
+         Return a boolean indicating whether memsys is enabled. Synchronizes
+         context
+         """
+         enabled_ar = cuda.managed_array(1, np.uint8)
+
+         self._single_thread_launch(
+             self._memsys_module,
+             stream,
+             "NRT_MemSys_stats_enabled",
+             (enabled_ar.device_ctypes_pointer,)
+         )
+
+         cuda.synchronize()
+         return bool(enabled_ar[0])
+
+     @_alloc_init_guard
+     def _copy_memsys_to_host(self, stream):
+         """
+         Copy all statistics of memsys to the host
+         """
+         dt = np.dtype([
+             ('alloc', np.uint64),
+             ('free', np.uint64),
+             ('mi_alloc', np.uint64),
+             ('mi_free', np.uint64)
+         ])
+
+         stats_for_read = cuda.managed_array(1, dt)
+
+         self._single_thread_launch(
+             self._memsys_module,
+             stream,
+             "NRT_MemSys_read",
+             [stats_for_read.device_ctypes_pointer]
+         )
+         cuda.synchronize()
+
+         return stats_for_read[0]
+
+     @_alloc_init_guard
+     def get_allocation_stats(self, stream=None):
+         """
+         Get the allocation statistics
+         """
+         enabled = self.memsys_stats_enabled(stream)
+         if not enabled:
+             raise RuntimeError("NRT stats are disabled.")
+         memsys = self._copy_memsys_to_host(stream)
+         return _nrt_mstats(
+             alloc=memsys["alloc"],
+             free=memsys["free"],
+             mi_alloc=memsys["mi_alloc"],
+             mi_free=memsys["mi_free"]
+         )
+
+     @_alloc_init_guard
+     def _get_single_stat(self, stat, stream=None):
+         """
+         Get a single stat from the memsys
+         """
+         got = cuda.managed_array(1, np.uint64)
+         self._single_thread_launch(
+             self._memsys_module,
+             stream,
+             f"NRT_MemSys_read_{stat}",
+             [got.device_ctypes_pointer]
+         )
+
+         cuda.synchronize()
+         return got[0]
+
+     @_alloc_init_guard
+     def memsys_get_stats_alloc(self, stream=None):
+         """
+         Get the allocation statistic
+         """
+         enabled = self.memsys_stats_enabled(stream)
+         if not enabled:
+             raise RuntimeError("NRT stats are disabled.")
+
+         return self._get_single_stat("alloc")
+
+     @_alloc_init_guard
+     def memsys_get_stats_free(self, stream=None):
+         """
+         Get the free statistic
+         """
+         enabled = self.memsys_stats_enabled(stream)
+         if not enabled:
+             raise RuntimeError("NRT stats are disabled.")
+
+         return self._get_single_stat("free")
+
+     @_alloc_init_guard
+     def memsys_get_stats_mi_alloc(self, stream=None):
+         """
+         Get the mi alloc statistic
+         """
+         enabled = self.memsys_stats_enabled(stream)
+         if not enabled:
+             raise RuntimeError("NRT stats are disabled.")
+
+         return self._get_single_stat("mi_alloc")
+
+     @_alloc_init_guard
+     def memsys_get_stats_mi_free(self, stream=None):
+         """
+         Get the mi free statistic
+         """
+         enabled = self.memsys_stats_enabled(stream)
+         if not enabled:
+             raise RuntimeError("NRT stats are disabled.")
+
+         return self._get_single_stat("mi_free")
+
+     def set_memsys_to_module(self, module, stream=None):
+         """
+         Set the memsys module. The module must contain `NRT_MemSys_set` kernel,
+         and declare a pointer to NRT_MemSys structure.
+         """
+         if self._memsys is None:
+             raise RuntimeError(
+                 "Please allocate NRT Memsys first before setting to module.")
+
+         self._single_thread_launch(
+             module,
+             stream,
+             "NRT_MemSys_set",
+             [self._memsys.device_ctypes_pointer,]
+         )
+
+     @_alloc_init_guard
+     def print_memsys(self, stream=None):
+         """
+         Print the current statistics of memsys, for debugging purposes
+         """
+         cuda.synchronize()
+         self._single_thread_launch(
+             self._memsys_module,
+             stream,
+             "NRT_MemSys_print"
+         )
+
+
+ # Create an instance of the runtime
+ rtsys = _Runtime()
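
Taken together, `rtsys` gives the host a view of device-side NRT allocations. A hedged end-to-end sketch of the statistics API (assuming NRT is enabled and kernels that allocate through NRT, as in the tests later in this diff):

    from numba import cuda
    from numba.cuda.runtime import rtsys

    rtsys.ensure_initialized()
    rtsys.memsys_enable_stats()

    before = rtsys.get_allocation_stats()
    # ... launch kernels that allocate through NRT ...
    after = rtsys.get_allocation_stats()
    print(after.alloc - before.alloc, after.free - before.free)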

numba_cuda/numba/cuda/tests/__init__.py CHANGED
@@ -49,6 +49,7 @@ def load_tests(loader, tests, pattern):
      if gpus and gpus[0].compute_capability >= (2, 0):
          suite.addTests(load_testsuite(loader, join(this_dir, 'cudadrv')))
          suite.addTests(load_testsuite(loader, join(this_dir, 'cudapy')))
+         suite.addTests(load_testsuite(loader, join(this_dir, 'nrt')))
          suite.addTests(load_testsuite(loader, join(this_dir,
                                                     'doc_examples')))
      else:

numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py CHANGED
@@ -4,6 +4,7 @@ from numba.cuda.cudadrv import devicearray
  from numba import cuda
  from numba.cuda.testing import unittest, CUDATestCase
  from numba.cuda.testing import skip_on_cudasim
+ from numba.tests.support import IS_NUMPY_2


  class TestCudaNDArray(CUDATestCase):
@@ -456,6 +457,36 @@ class TestCudaNDArray(CUDATestCase):
          dev_array_from_host.copy_to_device(dev_array)


+ class TestArrayMethod(CUDATestCase):
+     """Tests of the __array__() method via np.array"""
+
+     def test_np_array(self):
+         dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
+         host_array = np.array(dev_array)
+         np.testing.assert_equal(dev_array.copy_to_host(), host_array)
+
+     def test_np_array_dtype(self):
+         dtype = np.int32
+         dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
+         host_array = np.array(dev_array, dtype=dtype)
+         np.testing.assert_equal(
+             host_array,
+             dev_array.copy_to_host().astype(dtype)
+         )
+
+     @unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
+     def test_np_array_copy_false(self):
+         dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
+         with self.assertRaisesRegex(ValueError, "`copy=False` is not"):
+             np.array(dev_array, copy=False)
+
+     @unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
+     def test_np_array_copy_true(self):
+         dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
+         host_array = np.array(dev_array)
+         np.testing.assert_equal(dev_array.copy_to_host(), host_array)
+
+
  class TestRecarray(CUDATestCase):
      def test_recarray(self):
          # From issue #4111

numba_cuda/numba/cuda/tests/nrt/mock_numpy.py CHANGED
@@ -1,8 +1,12 @@
+ import math
+
+ import numpy as np

  from numba.core import errors, types
  from numba.core.extending import overload
  from numba.np.arrayobj import (_check_const_str_dtype, is_nonelike,
-                                ty_parse_dtype, ty_parse_shape, numpy_empty_nd)
+                                ty_parse_dtype, ty_parse_shape, numpy_empty_nd,
+                                numpy_empty_like_nd)


  # Typical tests for allocation use array construction (e.g. np.zeros, np.empty,
@@ -20,6 +24,18 @@ def cuda_empty(shape, dtype):
      pass


+ def cuda_empty_like(arr):
+     pass
+
+
+ def cuda_arange(start):
+     pass
+
+
+ def cuda_ones(shape):
+     pass
+
+
  @overload(cuda_empty)
  def ol_cuda_empty(shape, dtype):
      _check_const_str_dtype("empty", dtype)
@@ -40,3 +56,91 @@ def ol_cuda_empty(shape, dtype):
      else:
          msg = f"Cannot parse input types to function np.empty({shape}, {dtype})"
          raise errors.TypingError(msg)
+
+
+ @overload(cuda_empty_like)
+ def ol_cuda_empty_like(arr):
+
+     if isinstance(arr, types.Array):
+         nb_dtype = arr.dtype
+     else:
+         nb_dtype = arr
+
+     if isinstance(arr, types.Array):
+         layout = arr.layout if arr.layout != 'A' else 'C'
+         retty = arr.copy(dtype=nb_dtype, layout=layout, readonly=False)
+     else:
+         retty = types.Array(nb_dtype, 0, 'C')
+
+     def impl(arr):
+         dtype = None
+         return numpy_empty_like_nd(arr, dtype, retty)
+     return impl
+
+
+ def _arange_dtype(*args):
+     bounds = [a for a in args if not isinstance(a, types.NoneType)]
+
+     if any(isinstance(a, types.Complex) for a in bounds):
+         dtype = types.complex128
+     elif any(isinstance(a, types.Float) for a in bounds):
+         dtype = types.float64
+     else:
+         # `np.arange(10).dtype` is always `np.dtype(int)`, aka `np.int_`, which
+         # in all released versions of numpy corresponds to the C `long` type.
+         # Windows 64 is broken by default here because Numba (as of 0.47) does
+         # not differentiate between Python and NumPy integers, so a `typeof(1)`
+         # on w64 is `int64`, i.e. `intp`. This means an arange(<some int>) will
+         # be typed as arange(int64) and the following will yield int64 opposed
+         # to int32. Example: without a load of analysis to work out of the args
+         # were wrapped in NumPy int*() calls it's not possible to detect the
+         # difference between `np.arange(10)` and `np.arange(np.int64(10)`.
+         NPY_TY = getattr(types, "int%s" % (8 * np.dtype(int).itemsize))
+
+         # unliteral these types such that `max` works.
+         unliteral_bounds = [types.unliteral(x) for x in bounds]
+         dtype = max(unliteral_bounds + [NPY_TY,])
+
+     return dtype
+
+
+ @overload(cuda_arange)
+ def ol_cuda_arange(start):
+     """Simplified arange with just 1 argument."""
+     if (not isinstance(start, types.Number)):
+         return
+
+     start_value = getattr(start, "literal_value", None)
+
+     def impl(start):
+         # Allow for improved performance if given literal arguments.
+         lit_start = start_value if start_value is not None else start
+
+         _step = 1
+         _start, _stop = 0, lit_start
+
+         nitems_c = (_stop - _start) / _step
+         nitems_r = int(math.ceil(nitems_c.real))
+
+         # Binary operator needed for compiler branch pruning.
+         nitems = max(nitems_r, 0)
+
+         arr = cuda_empty(nitems, np.int64)
+         val = _start
+         for i in range(nitems):
+             arr[i] = val + (i * _step)
+         return arr
+
+     return impl
+
+
+ @overload(cuda_ones)
+ def ol_cuda_ones(shape):
+
+     def impl(shape):
+         arr = cuda_empty(shape, np.float64)
+         arr_flat = arr.flat
+         for idx in range(len(arr_flat)):
+             arr_flat[idx] = 1
+         return arr
+     return impl
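
These overloads let the test kernels exercise NRT allocation paths without a device-side NumPy implementation; `cuda_arange` and `cuda_ones` are deliberately minimal stand-ins. A hedged sketch of their use inside a kernel (mirroring the tests below):

    import numpy as np
    from numba import cuda
    from numba.cuda.tests.nrt.mock_numpy import cuda_arange

    @cuda.jit
    def foo():
        x = cuda_arange(10)[0]   # allocates, and later frees, through NRT

    foo[1, 1]()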

numba_cuda/numba/cuda/tests/nrt/test_nrt.py CHANGED
@@ -1,47 +1,22 @@
  import re
- import gc
+ import os
+
  import numpy as np
  import unittest
- from unittest.mock import patch
- from numba.core.runtime import rtsys
- from numba.tests.support import EnableNRTStatsMixin
  from numba.cuda.testing import CUDATestCase

- from .mock_numpy import cuda_empty
+ from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_ones, cuda_arange
+ from numba.tests.support import run_in_subprocess, override_config

  from numba import cuda
-
-
- class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
-
-     def setUp(self):
-         # Clean up any NRT-backed objects hanging in a dead reference cycle
-         gc.collect()
-         super(TestNrtRefCt, self).setUp()
-
-     @unittest.expectedFailure
-     def test_no_return(self):
-         """
-         Test issue #1291
-         """
-         n = 10
-
-         @cuda.jit
-         def kernel():
-             for i in range(n):
-                 temp = cuda_empty(2, np.float64)  # noqa: F841
-             return None
-
-         init_stats = rtsys.get_allocation_stats()
-
-         with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-             kernel[1,1]()
-         cur_stats = rtsys.get_allocation_stats()
-         self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
-         self.assertEqual(cur_stats.free - init_stats.free, n)
+ from numba.cuda.runtime.nrt import rtsys


  class TestNrtBasic(CUDATestCase):
+     def run(self, result=None):
+         with override_config("CUDA_ENABLE_NRT", True):
+             super(TestNrtBasic, self).run(result)
+
      def test_nrt_launches(self):
          @cuda.jit
          def f(x):
@@ -52,8 +27,7 @@ class TestNrtBasic(CUDATestCase):
              x = cuda_empty(10, np.int64)
              f(x)

-         with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-             g[1,1]()
+         g[1,1]()
          cuda.synchronize()

      def test_nrt_ptx_contains_refcount(self):
@@ -66,8 +40,7 @@ class TestNrtBasic(CUDATestCase):
              x = cuda_empty(10, np.int64)
              f(x)

-         with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-             g[1,1]()
+         g[1,1]()

          ptx = next(iter(g.inspect_asm().values()))

@@ -100,11 +73,160 @@ class TestNrtBasic(CUDATestCase):

          out_ary = np.zeros(1, dtype=np.int64)

-         with patch('numba.config.CUDA_ENABLE_NRT', True, create=True):
-             g[1,1](out_ary)
+         g[1,1](out_ary)

          self.assertEqual(out_ary[0], 1)


+ class TestNrtStatistics(CUDATestCase):
+
+     def setUp(self):
+         self._stream = cuda.default_stream()
+         # Store the current stats state
+         self.__stats_state = rtsys.memsys_stats_enabled(self._stream)
+
+     def tearDown(self):
+         # Set stats state back to whatever it was before the test ran
+         if self.__stats_state:
+             rtsys.memsys_enable_stats(self._stream)
+         else:
+             rtsys.memsys_disable_stats(self._stream)
+
+     def test_stats_env_var_explicit_on(self):
+         # Checks that explicitly turning the stats on via the env var works.
+         src = """if 1:
+         from numba import cuda
+         from numba.cuda.runtime import rtsys
+         from numba.cuda.tests.nrt.mock_numpy import cuda_arange
+
+         @cuda.jit
+         def foo():
+             x = cuda_arange(10)[0]
+
+         # initialize the NRT before use
+         rtsys.initialize()
+         assert rtsys.memsys_stats_enabled(), "Stats not enabled"
+         orig_stats = rtsys.get_allocation_stats()
+         foo[1, 1]()
+         new_stats = rtsys.get_allocation_stats()
+         total_alloc = new_stats.alloc - orig_stats.alloc
+         total_free = new_stats.free - orig_stats.free
+         total_mi_alloc = new_stats.mi_alloc - orig_stats.mi_alloc
+         total_mi_free = new_stats.mi_free - orig_stats.mi_free
+
+         expected = 1
+         assert total_alloc == expected, \\
+             f"total_alloc != expected, {total_alloc} != {expected}"
+         assert total_free == expected, \\
+             f"total_free != expected, {total_free} != {expected}"
+         assert total_mi_alloc == expected, \\
+             f"total_mi_alloc != expected, {total_mi_alloc} != {expected}"
+         assert total_mi_free == expected, \\
+             f"total_mi_free != expected, {total_mi_free} != {expected}"
+         """
+
+         # Check env var explicitly being set works
+         env = os.environ.copy()
+         env['NUMBA_CUDA_NRT_STATS'] = "1"
+         env['NUMBA_CUDA_ENABLE_NRT'] = "1"
+         run_in_subprocess(src, env=env)
+
+     def check_env_var_off(self, env):
+
+         src = """if 1:
+         from numba import cuda
+         import numpy as np
+         from numba.cuda.runtime import rtsys
+
+         @cuda.jit
+         def foo():
+             arr = np.arange(10)[0]
+
+         assert rtsys.memsys_stats_enabled() == False
+         try:
+             rtsys.get_allocation_stats()
+         except RuntimeError as e:
+             assert "NRT stats are disabled." in str(e)
+         """
+         run_in_subprocess(src, env=env)
+
+     def test_stats_env_var_explicit_off(self):
+         # Checks that explicitly turning the stats off via the env var works.
+         env = os.environ.copy()
+         env['NUMBA_CUDA_NRT_STATS'] = "0"
+         self.check_env_var_off(env)
+
+     def test_stats_env_var_default_off(self):
+         # Checks that the env var not being set is the same as "off", i.e.
+         # default for Numba is off.
+         env = os.environ.copy()
+         env.pop('NUMBA_CUDA_NRT_STATS', None)
+         self.check_env_var_off(env)
+
+     def test_stats_status_toggle(self):
+
+         @cuda.jit
+         def foo():
+             tmp = cuda_ones(3)
+             arr = cuda_arange(5 * tmp[0])  # noqa: F841
+             return None
+
+         with override_config('CUDA_ENABLE_NRT', True):
+             # Switch on stats
+             rtsys.memsys_enable_stats()
+             # check the stats are on
+             self.assertTrue(rtsys.memsys_stats_enabled())
+
+             for i in range(2):
+                 # capture the stats state
+                 stats_1 = rtsys.get_allocation_stats()
+                 # Switch off stats
+                 rtsys.memsys_disable_stats()
+                 # check the stats are off
+                 self.assertFalse(rtsys.memsys_stats_enabled())
+                 # run something that would move the counters were they enabled
+                 foo[1, 1]()
+                 # Switch on stats
+                 rtsys.memsys_enable_stats()
+                 # check the stats are on
+                 self.assertTrue(rtsys.memsys_stats_enabled())
+                 # capture the stats state (should not have changed)
+                 stats_2 = rtsys.get_allocation_stats()
+                 # run something that will move the counters
+                 foo[1, 1]()
+                 # capture the stats state (should have changed)
+                 stats_3 = rtsys.get_allocation_stats()
+                 # check stats_1 == stats_2
+                 self.assertEqual(stats_1, stats_2)
+                 # check stats_2 < stats_3
+                 self.assertLess(stats_2, stats_3)
+
+     def test_rtsys_stats_query_raises_exception_when_disabled(self):
+         # Checks that the standard rtsys.get_allocation_stats() query raises
+         # when stats counters are turned off.
+
+         rtsys.memsys_disable_stats()
+         self.assertFalse(rtsys.memsys_stats_enabled())
+
+         with self.assertRaises(RuntimeError) as raises:
+             rtsys.get_allocation_stats()
+
+         self.assertIn("NRT stats are disabled.", str(raises.exception))
+
+     def test_nrt_explicit_stats_query_raises_exception_when_disabled(self):
+         # Checks the various memsys_get_stats functions raise if queried when
+         # the stats counters are disabled.
+         method_variations = ('alloc', 'free', 'mi_alloc', 'mi_free')
+         for meth in method_variations:
+             stats_func = getattr(rtsys, f'memsys_get_stats_{meth}')
+             with self.subTest(stats_func=stats_func):
+                 # Turn stats off
+                 rtsys.memsys_disable_stats()
+                 self.assertFalse(rtsys.memsys_stats_enabled())
+                 with self.assertRaises(RuntimeError) as raises:
+                     stats_func()
+                 self.assertIn("NRT stats are disabled.", str(raises.exception))
+
+
  if __name__ == '__main__':
      unittest.main()

numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py ADDED
@@ -0,0 +1,114 @@
+ import numpy as np
+ import unittest
+ from numba.tests.support import override_config
+ from numba.cuda.runtime import rtsys
+ from numba.cuda.tests.support import EnableNRTStatsMixin
+ from numba.cuda.testing import CUDATestCase
+ from numba.cuda.tests.nrt.mock_numpy import cuda_empty, cuda_empty_like
+
+ from numba import cuda
+
+
+ class TestNrtRefCt(EnableNRTStatsMixin, CUDATestCase):
+
+     def setUp(self):
+         super(TestNrtRefCt, self).setUp()
+
+     def tearDown(self):
+         super(TestNrtRefCt, self).tearDown()
+
+     def run(self, result=None):
+         with override_config("CUDA_ENABLE_NRT", True):
+             super(TestNrtRefCt, self).run(result)
+
+     def test_no_return(self):
+         """
+         Test issue #1291
+         """
+
+         n = 10
+
+         @cuda.jit
+         def kernel():
+             for i in range(n):
+                 temp = cuda_empty(2, np.float64)  # noqa: F841
+             return None
+
+         init_stats = rtsys.get_allocation_stats()
+         kernel[1, 1]()
+         cur_stats = rtsys.get_allocation_stats()
+         self.assertEqual(cur_stats.alloc - init_stats.alloc, n)
+         self.assertEqual(cur_stats.free - init_stats.free, n)
+
+     def test_escaping_var_init_in_loop(self):
+         """
+         Test issue #1297
+         """
+
+         @cuda.jit
+         def g(n):
+
+             x = cuda_empty((n, 2), np.float64)
+
+             for i in range(n):
+                 y = x[i]
+
+             for i in range(n):
+                 y = x[i]  # noqa: F841
+
+             return None
+
+         init_stats = rtsys.get_allocation_stats()
+         g[1, 1](10)
+         cur_stats = rtsys.get_allocation_stats()
+         self.assertEqual(cur_stats.alloc - init_stats.alloc, 1)
+         self.assertEqual(cur_stats.free - init_stats.free, 1)
+
+     def test_invalid_computation_of_lifetime(self):
+         """
+         Test issue #1573
+         """
+         @cuda.jit
+         def if_with_allocation_and_initialization(arr1, test1):
+             tmp_arr = cuda_empty_like(arr1)
+
+             for i in range(tmp_arr.shape[0]):
+                 pass
+
+             if test1:
+                 cuda_empty_like(arr1)
+
+         arr = np.random.random((5, 5))  # the values are not consumed
+
+         init_stats = rtsys.get_allocation_stats()
+         if_with_allocation_and_initialization[1, 1](arr, False)
+         cur_stats = rtsys.get_allocation_stats()
+         self.assertEqual(cur_stats.alloc - init_stats.alloc,
+                          cur_stats.free - init_stats.free)
+
+     def test_del_at_beginning_of_loop(self):
+         """
+         Test issue #1734
+         """
+         @cuda.jit
+         def f(arr):
+             res = 0
+
+             for i in (0, 1):
+                 # `del t` is issued here before defining t.  It must be
+                 # correctly handled by the lowering phase.
+                 t = arr[i]
+                 if t[i] > 1:
+                     res += t[i]
+
+         arr = np.ones((2, 2))
+
+         init_stats = rtsys.get_allocation_stats()
+         f[1, 1](arr)
+         cur_stats = rtsys.get_allocation_stats()
+         self.assertEqual(cur_stats.alloc - init_stats.alloc,
+                          cur_stats.free - init_stats.free)
+
+
+ if __name__ == '__main__':
+     unittest.main()

numba_cuda/numba/cuda/tests/support.py ADDED
@@ -0,0 +1,11 @@
+ from numba.cuda.runtime.nrt import rtsys
+
+
+ class EnableNRTStatsMixin(object):
+     """Mixin to enable the NRT statistics counters."""
+
+     def setUp(self):
+         rtsys.memsys_enable_stats()
+
+     def tearDown(self):
+         rtsys.memsys_disable_stats()

numba_cuda/numba/cuda/utils.py ADDED
@@ -0,0 +1,22 @@
+ import os
+ import warnings
+ import traceback
+
+
+ def _readenv(name, ctor, default):
+     value = os.environ.get(name)
+     if value is None:
+         return default() if callable(default) else default
+     try:
+         if ctor is bool:
+             return value.lower() in {'1', "true"}
+         return ctor(value)
+     except Exception:
+         warnings.warn(
+             f"Environment variable '{name}' is defined but its associated "
+             f"value '{value}' could not be parsed.\n"
+             "The parse failed with exception:\n"
+             f"{traceback.format_exc()}",
+             RuntimeWarning
+         )
+         return default
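
`_readenv` reads an environment variable and parses it with the given constructor, returning the default (calling it if callable) when the variable is unset and warning when parsing fails; for `bool` it accepts '1' and 'true' case-insensitively. For example:

    import os
    from numba.cuda.utils import _readenv

    os.environ["NUMBA_CUDA_NRT_STATS"] = "true"
    _readenv("NUMBA_CUDA_NRT_STATS", bool, False)   # -> True
    _readenv("NUMBA_CUDA_ENABLE_NRT", int, 0)       # unset -> 0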

numba_cuda-0.4.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: numba-cuda
- Version: 0.3.0
+ Version: 0.4.0
  Summary: CUDA target for Numba
  Author: Anaconda Inc., NVIDIA Corporation
  License: BSD 2-clause

numba_cuda-0.4.0.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
  _numba_cuda_redirector.py,sha256=QKJmYICSQvjvph0Zw9OW015MsuKxIF28GPFjR35AXLM,2681
- numba_cuda/VERSION,sha256=2RXMldbKj0euKXcT7UbU5cXZnd0p_Dxh4mO98wXytbA,6
+ numba_cuda/VERSION,sha256=QLjrQACpE6d5EJBTXykdPTaYdBYqie88nj1OiHobnnk,6
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
  numba_cuda/_version.py,sha256=jbdUsbR7sVllw0KxQNB0-FMd929CGg3kH2fhHdrlkuc,719
  numba_cuda/numba/cuda/__init__.py,sha256=idyVHOObC9lTYnp62v7rVprSacRM4d5F6vhXfG5ElTI,621
@@ -21,7 +21,7 @@ numba_cuda/numba/cuda/decorators.py,sha256=qSpir16-jPYSe2YuRZ6g9INeobmsMNg6ab9IZ
  numba_cuda/numba/cuda/descriptor.py,sha256=rNMaurJkjNjIBmHPozDoLC35DMURE0fn_LtnXRmaG_w,985
  numba_cuda/numba/cuda/device_init.py,sha256=lP79tCsQ0Np9xcbjv_lXcH4JOiVZvV8nwg3INdETxsc,3586
  numba_cuda/numba/cuda/deviceufunc.py,sha256=yxAH71dpgJWK8okmCJm0FUV6z2AqdThCYOTZspT7z0M,30775
- numba_cuda/numba/cuda/dispatcher.py,sha256=Q8WN7jTAX3xy_D2sEgSeFHAivqavI2PRlfDjR7ysing,42073
+ numba_cuda/numba/cuda/dispatcher.py,sha256=cJH7Jm-U26PyU-M2Igevar_Q_c_k9R-A99InnRGPzX0,42444
  numba_cuda/numba/cuda/errors.py,sha256=XwWHzCllx0DXU6BQdoRH0m3pznGxnTFOBTVYXMmCfqg,1724
  numba_cuda/numba/cuda/extending.py,sha256=URsyBYls2te-mgE0yvDY6akvawYCA0blBFfD7Lf9DO4,142
  numba_cuda/numba/cuda/initialize.py,sha256=TQGHGLQoq4ch4J6CLDcJdGsZzXM-g2kDgdyO1u-Rbhg,546
@@ -43,12 +43,13 @@ numba_cuda/numba/cuda/target.py,sha256=hBflzmxCGlmTugWT1sYhZj9f4HkQAMK2RQ9lO85pM
  numba_cuda/numba/cuda/testing.py,sha256=E0wP2vfno1yWsl0v1zg31kpbU8FrKxTF-5y9Iv4WjA4,6412
  numba_cuda/numba/cuda/types.py,sha256=WVfjcly_VUpG9FfKueiEPzZm2NV8Hg0XAFg3bNzPdVc,1314
  numba_cuda/numba/cuda/ufuncs.py,sha256=txw27IxG80W1Yo7e-XwL2AMcQo0fMnxMjBIMy-n5pCo,23317
+ numba_cuda/numba/cuda/utils.py,sha256=JId22EI3KkQosW6Dafdaw43qU0xXXO_4JOENLap8klU,630
  numba_cuda/numba/cuda/vector_types.py,sha256=s18dY0IUpT-RcaBvQsa_zEbYuuL2IT0Vh6afCeccwmQ,6750
  numba_cuda/numba/cuda/vectorizers.py,sha256=u_0EzaD5tqVH8uOz4Gmqn3FgPC1rckwDAQuROm0BXm8,8915
  numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=0TL4MZcJXUoo9qA7uu0vLv7eHrXRerVmyfi7O149ITw,199
- numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=06kM7iFcx1TYiFhs1o9r1kyoA3k5yS7mFAdZDf6nrxA,31215
+ numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=jsfr4LL12HWJzU3HUgzXpkk38Z-pyFyzLuGArg2G-nU,31363
  numba_cuda/numba/cuda/cudadrv/devices.py,sha256=6SneNmoq83gue0txFWWx4A65vViAa8xA06FzkApoqAk,7992
- numba_cuda/numba/cuda/cudadrv/driver.py,sha256=FONYaUzgexmPUIMsSq0zr_FgD9eLbWT8m1APEVrLJRo,114887
+ numba_cuda/numba/cuda/cudadrv/driver.py,sha256=1F-Ugsf1bdZgK-So_q_TkJckdoczlzhBrCEJn8KYxG0,114321
  numba_cuda/numba/cuda/cudadrv/drvapi.py,sha256=52ms3X6hfPaQB8E1jb6g7QKqRvHzBMlDQ-V2DM1rXxQ,17178
  numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=nXRngdr-k3h_BNGQuJUxmp89yGNWxqEDJedpwDPEZ44,14209
  numba_cuda/numba/cuda/cudadrv/enums.py,sha256=Wy5dzukTk4TnWCowg_PLceET_v2xEyiWLu9TyH8pXr8,23742
@@ -57,14 +58,18 @@ numba_cuda/numba/cuda/cudadrv/libs.py,sha256=Gk9zQ1CKcsZsWl-_9QneXeP9VH5q5R1I3Cx
  numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=Q_YTv0apBo9t8pkMlKrthPPSVeLd376ZTmVDF5NtVVo,1328
  numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=-dTPHvAkDjdH6vS5OjgrB71AFuqKO6CRgf7hpOk2wiw,802
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
- numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=RR096Ic2_Zu96C-GGh8x8WTOyxnmDkwtcwag8a_npkQ,10898
+ numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=XM9_Vllv7HzH5wZIR2lwFictyX68XDtNbyLkXlL6NTI,11003
  numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=v2hJJTAQeRmoG59-hnhgMEp5BSVA73QHtEoy636VKao,24107
  numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=WdeUoWzsYNYodx8kMRLVIjnNs0QzwpCihd2Q0AaqItE,226
  numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=Tj9ACrzQqNmDSO6xfpzw12EsQknSywQ-ZGuWMbDdHnQ,4255
  numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  numba_cuda/numba/cuda/kernels/reduction.py,sha256=fQnaWtoNB2yp143MNbE1DujqFIYy0KV_2moQVvbaROU,9362
  numba_cuda/numba/cuda/kernels/transpose.py,sha256=5FSu-nbTfhintxwfU-bjT2px2otQF5QkKH-JPDDWq_k,2061
- numba_cuda/numba/cuda/runtime/nrt.cu,sha256=i8Xcf-x84n3uNPzs_xak4c_sLHOH91ast2aE6DKKf9Q,5497
+ numba_cuda/numba/cuda/runtime/__init__.py,sha256=rDi_pA5HnwpuwT8wwy0hparfO7HWgfjLVj9htbk_tCg,54
+ numba_cuda/numba/cuda/runtime/memsys.cu,sha256=5nTXrstrUBVLeLvnDUReyhRGvVILK--VdM1u3oUCa2o,2386
+ numba_cuda/numba/cuda/runtime/memsys.cuh,sha256=4oDvs7LvcMmdkN58b8e0nBqPka_sdagoULSKRut74DY,503
+ numba_cuda/numba/cuda/runtime/nrt.cu,sha256=WB7jQxT1bLdkY6Tm7-_ytVLjJxK4iU0OFifbPIpLwvw,5403
+ numba_cuda/numba/cuda/runtime/nrt.py,sha256=pmacryGZn25IHjdRMwT2vZipdtu0xsjpPDic_hlRxkA,9195
  numba_cuda/numba/cuda/simulator/__init__.py,sha256=crW0VQ_8e7DMRSHKoAIziZ37ea5mpbh_49tR9M3d5YY,1610
  numba_cuda/numba/cuda/simulator/api.py,sha256=K_fX-w9X4grGx2IAp0XlBW9rth5l7wibMwinQvkE7Jc,3237
  numba_cuda/numba/cuda/simulator/compiler.py,sha256=eXnvmzSKzIZZzBz6ZFJ-vMNyRAgqbCiB-AO5IJXuUyM,232
@@ -82,7 +87,8 @@ numba_cuda/numba/cuda/simulator/cudadrv/error.py,sha256=ACSQ7ZvhuCHnvV4GmvRuKWZ5
  numba_cuda/numba/cuda/simulator/cudadrv/libs.py,sha256=ry5rerpZrnAy70LU_YBa1KNaqKBGLHE9cMxljdSzaik,101
  numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py,sha256=vIFQi4ewYXyzUYssfw78QLfoZmoVgloFCLTk55Gg1tw,474
  numba_cuda/numba/cuda/simulator/cudadrv/runtime.py,sha256=K63p7puZJZD3BQ6ZT0qoII_Z3xJiUckp2dhozFjrnEs,358
- numba_cuda/numba/cuda/tests/__init__.py,sha256=5Kh5-TtG6wclbWd_wDRIvtQG4O4McmqMpyNihl2nORM,2347
+ numba_cuda/numba/cuda/tests/__init__.py,sha256=4U2RJuURN6SazAUSEtVofVEtahN3dDfUYNyDCmu64zo,2421
+ numba_cuda/numba/cuda/tests/support.py,sha256=1og4VLrK2x2LF5m5ARrrHVe-JhYx9Gv9ODKt6-8r6Aw,253
  numba_cuda/numba/cuda/tests/cudadrv/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
  numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py,sha256=cjHQ0J6F8APrLm23ZCFr0S7dtQmLqwq9vxMoI5lyn68,5300
  numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py,sha256=lSEuEM7x-x95m_lS_wSIBKnBxOhzn-AJ3WjYw8bW0y4,4492
@@ -92,7 +98,7 @@ numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py,sha256=rikIJQ266l_
  numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py,sha256=y--0AZFVpp2nmbeI1jbgZsWbBP-iVEmG8WKgR9XrxKE,7663
  numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py,sha256=sqNbo8pk4Zl5ptuGXrXFndia4IyttbuGnqjVTOtGuuw,801
  numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py,sha256=MDJMIWm1jCsBOcuwdshzqwaE__uqX0562uSjxFhud3M,6627
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py,sha256=67dmVO6v5gzp89pgb4wpxqDrWAx1UjX4vhdDQH4mebQ,20403
+ numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py,sha256=eWczfXXIHS0p9eNhVagzXa4XWPwmrCb_yIBuDtjgq8c,21628
  numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py,sha256=BR1ccEj_TCVToHoHS8KwwCfKLMUl6KGb92Cx6nX-XPg,8404
  numba_cuda/numba/cuda/tests/cudadrv/test_detect.py,sha256=lCt2E8gxnd8O-fRobDEwgX4jBZ15W7cImQcZc8_u2Sg,2774
  numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py,sha256=ah82yaWFvBfUTTSfbkZBKLsUf2tTSSJNvlSxrk1RI1E,7094
@@ -231,14 +237,15 @@ numba_cuda/numba/cuda/tests/nocuda/test_import.py,sha256=teiL8rpFGQOh41kyBSSNHHF
  numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py,sha256=7kJOPHEcrjy_kTA9Ym-iT_B972bgFRu3UkRtwIgWtuI,7948
  numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py,sha256=n0_-xFaw6QqiZbhe55oy7lnEeOwqTvA55p5EUFiTpNw,2006
  numba_cuda/numba/cuda/tests/nrt/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
- numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Qtn52GoKZ_ydre3oqkLWVdImC37tuPClUy4uHSutaJo,1568
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=Ox6ei2DldvSSS-CndTXRxLnsvWdteOQNgn6GvKHB244,2789
+ numba_cuda/numba/cuda/tests/nrt/mock_numpy.py,sha256=Cx2DGhm2bJheShP2Ja1w9YLlRTeAMM7u1UYHsPnTzA8,4552
+ numba_cuda/numba/cuda/tests/nrt/test_nrt.py,sha256=b3rtK018qslhUU5UsAAa3s-mjlnlfxAwTJmARTVD2j4,7650
+ numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py,sha256=Wq46oICum9IXnbQ97vV8V7g-3U01PLQEQbaGSNdRuMg,3163
  numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq6pJwHEwmKVmJOJxPBtsMTbnuzqYkik,2679
  numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=V0raLZLGSiWbE_K-JluI0CnmNkXbhlMVj-TH7P1OV8E,5014
  numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
  numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
- numba_cuda-0.3.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
- numba_cuda-0.3.0.dist-info/METADATA,sha256=rbDC27qfmpgf9Qw5_p5YiSRyqc9hd_W2rAsA-geDRKk,1496
- numba_cuda-0.3.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- numba_cuda-0.3.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
- numba_cuda-0.3.0.dist-info/RECORD,,
+ numba_cuda-0.4.0.dist-info/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
+ numba_cuda-0.4.0.dist-info/METADATA,sha256=BWlfqEMCG0dlSXORk9sKzY7nT_YdQzk9eQ7fBX4rvlY,1496
+ numba_cuda-0.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ numba_cuda-0.4.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
+ numba_cuda-0.4.0.dist-info/RECORD,,

numba_cuda-0.4.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.6.0)
+ Generator: setuptools (75.8.0)
  Root-Is-Purelib: true
  Tag: py3-none-any