numba-cuda 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +0 -8
  3. numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
  4. numba_cuda/numba/cuda/api_util.py +6 -0
  5. numba_cuda/numba/cuda/cgutils.py +1291 -0
  6. numba_cuda/numba/cuda/codegen.py +32 -14
  7. numba_cuda/numba/cuda/compiler.py +113 -10
  8. numba_cuda/numba/cuda/core/caching.py +741 -0
  9. numba_cuda/numba/cuda/core/callconv.py +338 -0
  10. numba_cuda/numba/cuda/core/codegen.py +168 -0
  11. numba_cuda/numba/cuda/core/compiler.py +205 -0
  12. numba_cuda/numba/cuda/core/typed_passes.py +139 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +1 -1
  14. numba_cuda/numba/cuda/cudadecl.py +0 -268
  15. numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
  16. numba_cuda/numba/cuda/cudadrv/devices.py +4 -6
  17. numba_cuda/numba/cuda/cudadrv/driver.py +105 -50
  18. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
  19. numba_cuda/numba/cuda/cudaimpl.py +4 -178
  20. numba_cuda/numba/cuda/debuginfo.py +469 -3
  21. numba_cuda/numba/cuda/device_init.py +0 -1
  22. numba_cuda/numba/cuda/dispatcher.py +311 -14
  23. numba_cuda/numba/cuda/extending.py +2 -1
  24. numba_cuda/numba/cuda/fp16.py +348 -0
  25. numba_cuda/numba/cuda/intrinsics.py +1 -1
  26. numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
  27. numba_cuda/numba/cuda/lowering.py +1833 -8
  28. numba_cuda/numba/cuda/mathimpl.py +2 -90
  29. numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
  30. numba_cuda/numba/cuda/nvvmutils.py +2 -1
  31. numba_cuda/numba/cuda/printimpl.py +2 -1
  32. numba_cuda/numba/cuda/serialize.py +264 -0
  33. numba_cuda/numba/cuda/simulator/__init__.py +2 -0
  34. numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
  35. numba_cuda/numba/cuda/stubs.py +0 -308
  36. numba_cuda/numba/cuda/target.py +13 -5
  37. numba_cuda/numba/cuda/testing.py +156 -5
  38. numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
  39. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
  40. numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
  41. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +16 -5
  42. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +5 -1
  43. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
  44. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  45. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
  46. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
  47. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  48. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
  49. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  50. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -5
  51. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
  52. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
  53. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
  54. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
  55. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
  56. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
  57. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +2 -5
  58. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
  59. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
  60. numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
  61. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
  62. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
  63. numba_cuda/numba/cuda/utils.py +785 -0
  64. numba_cuda/numba/cuda/vector_types.py +1 -1
  65. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
  66. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +69 -56
  67. numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
  68. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
  69. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
  70. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,11 @@
1
1
  import math
2
2
  import operator
3
3
  from llvmlite import ir
4
- from numba.core import types, typing, cgutils, targetconfig
4
+ from numba.core import types, typing, targetconfig
5
+ from numba.cuda import cgutils
5
6
  from numba.core.imputils import Registry
6
7
  from numba.types import float32, float64, int64, uint64
7
8
  from numba.cuda import libdevice
8
- from numba import cuda
9
9
 
10
10
  registry = Registry()
11
11
  lower = registry.lower
@@ -89,94 +89,6 @@ def math_isfinite_int(context, builder, sig, args):
89
89
  return context.get_constant(types.boolean, 1)
90
90
 
91
91
 
92
- @lower(math.sin, types.float16)
93
- def fp16_sin_impl(context, builder, sig, args):
94
- def fp16_sin(x):
95
- return cuda.fp16.hsin(x)
96
-
97
- return context.compile_internal(builder, fp16_sin, sig, args)
98
-
99
-
100
- @lower(math.cos, types.float16)
101
- def fp16_cos_impl(context, builder, sig, args):
102
- def fp16_cos(x):
103
- return cuda.fp16.hcos(x)
104
-
105
- return context.compile_internal(builder, fp16_cos, sig, args)
106
-
107
-
108
- @lower(math.log, types.float16)
109
- def fp16_log_impl(context, builder, sig, args):
110
- def fp16_log(x):
111
- return cuda.fp16.hlog(x)
112
-
113
- return context.compile_internal(builder, fp16_log, sig, args)
114
-
115
-
116
- @lower(math.log10, types.float16)
117
- def fp16_log10_impl(context, builder, sig, args):
118
- def fp16_log10(x):
119
- return cuda.fp16.hlog10(x)
120
-
121
- return context.compile_internal(builder, fp16_log10, sig, args)
122
-
123
-
124
- @lower(math.log2, types.float16)
125
- def fp16_log2_impl(context, builder, sig, args):
126
- def fp16_log2(x):
127
- return cuda.fp16.hlog2(x)
128
-
129
- return context.compile_internal(builder, fp16_log2, sig, args)
130
-
131
-
132
- @lower(math.exp, types.float16)
133
- def fp16_exp_impl(context, builder, sig, args):
134
- def fp16_exp(x):
135
- return cuda.fp16.hexp(x)
136
-
137
- return context.compile_internal(builder, fp16_exp, sig, args)
138
-
139
-
140
- @lower(math.floor, types.float16)
141
- def fp16_floor_impl(context, builder, sig, args):
142
- def fp16_floor(x):
143
- return cuda.fp16.hfloor(x)
144
-
145
- return context.compile_internal(builder, fp16_floor, sig, args)
146
-
147
-
148
- @lower(math.ceil, types.float16)
149
- def fp16_ceil_impl(context, builder, sig, args):
150
- def fp16_ceil(x):
151
- return cuda.fp16.hceil(x)
152
-
153
- return context.compile_internal(builder, fp16_ceil, sig, args)
154
-
155
-
156
- @lower(math.sqrt, types.float16)
157
- def fp16_sqrt_impl(context, builder, sig, args):
158
- def fp16_sqrt(x):
159
- return cuda.fp16.hsqrt(x)
160
-
161
- return context.compile_internal(builder, fp16_sqrt, sig, args)
162
-
163
-
164
- @lower(math.fabs, types.float16)
165
- def fp16_fabs_impl(context, builder, sig, args):
166
- def fp16_fabs(x):
167
- return cuda.fp16.habs(x)
168
-
169
- return context.compile_internal(builder, fp16_fabs, sig, args)
170
-
171
-
172
- @lower(math.trunc, types.float16)
173
- def fp16_trunc_impl(context, builder, sig, args):
174
- def fp16_trunc(x):
175
- return cuda.fp16.htrunc(x)
176
-
177
- return context.compile_internal(builder, fp16_trunc, sig, args)
178
-
179
-
180
92
  def impl_boolean(key, ty, libfunc):
181
93
  def lower_boolean_impl(context, builder, sig, args):
182
94
  libfunc_impl = context.get_function(
@@ -143,7 +143,7 @@ class _Runtime:
143
143
  1,
144
144
  1,
145
145
  0,
146
- stream.handle,
146
+ stream.handle.value,
147
147
  params,
148
148
  cooperative=False,
149
149
  )
@@ -1,6 +1,7 @@
1
1
  import itertools
2
2
  from llvmlite import ir
3
- from numba.core import cgutils, targetconfig
3
+ from numba.core import targetconfig
4
+ from numba.cuda import cgutils
4
5
  from .cudadrv import nvvm
5
6
 
6
7
 
@@ -1,6 +1,7 @@
1
1
  from functools import singledispatch
2
2
  from llvmlite import ir
3
- from numba.core import types, cgutils
3
+ from numba.core import types
4
+ from numba.cuda import cgutils
4
5
  from numba.core.errors import NumbaWarning
5
6
  from numba.core.imputils import Registry
6
7
  from numba.cuda import nvvmutils
@@ -0,0 +1,264 @@
1
+ """
2
+ Serialization support for compiled functions.
3
+ """
4
+
5
+ import sys
6
+ import abc
7
+ import io
8
+ import copyreg
9
+
10
+
11
+ import pickle
12
+ from numba import cloudpickle
13
+ from llvmlite import ir
14
+
15
+
16
+ #
17
+ # Pickle support
18
+ #
19
+
20
+
21
+ def _rebuild_reduction(cls, *args):
22
+ """
23
+ Global hook to rebuild a given class from its __reduce__ arguments.
24
+ """
25
+ return cls._rebuild(*args)
26
+
27
+
28
+ # Keep objects unpickled via `numba_unpickle` alive.
29
+ _unpickled_memo = {}
30
+
31
+
32
+ def _numba_unpickle(address, bytedata, hashed):
33
+ """Used by `numba_unpickle` from _helperlib.c
34
+
35
+ Parameters
36
+ ----------
37
+ address : int
38
+ bytedata : bytes
39
+ hashed : bytes
40
+
41
+ Returns
42
+ -------
43
+ obj : object
44
+ unpickled object
45
+ """
46
+ key = (address, hashed)
47
+ try:
48
+ obj = _unpickled_memo[key]
49
+ except KeyError:
50
+ _unpickled_memo[key] = obj = cloudpickle.loads(bytedata)
51
+ return obj
52
+
53
+
54
+ def dumps(obj):
55
+ """Similar to `pickle.dumps()`. Returns the serialized object in bytes."""
56
+ pickler = NumbaPickler
57
+ with io.BytesIO() as buf:
58
+ p = pickler(buf, protocol=4)
59
+ p.dump(obj)
60
+ pickled = buf.getvalue()
61
+
62
+ return pickled
63
+
64
+
65
+ def runtime_build_excinfo_struct(static_exc, exc_args):
66
+ exc, static_args, locinfo = cloudpickle.loads(static_exc)
67
+ real_args = []
68
+ exc_args_iter = iter(exc_args)
69
+ for arg in static_args:
70
+ if isinstance(arg, ir.Value):
71
+ real_args.append(next(exc_args_iter))
72
+ else:
73
+ real_args.append(arg)
74
+ return (exc, tuple(real_args), locinfo)
75
+
76
+
77
+ # Alias to cloudpickle.loads to allow `serialize.loads()`
78
+ loads = cloudpickle.loads
79
+
80
+
81
+ class _CustomPickled:
82
+ """A wrapper for objects that must be pickled with `NumbaPickler`.
83
+
84
+ Standard `pickle` will pick up the implementation registered via `copyreg`.
85
+ This will spawn a `NumbaPickler` instance to serialize the data.
86
+
87
+ `NumbaPickler` overrides the handling of this type so as not to spawn a
88
+ new pickler for the object when it is already being pickled by a
89
+ `NumbaPickler`.
90
+ """
91
+
92
+ __slots__ = "ctor", "states"
93
+
94
+ def __init__(self, ctor, states):
95
+ self.ctor = ctor
96
+ self.states = states
97
+
98
+ def _reduce(self):
99
+ return _CustomPickled._rebuild, (self.ctor, self.states)
100
+
101
+ @classmethod
102
+ def _rebuild(cls, ctor, states):
103
+ return cls(ctor, states)
104
+
105
+
106
+ def _unpickle__CustomPickled(serialized):
107
+ """standard unpickling for `_CustomPickled`.
108
+
109
+ Uses `NumbaPickler` to load.
110
+ """
111
+ ctor, states = loads(serialized)
112
+ return _CustomPickled(ctor, states)
113
+
114
+
115
+ def _pickle__CustomPickled(cp):
116
+ """standard pickling for `_CustomPickled`.
117
+
118
+ Uses `NumbaPickler` to dump.
119
+ """
120
+ serialized = dumps((cp.ctor, cp.states))
121
+ return _unpickle__CustomPickled, (serialized,)
122
+
123
+
124
+ # Register custom pickling for the standard pickler.
125
+ copyreg.pickle(_CustomPickled, _pickle__CustomPickled)
126
+
127
+
128
+ def custom_reduce(cls, states):
129
+ """For customizing object serialization in `__reduce__`.
130
+
131
+ Object states provided here are used as keyword arguments to the
132
+ `._rebuild()` class method.
133
+
134
+ Parameters
135
+ ----------
136
+ states : dict
137
+ Dictionary of object states to be serialized.
138
+
139
+ Returns
140
+ -------
141
+ result : tuple
142
+ This tuple conforms to the return type requirement for `__reduce__`.
143
+ """
144
+ return custom_rebuild, (_CustomPickled(cls, states),)
145
+
146
+
147
+ def custom_rebuild(custom_pickled):
148
+ """Customized object deserialization.
149
+
150
+ This function is referenced internally by `custom_reduce()`.
151
+ """
152
+ cls, states = custom_pickled.ctor, custom_pickled.states
153
+ return cls._rebuild(**states)
154
+
155
+
156
+ def is_serialiable(obj):
157
+ """Check if *obj* can be serialized.
158
+
159
+ Parameters
160
+ ----------
161
+ obj : object
162
+
163
+ Returns
164
+ --------
165
+ can_serialize : bool
166
+ """
167
+ with io.BytesIO() as fout:
168
+ pickler = NumbaPickler(fout)
169
+ try:
170
+ pickler.dump(obj)
171
+ except pickle.PicklingError:
172
+ return False
173
+ else:
174
+ return True
175
+
176
+
177
+ def _no_pickle(obj):
178
+ raise pickle.PicklingError(f"Pickling of {type(obj)} is unsupported")
179
+
180
+
181
+ def disable_pickling(typ):
182
+ """This is called on a type to disable pickling"""
183
+ NumbaPickler.disabled_types.add(typ)
184
+ # Return `typ` to allow use as a decorator
185
+ return typ
186
+
187
+
188
+ class NumbaPickler(cloudpickle.CloudPickler):
189
+ disabled_types = set()
190
+ """A set of types for which pickling is disabled.
191
+ """
192
+
193
+ def reducer_override(self, obj):
194
+ # Overridden to disable pickling of certain types
195
+ if type(obj) in self.disabled_types:
196
+ _no_pickle(obj) # noreturn
197
+ return super().reducer_override(obj)
198
+
199
+
200
+ def _custom_reduce__custompickled(cp):
201
+ return cp._reduce()
202
+
203
+
204
+ NumbaPickler.dispatch_table[_CustomPickled] = _custom_reduce__custompickled
205
+
206
+
207
+ class ReduceMixin(abc.ABC):
208
+ """A mixin class for objects that should be reduced by the NumbaPickler
209
+ instead of the standard pickler.
210
+ """
211
+
212
+ # Subclass MUST override the below methods
213
+
214
+ @abc.abstractmethod
215
+ def _reduce_states(self):
216
+ raise NotImplementedError
217
+
218
+ @abc.abstractclassmethod
219
+ def _rebuild(cls, **kwargs):
220
+ raise NotImplementedError
221
+
222
+ # Subclass can override the below methods
223
+
224
+ def _reduce_class(self):
225
+ return self.__class__
226
+
227
+ # Private methods
228
+
229
+ def __reduce__(self):
230
+ return custom_reduce(self._reduce_class(), self._reduce_states())
231
+
232
+
233
+ class PickleCallableByPath:
234
+ """Wrap a callable object to be pickled by path to work around a limitation
235
+ in pickling due to non-pickleable objects in function non-locals.
236
+
237
+ Note:
238
+ - Do not use this as a decorator.
239
+ - Wrapped object must be a global that exists in its parent module and it
240
+ can be imported by `from the_module import the_object`.
241
+
242
+ Usage:
243
+
244
+ >>> def my_fn(x):
245
+ >>> ...
246
+ >>> wrapped_fn = PickleCallableByPath(my_fn)
247
+ >>> # refer to `wrapped_fn` instead of `my_fn`
248
+ """
249
+
250
+ def __init__(self, fn):
251
+ self._fn = fn
252
+
253
+ def __call__(self, *args, **kwargs):
254
+ return self._fn(*args, **kwargs)
255
+
256
+ def __reduce__(self):
257
+ return type(self)._rebuild, (
258
+ self._fn.__module__,
259
+ self._fn.__name__,
260
+ )
261
+
262
+ @classmethod
263
+ def _rebuild(cls, modname, fn_path):
264
+ return cls(getattr(sys.modules[modname], fn_path))
@@ -33,6 +33,7 @@ del vector_types, name, svty, alias
33
33
  if config.ENABLE_CUDASIM:
34
34
  import sys
35
35
  from numba.cuda.simulator import cudadrv
36
+ from . import dispatcher
36
37
 
37
38
  sys.modules["numba.cuda.cudadrv"] = cudadrv
38
39
  sys.modules["numba.cuda.cudadrv.devicearray"] = cudadrv.devicearray
@@ -43,6 +44,7 @@ if config.ENABLE_CUDASIM:
43
44
  sys.modules["numba.cuda.cudadrv.drvapi"] = cudadrv.drvapi
44
45
  sys.modules["numba.cuda.cudadrv.error"] = cudadrv.error
45
46
  sys.modules["numba.cuda.cudadrv.nvvm"] = cudadrv.nvvm
47
+ sys.modules["numba.cuda.dispatcher"] = dispatcher
46
48
 
47
49
  from . import bf16, compiler, _internal
48
50
 
@@ -0,0 +1,7 @@
1
+ class CUDADispatcher:
2
+ """
3
+ Dummy class so that consumers that try to import the real CUDADispatcher
4
+ do not get an import failure when running with the simulator.
5
+ """
6
+
7
+ ...