numba-cuda 0.17.0__py3-none-any.whl → 0.18.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +0 -8
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
- numba_cuda/numba/cuda/api_util.py +6 -0
- numba_cuda/numba/cuda/cgutils.py +1291 -0
- numba_cuda/numba/cuda/codegen.py +32 -14
- numba_cuda/numba/cuda/compiler.py +113 -10
- numba_cuda/numba/cuda/core/caching.py +741 -0
- numba_cuda/numba/cuda/core/callconv.py +338 -0
- numba_cuda/numba/cuda/core/codegen.py +168 -0
- numba_cuda/numba/cuda/core/compiler.py +205 -0
- numba_cuda/numba/cuda/core/typed_passes.py +139 -0
- numba_cuda/numba/cuda/cudadecl.py +0 -268
- numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +2 -1
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +4 -178
- numba_cuda/numba/cuda/debuginfo.py +469 -3
- numba_cuda/numba/cuda/device_init.py +0 -1
- numba_cuda/numba/cuda/dispatcher.py +310 -11
- numba_cuda/numba/cuda/extending.py +2 -1
- numba_cuda/numba/cuda/fp16.py +348 -0
- numba_cuda/numba/cuda/intrinsics.py +1 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
- numba_cuda/numba/cuda/lowering.py +1833 -8
- numba_cuda/numba/cuda/mathimpl.py +2 -90
- numba_cuda/numba/cuda/nvvmutils.py +2 -1
- numba_cuda/numba/cuda/printimpl.py +2 -1
- numba_cuda/numba/cuda/serialize.py +264 -0
- numba_cuda/numba/cuda/simulator/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
- numba_cuda/numba/cuda/stubs.py +0 -308
- numba_cuda/numba/cuda/target.py +13 -5
- numba_cuda/numba/cuda/testing.py +156 -5
- numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +10 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +15 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +108 -24
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
- numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
- numba_cuda/numba/cuda/utils.py +785 -0
- numba_cuda/numba/cuda/vector_types.py +1 -1
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/METADATA +18 -4
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/RECORD +63 -50
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/WHEEL +0 -0
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/top_level.txt +0 -0
|
@@ -2,13 +2,19 @@ import numpy as np
|
|
|
2
2
|
import os
|
|
3
3
|
import sys
|
|
4
4
|
import ctypes
|
|
5
|
+
import collections
|
|
5
6
|
import functools
|
|
7
|
+
import types as pytypes
|
|
8
|
+
import weakref
|
|
9
|
+
import uuid
|
|
6
10
|
|
|
7
|
-
from numba.core import
|
|
8
|
-
from numba.
|
|
11
|
+
from numba.core import compiler, sigutils, types, typing, config
|
|
12
|
+
from numba.cuda import serialize, utils
|
|
13
|
+
from numba.cuda.core.caching import Cache, CacheImpl, NullCache
|
|
9
14
|
from numba.core.compiler_lock import global_compiler_lock
|
|
10
|
-
from numba.core.dispatcher import
|
|
11
|
-
from numba.core.errors import NumbaPerformanceWarning
|
|
15
|
+
from numba.core.dispatcher import _DispatcherBase
|
|
16
|
+
from numba.core.errors import NumbaPerformanceWarning, TypingError
|
|
17
|
+
from numba.core.typing.templates import fold_arguments
|
|
12
18
|
from numba.core.typing.typeof import Purpose, typeof
|
|
13
19
|
from numba.cuda.api import get_current_device
|
|
14
20
|
from numba.cuda.args import wrap_arg
|
|
@@ -185,10 +191,6 @@ class _Kernel(serialize.ReduceMixin):
|
|
|
185
191
|
|
|
186
192
|
# Link to the helper library functions if needed
|
|
187
193
|
link_to_library_functions(reshape_funcs, "reshape_funcs.cu")
|
|
188
|
-
# Link to the CUDA FP16 math library functions if needed
|
|
189
|
-
link_to_library_functions(
|
|
190
|
-
cuda_fp16_math_funcs, "cpp_function_wrappers.cu", "__numba_wrapper_"
|
|
191
|
-
)
|
|
192
194
|
|
|
193
195
|
self.maybe_link_nrt(link, tgt_ctx, asm)
|
|
194
196
|
|
|
@@ -384,6 +386,12 @@ class _Kernel(serialize.ReduceMixin):
|
|
|
384
386
|
"""
|
|
385
387
|
return self._codelibrary.get_asm_str(cc=cc)
|
|
386
388
|
|
|
389
|
+
def inspect_lto_ptx(self, cc):
    """
    Return the PTX code for the external functions linked to this kernel.

    :param cc: Value forwarded as the ``cc`` keyword to the code library's
               ``get_lto_ptx``.
    :return: Whatever ``self._codelibrary.get_lto_ptx(cc=cc)`` returns.
    """
    code_library = self._codelibrary
    return code_library.get_lto_ptx(cc=cc)
|
|
394
|
+
|
|
387
395
|
def inspect_sass_cfg(self):
|
|
388
396
|
"""
|
|
389
397
|
Returns the CFG of the SASS for this kernel.
|
|
@@ -725,7 +733,135 @@ class CUDACache(Cache):
|
|
|
725
733
|
return super().load_overload(sig, target_context)
|
|
726
734
|
|
|
727
735
|
|
|
728
|
-
class
|
|
736
|
+
class _MemoMixin:
    """
    Mixin giving each instance a lazily-created UUID string and recording
    the instance under that UUID in a class-level registry.
    """

    # Per-instance UUID; stays None until first requested or explicitly set.
    # (Name-mangled, so it is stored as ``_MemoMixin__uuid``.)
    __uuid = None

    # A {uuid -> instance} mapping, for deserialization. Values are held
    # weakly, so an entry alone does not keep its instance alive.
    _memo = weakref.WeakValueDictionary()

    # Strong references to the most recently registered instances (bounded
    # by config.FUNCTION_CACHE_SIZE), retaining them in _memo regardless of
    # whether there is another reference.
    _recent = collections.deque(maxlen=config.FUNCTION_CACHE_SIZE)

    @property
    def _uuid(self):
        """
        An instance-specific UUID, to avoid multiple deserializations of
        a given instance.

        Generated lazily on first access (for performance reasons) and
        registered through ``_set_uuid``.
        """
        existing = self.__uuid
        if existing is not None:
            return existing
        fresh = str(uuid.uuid4())
        self._set_uuid(fresh)
        return fresh

    def _set_uuid(self, u):
        """
        Assign UUID ``u`` to this instance and register the instance in
        ``_memo`` and ``_recent``.

        The UUID may only be assigned once; a second assignment trips the
        assertion.
        """
        assert self.__uuid is None
        self.__uuid = u
        self._memo[u] = self
        self._recent.append(self)
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
# Record type with three fields: the cache path plus the cache hit and
# cache miss values.
_CompileStats = collections.namedtuple(
    "_CompileStats",
    ["cache_path", "cache_hits", "cache_misses"],
)
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
class _FunctionCompiler:
    """
    Compiles ``py_func`` for a target description and remembers which
    (argument types, return type) combinations failed with a typing error.
    """

    def __init__(self, py_func, targetdescr, targetoptions, pipeline_class):
        """
        :param py_func: The Python function to compile.
        :param targetdescr: Target descriptor supplying ``options``,
                            ``typing_context`` and ``target_context``.
        :param targetoptions: Options parsed into compiler flags.
        :param pipeline_class: Passed to ``compiler.compile_extra``.
        """
        self.py_func = py_func
        self.targetdescr = targetdescr
        self.targetoptions = targetoptions
        self.pipeline_class = pipeline_class
        self.locals = {}
        self.pysig = utils.pysignature(py_func)
        # Maps key=(args, return_type) -> the exception raised when that
        # combination was compiled, so a known failure is not retried.
        self._failed_cache = {}

    def fold_argument_types(self, args, kws):
        """
        Fold positional and keyword argument types against the function's
        Python signature. Defaults become ``types.Omitted`` instances and
        star-args become a ``types.StarArgTuple``.

        :return: A ``(pysig, argument types)`` tuple.
        """

        def keep_given(index, param, value):
            return value

        def omit_default(index, param, default):
            return types.Omitted(default)

        def pack_starargs(index, param, values):
            return types.StarArgTuple(values)

        # For now, we take argument values from the @jit function
        folded = fold_arguments(
            self.pysig, args, kws, keep_given, omit_default, pack_starargs
        )
        return self.pysig, folded

    def compile(self, args, return_type):
        """
        Compile for the given argument types and return type.

        Returns the compile result, or raises the recorded/raised typing
        error on failure.
        """
        succeeded, outcome = self._compile_cached(args, return_type)
        if not succeeded:
            raise outcome
        return outcome

    def _compile_cached(self, args, return_type):
        """
        Compile unless this combination is already known to fail.

        :return: ``(True, compile result)`` on success, or
                 ``(False, exception)`` when a ``TypingError`` was raised
                 now or on an earlier attempt.
        """
        key = tuple(args), return_type
        if key in self._failed_cache:
            return False, self._failed_cache[key]

        try:
            cres = self._compile_core(args, return_type)
        except TypingError as exc:
            self._failed_cache[key] = exc
            return False, exc
        return True, cres

    def _compile_core(self, args, return_type):
        """
        Build the flags from the target options and run
        ``compiler.compile_extra``; raises the result's typing error when
        one is present and ``enable_pyobject`` is not set.
        """
        flags = compiler.Flags()
        self.targetdescr.options.parse_as_flags(flags, self.targetoptions)
        flags = self._customize_flags(flags)

        impl = self._get_implementation(args, {})
        target = self.targetdescr
        cres = compiler.compile_extra(
            target.typing_context,
            target.target_context,
            impl,
            args=args,
            return_type=return_type,
            flags=flags,
            locals=self.locals,
            pipeline_class=self.pipeline_class,
        )
        # A typing error is only tolerated when enable_pyobject is set.
        if cres.typing_error is None or flags.enable_pyobject:
            return cres
        raise cres.typing_error

    def get_globals_for_reduction(self):
        """Delegate to ``serialize._get_function_globals_for_reduction``."""
        return serialize._get_function_globals_for_reduction(self.py_func)

    def _get_implementation(self, args, kws):
        """Hook returning the function to compile; here always ``py_func``."""
        return self.py_func

    def _customize_flags(self, flags):
        """Hook for adjusting flags; here returns them unchanged."""
        return flags
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
class CUDADispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
|
|
729
865
|
"""
|
|
730
866
|
CUDA Dispatcher object. When configured and called, the dispatcher will
|
|
731
867
|
specialize itself for the given arguments (if no suitable specialized
|
|
@@ -744,10 +880,42 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
|
|
|
744
880
|
targetdescr = cuda_target
|
|
745
881
|
|
|
746
882
|
def __init__(self, py_func, targetoptions, pipeline_class=CUDACompiler):
|
|
747
|
-
|
|
748
|
-
|
|
883
|
+
"""
|
|
884
|
+
Parameters
|
|
885
|
+
----------
|
|
886
|
+
py_func: function object to be compiled
|
|
887
|
+
targetoptions: dict, optional
|
|
888
|
+
Target-specific config options.
|
|
889
|
+
pipeline_class: type numba.compiler.CompilerBase
|
|
890
|
+
The compiler pipeline type.
|
|
891
|
+
"""
|
|
892
|
+
self.typingctx = self.targetdescr.typing_context
|
|
893
|
+
self.targetctx = self.targetdescr.target_context
|
|
894
|
+
|
|
895
|
+
pysig = utils.pysignature(py_func)
|
|
896
|
+
arg_count = len(pysig.parameters)
|
|
897
|
+
can_fallback = not targetoptions.get("nopython", False)
|
|
898
|
+
|
|
899
|
+
_DispatcherBase.__init__(
|
|
900
|
+
self,
|
|
901
|
+
arg_count,
|
|
902
|
+
py_func,
|
|
903
|
+
pysig,
|
|
904
|
+
can_fallback,
|
|
905
|
+
exact_match_required=False,
|
|
749
906
|
)
|
|
750
907
|
|
|
908
|
+
functools.update_wrapper(self, py_func)
|
|
909
|
+
|
|
910
|
+
self.targetoptions = targetoptions
|
|
911
|
+
self._cache = NullCache()
|
|
912
|
+
compiler_class = _FunctionCompiler
|
|
913
|
+
self._compiler = compiler_class(
|
|
914
|
+
py_func, self.targetdescr, targetoptions, pipeline_class
|
|
915
|
+
)
|
|
916
|
+
self._cache_hits = collections.Counter()
|
|
917
|
+
self._cache_misses = collections.Counter()
|
|
918
|
+
|
|
751
919
|
# The following properties are for specialization of CUDADispatchers. A
|
|
752
920
|
# specialized CUDADispatcher is one that is compiled for exactly one
|
|
753
921
|
# set of argument types, and bypasses some argument type checking for
|
|
@@ -760,6 +928,15 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
|
|
|
760
928
|
# argument types
|
|
761
929
|
self.specializations = {}
|
|
762
930
|
|
|
931
|
+
def dump(self, tab=""):
    """
    Print a description of this dispatcher followed by a dump of each of
    its overloads.

    :param tab: Prefix written at the start of each line printed here; the
                overloads are dumped with an extended prefix.
    """
    cls_name = type(self).__name__
    func_name = self.py_func.__name__
    type_code = self._type._code
    print(f"{tab}DUMP {cls_name}[{func_name}, type code={type_code}]")
    nested_tab = tab + " "
    for overload in self.overloads.values():
        overload.dump(tab=nested_tab)
    print(f"{tab}END DUMP {type(self).__name__}[{self.py_func.__name__}]")
|
|
939
|
+
|
|
763
940
|
@property
def _numba_type_(self):
    """Return ``cuda_types.CUDADispatcher`` constructed from this dispatcher."""
    dispatcher_type = cuda_types.CUDADispatcher(self)
    return dispatcher_type
|
|
@@ -767,6 +944,13 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
|
|
|
767
944
|
def enable_caching(self):
    """Replace ``self._cache`` with a ``CUDACache`` built from ``self.py_func``."""
    cached_func = self.py_func
    self._cache = CUDACache(cached_func)
|
|
769
946
|
|
|
947
|
+
def __get__(self, obj, objtype=None):
    """
    Descriptor hook allowing a JIT function to be bound as a method.

    Accessed without an instance (``obj is None``) it returns itself;
    accessed through an instance it returns a method bound to ``obj``.
    """
    if obj is not None:
        # Bound method
        return pytypes.MethodType(self, obj)
    # Unbound method
    return self
|
|
953
|
+
|
|
770
954
|
@functools.lru_cache(maxsize=128)
|
|
771
955
|
def configure(self, griddim, blockdim, stream=0, sharedmem=0):
|
|
772
956
|
griddim, blockdim = normalize_kernel_dimensions(griddim, blockdim)
|
|
@@ -1114,6 +1298,93 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
|
|
|
1114
1298
|
|
|
1115
1299
|
return kernel
|
|
1116
1300
|
|
|
1301
|
+
def get_compile_result(self, sig):
    """
    Compile (if needed) and return the compilation result for ``sig``.

    The lookup key is ``tuple(sig.args)``.

    Returns the entry stored in ``self.overloads`` for that key.
    Raises ``TypingError`` if the key is missing and compilation is
    disabled; compiling itself may raise any ``NumbaError``.
    """
    arg_types = tuple(sig.args)
    if arg_types in self.overloads:
        return self.overloads[arg_types]

    if not self._can_compile:
        msg = f"{sig} not available and compilation disabled"
        raise TypingError(msg)

    # Compiling may raise any NumbaError
    self.compile(arg_types)
    return self.overloads[arg_types]
|
|
1317
|
+
|
|
1318
|
+
def recompile(self):
    """
    Recompile all signatures afresh.

    Compilation is force-enabled for the duration and the previous
    ``_can_compile`` value is restored afterwards, even if compiling
    raises.
    """
    known_sigs = list(self.overloads)
    previous_can_compile = self._can_compile

    # Ensure the old overloads are disposed of,
    # including compiled functions.
    finalize = self._make_finalizer()
    finalize()
    self._reset_overloads()
    self._cache.flush()

    self._can_compile = True
    try:
        for sig in known_sigs:
            self.compile(sig)
    finally:
        self._can_compile = previous_can_compile
|
|
1335
|
+
|
|
1336
|
+
@property
def stats(self):
    """
    Return a ``_CompileStats`` holding the cache's path together with this
    dispatcher's cache-hit and cache-miss values.
    """
    cache = self._cache
    hits = self._cache_hits
    misses = self._cache_misses
    return _CompileStats(
        cache_path=cache.cache_path,
        cache_hits=hits,
        cache_misses=misses,
    )
|
|
1343
|
+
|
|
1344
|
+
def parallel_diagnostics(self, signature=None, level=1):
    """
    Print parallel diagnostic information for the given signature. If no
    signature is present it is printed for all known signatures. level is
    used to adjust the verbosity, level=1 (default) is minimal verbosity,
    and 2, 3, and 4 provide increasing levels of verbosity.

    Raises ``ValueError`` if an overload's metadata has no
    ``"parfor_diagnostics"`` entry.
    """

    def dump_one(sig):
        # Named dump_one (not dump) so it is not confused with the
        # dispatcher's own dump() method.
        overload = self.overloads[sig]
        pfdiag = overload.metadata.get("parfor_diagnostics", None)
        if pfdiag is None:
            msg = "No parfors diagnostic available, is 'parallel=True' set?"
            raise ValueError(msg)
        pfdiag.dump(level)

    if signature is not None:
        dump_one(signature)
        return

    # Plain loop: this runs purely for its side effects, so there is no
    # reason to build a throwaway list of None values.
    for sig in self.signatures:
        dump_one(sig)
|
|
1364
|
+
|
|
1365
|
+
def get_metadata(self, signature=None):
    """
    Obtain the compilation metadata for a given signature.

    With ``signature=None`` a dict mapping every entry of
    ``self.signatures`` to its overload's metadata is returned instead.
    """
    if signature is None:
        return {sig: self.overloads[sig].metadata for sig in self.signatures}
    return self.overloads[signature].metadata
|
|
1375
|
+
|
|
1376
|
+
def get_function_type(self):
    """
    Return unique function type of dispatcher when possible, otherwise
    return None.

    A Dispatcher instance has unique function type when it
    contains exactly one compilation result and its compilation
    has been disabled (via its disable_compile method).
    """
    if self._can_compile or len(self.overloads) != 1:
        return None
    only_cres = next(iter(self.overloads.values()))
    return types.FunctionType(only_cres.signature)
|
|
1387
|
+
|
|
1117
1388
|
def inspect_llvm(self, signature=None):
|
|
1118
1389
|
"""
|
|
1119
1390
|
Return the LLVM IR for this kernel.
|
|
@@ -1169,6 +1440,34 @@ class CUDADispatcher(Dispatcher, serialize.ReduceMixin):
|
|
|
1169
1440
|
for sig, overload in self.overloads.items()
|
|
1170
1441
|
}
|
|
1171
1442
|
|
|
1443
|
+
def inspect_lto_ptx(self, signature=None):
    """
    Return link-time optimized PTX code for the given signature.

    :param signature: A tuple of argument types.
    :return: The PTX code for the given signature, or a dict of PTX codes
             for all previously-encountered signatures.
    """
    cc = get_current_device().compute_capability
    is_device = self.targetoptions.get("device")

    def lto_ptx_of(overload):
        # When the "device" target option is set the PTX is read from the
        # overload's library; otherwise the overload's own
        # inspect_lto_ptx is used.
        if is_device:
            return overload.library.get_lto_ptx(cc)
        return overload.inspect_lto_ptx(cc)

    if signature is not None:
        return lto_ptx_of(self.overloads[signature])
    return {sig: lto_ptx_of(ov) for sig, ov in self.overloads.items()}
|
|
1470
|
+
|
|
1172
1471
|
def inspect_sass_cfg(self, signature=None):
|
|
1173
1472
|
"""
|
|
1174
1473
|
Return this kernel's CFG for the device in the current context.
|
|
@@ -23,7 +23,8 @@ def make_attribute_wrapper(typeclass, struct_attr, python_attr):
|
|
|
23
23
|
from numba.core.datamodel import default_manager
|
|
24
24
|
from numba.core.datamodel.models import StructModel
|
|
25
25
|
from numba.core.imputils import impl_ret_borrowed
|
|
26
|
-
from numba.core import
|
|
26
|
+
from numba.core import types
|
|
27
|
+
from numba.cuda import cgutils
|
|
27
28
|
|
|
28
29
|
from numba.cuda.models import cuda_data_manager
|
|
29
30
|
from numba.cuda.cudadecl import registry as cuda_registry
|