numba-cuda 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +4 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +2 -1
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/RECORD +170 -115
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -10,14 +10,78 @@ from collections import OrderedDict
|
|
|
10
10
|
import operator
|
|
11
11
|
import warnings
|
|
12
12
|
from functools import reduce
|
|
13
|
+
import tokenize
|
|
14
|
+
import string
|
|
13
15
|
|
|
14
16
|
import numpy as np
|
|
15
17
|
|
|
16
|
-
from numba.np.ufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity
|
|
18
|
+
from numba.cuda.np.ufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity
|
|
17
19
|
from numba.core import types
|
|
18
20
|
from numba.core.typing import signature
|
|
19
21
|
from numba.cuda.core import sigutils
|
|
20
|
-
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def parse_signature(sig):
|
|
25
|
+
"""Parse generalized ufunc signature.
|
|
26
|
+
|
|
27
|
+
NOTE: ',' (COMMA) is a delimiter; not separator.
|
|
28
|
+
This means trailing comma is legal.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
def stripws(s):
|
|
32
|
+
return "".join(c for c in s if c not in string.whitespace)
|
|
33
|
+
|
|
34
|
+
def tokenizer(src):
|
|
35
|
+
def readline():
|
|
36
|
+
yield src
|
|
37
|
+
|
|
38
|
+
gen = readline()
|
|
39
|
+
return tokenize.generate_tokens(lambda: next(gen))
|
|
40
|
+
|
|
41
|
+
def parse(src):
|
|
42
|
+
tokgen = tokenizer(src)
|
|
43
|
+
while True:
|
|
44
|
+
tok = next(tokgen)
|
|
45
|
+
if tok[1] == "(":
|
|
46
|
+
symbols = []
|
|
47
|
+
while True:
|
|
48
|
+
tok = next(tokgen)
|
|
49
|
+
if tok[1] == ")":
|
|
50
|
+
break
|
|
51
|
+
elif tok[0] == tokenize.NAME:
|
|
52
|
+
symbols.append(tok[1])
|
|
53
|
+
elif tok[1] == ",":
|
|
54
|
+
continue
|
|
55
|
+
else:
|
|
56
|
+
raise ValueError('bad token in signature "%s"' % tok[1])
|
|
57
|
+
yield tuple(symbols)
|
|
58
|
+
tok = next(tokgen)
|
|
59
|
+
if tok[1] == ",":
|
|
60
|
+
continue
|
|
61
|
+
elif tokenize.ISEOF(tok[0]):
|
|
62
|
+
break
|
|
63
|
+
elif tokenize.ISEOF(tok[0]):
|
|
64
|
+
break
|
|
65
|
+
else:
|
|
66
|
+
raise ValueError('bad token in signature "%s"' % tok[1])
|
|
67
|
+
|
|
68
|
+
ins, _, outs = stripws(sig).partition("->")
|
|
69
|
+
inputs = list(parse(ins))
|
|
70
|
+
outputs = list(parse(outs))
|
|
71
|
+
|
|
72
|
+
# check that all output symbols are defined in the inputs
|
|
73
|
+
isym = set()
|
|
74
|
+
osym = set()
|
|
75
|
+
for grp in inputs:
|
|
76
|
+
isym |= set(grp)
|
|
77
|
+
for grp in outputs:
|
|
78
|
+
osym |= set(grp)
|
|
79
|
+
|
|
80
|
+
diff = osym.difference(isym)
|
|
81
|
+
if diff:
|
|
82
|
+
raise NameError("undefined output symbols: %s" % ",".join(sorted(diff)))
|
|
83
|
+
|
|
84
|
+
return inputs, outputs
|
|
21
85
|
|
|
22
86
|
|
|
23
87
|
def _broadcast_axis(a, b):
|
|
@@ -10,40 +10,42 @@ import functools
|
|
|
10
10
|
import types as pytypes
|
|
11
11
|
import weakref
|
|
12
12
|
import uuid
|
|
13
|
+
import re
|
|
14
|
+
from warnings import warn
|
|
13
15
|
|
|
14
|
-
from numba
|
|
15
|
-
|
|
16
|
-
from numba.
|
|
16
|
+
from numba import cuda, _dispatcher
|
|
17
|
+
|
|
18
|
+
from numba.core import types
|
|
17
19
|
from numba.core.compiler_lock import global_compiler_lock
|
|
18
20
|
from numba.core.dispatcher import _DispatcherBase
|
|
19
21
|
from numba.core.errors import NumbaPerformanceWarning, TypingError
|
|
20
|
-
from numba.cuda.typing.templates import fold_arguments
|
|
21
22
|
from numba.core.typing.typeof import Purpose, typeof
|
|
23
|
+
|
|
24
|
+
from numba.cuda import serialize, utils, typing
|
|
25
|
+
from numba.cuda import types as cuda_types
|
|
22
26
|
from numba.cuda.api import get_current_device
|
|
23
27
|
from numba.cuda.args import wrap_arg
|
|
24
28
|
from numba.cuda.compiler import (
|
|
25
29
|
compile_cuda,
|
|
26
30
|
CUDACompiler,
|
|
27
31
|
kernel_fixup,
|
|
32
|
+
compile_extra,
|
|
28
33
|
)
|
|
29
|
-
from numba.cuda.core import sigutils
|
|
30
|
-
import
|
|
34
|
+
from numba.cuda.core import sigutils, config
|
|
35
|
+
from numba.cuda.flags import Flags
|
|
31
36
|
from numba.cuda.cudadrv import driver, nvvm
|
|
32
|
-
from numba.cuda.
|
|
33
|
-
from numba.cuda.
|
|
37
|
+
from numba.cuda.locks import module_init_lock
|
|
38
|
+
from numba.cuda.core.caching import Cache, CacheImpl, NullCache
|
|
34
39
|
from numba.cuda.descriptor import cuda_target
|
|
35
40
|
from numba.cuda.errors import (
|
|
36
41
|
missing_launch_config_msg,
|
|
37
42
|
normalize_kernel_dimensions,
|
|
38
43
|
)
|
|
39
|
-
from numba.cuda import
|
|
40
|
-
from numba.cuda.
|
|
44
|
+
from numba.cuda.typing.templates import fold_arguments
|
|
45
|
+
from numba.cuda.cudadrv.linkable_code import LinkableCode
|
|
46
|
+
from numba.cuda.cudadrv.devices import get_context
|
|
41
47
|
from numba.cuda.memory_management.nrt import rtsys, NRT_LIBRARY
|
|
42
48
|
|
|
43
|
-
from numba import cuda
|
|
44
|
-
from numba import _dispatcher
|
|
45
|
-
|
|
46
|
-
from warnings import warn
|
|
47
49
|
|
|
48
50
|
cuda_fp16_math_funcs = [
|
|
49
51
|
"hsin",
|
|
@@ -211,13 +213,11 @@ class _Kernel(serialize.ReduceMixin):
|
|
|
211
213
|
# The following are referred to by the cache implementation. Note:
|
|
212
214
|
# - There are no referenced environments in CUDA.
|
|
213
215
|
# - Kernels don't have lifted code.
|
|
214
|
-
# - reload_init is only for parfors.
|
|
215
216
|
self.target_context = tgt_ctx
|
|
216
217
|
self.fndesc = cres.fndesc
|
|
217
218
|
self.environment = cres.environment
|
|
218
219
|
self._referenced_environments = []
|
|
219
220
|
self.lifted = []
|
|
220
|
-
self.reload_init = []
|
|
221
221
|
|
|
222
222
|
def maybe_link_nrt(self, link, tgt_ctx, asm):
|
|
223
223
|
"""
|
|
@@ -835,12 +835,12 @@ class _FunctionCompiler(object):
|
|
|
835
835
|
return True, retval
|
|
836
836
|
|
|
837
837
|
def _compile_core(self, args, return_type):
|
|
838
|
-
flags =
|
|
838
|
+
flags = Flags()
|
|
839
839
|
self.targetdescr.options.parse_as_flags(flags, self.targetoptions)
|
|
840
840
|
flags = self._customize_flags(flags)
|
|
841
841
|
|
|
842
842
|
impl = self._get_implementation(args, {})
|
|
843
|
-
cres =
|
|
843
|
+
cres = compile_extra(
|
|
844
844
|
self.targetdescr.typing_context,
|
|
845
845
|
self.targetdescr.target_context,
|
|
846
846
|
impl,
|
|
@@ -1345,27 +1345,6 @@ class CUDADispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase):
|
|
|
1345
1345
|
cache_misses=self._cache_misses,
|
|
1346
1346
|
)
|
|
1347
1347
|
|
|
1348
|
-
def parallel_diagnostics(self, signature=None, level=1):
|
|
1349
|
-
"""
|
|
1350
|
-
Print parallel diagnostic information for the given signature. If no
|
|
1351
|
-
signature is present it is printed for all known signatures. level is
|
|
1352
|
-
used to adjust the verbosity, level=1 (default) is minimal verbosity,
|
|
1353
|
-
and 2, 3, and 4 provide increasing levels of verbosity.
|
|
1354
|
-
"""
|
|
1355
|
-
|
|
1356
|
-
def dump(sig):
|
|
1357
|
-
ol = self.overloads[sig]
|
|
1358
|
-
pfdiag = ol.metadata.get("parfor_diagnostics", None)
|
|
1359
|
-
if pfdiag is None:
|
|
1360
|
-
msg = "No parfors diagnostic available, is 'parallel=True' set?"
|
|
1361
|
-
raise ValueError(msg)
|
|
1362
|
-
pfdiag.dump(level)
|
|
1363
|
-
|
|
1364
|
-
if signature is not None:
|
|
1365
|
-
dump(signature)
|
|
1366
|
-
else:
|
|
1367
|
-
[dump(sig) for sig in self.signatures]
|
|
1368
|
-
|
|
1369
1348
|
def get_metadata(self, signature=None):
|
|
1370
1349
|
"""
|
|
1371
1350
|
Obtain the compilation metadata for a given signature.
|
numba_cuda/numba/cuda/flags.py
CHANGED
|
@@ -1,6 +1,146 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
-
|
|
3
|
+
|
|
4
|
+
from numba.cuda.core.targetconfig import TargetConfig, Option
|
|
5
|
+
|
|
6
|
+
from numba.cuda.core.options import (
|
|
7
|
+
ParallelOptions,
|
|
8
|
+
FastMathOptions,
|
|
9
|
+
InlineOptions,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Flags(TargetConfig):
|
|
14
|
+
__slots__ = ()
|
|
15
|
+
|
|
16
|
+
enable_looplift = Option(
|
|
17
|
+
type=bool,
|
|
18
|
+
default=False,
|
|
19
|
+
doc="Enable loop-lifting",
|
|
20
|
+
)
|
|
21
|
+
enable_pyobject = Option(
|
|
22
|
+
type=bool,
|
|
23
|
+
default=False,
|
|
24
|
+
doc="Enable pyobject mode (in general)",
|
|
25
|
+
)
|
|
26
|
+
enable_pyobject_looplift = Option(
|
|
27
|
+
type=bool,
|
|
28
|
+
default=False,
|
|
29
|
+
doc="Enable pyobject mode inside lifted loops",
|
|
30
|
+
)
|
|
31
|
+
enable_ssa = Option(
|
|
32
|
+
type=bool,
|
|
33
|
+
default=True,
|
|
34
|
+
doc="Enable SSA",
|
|
35
|
+
)
|
|
36
|
+
force_pyobject = Option(
|
|
37
|
+
type=bool,
|
|
38
|
+
default=False,
|
|
39
|
+
doc="Force pyobject mode inside the whole function",
|
|
40
|
+
)
|
|
41
|
+
release_gil = Option(
|
|
42
|
+
type=bool,
|
|
43
|
+
default=False,
|
|
44
|
+
doc="Release GIL inside the native function",
|
|
45
|
+
)
|
|
46
|
+
no_compile = Option(
|
|
47
|
+
type=bool,
|
|
48
|
+
default=False,
|
|
49
|
+
doc="TODO",
|
|
50
|
+
)
|
|
51
|
+
debuginfo = Option(
|
|
52
|
+
type=bool,
|
|
53
|
+
default=False,
|
|
54
|
+
doc="TODO",
|
|
55
|
+
)
|
|
56
|
+
boundscheck = Option(
|
|
57
|
+
type=bool,
|
|
58
|
+
default=False,
|
|
59
|
+
doc="TODO",
|
|
60
|
+
)
|
|
61
|
+
forceinline = Option(
|
|
62
|
+
type=bool,
|
|
63
|
+
default=False,
|
|
64
|
+
doc="Force inlining of the function. Overrides _dbg_optnone.",
|
|
65
|
+
)
|
|
66
|
+
no_cpython_wrapper = Option(
|
|
67
|
+
type=bool,
|
|
68
|
+
default=False,
|
|
69
|
+
doc="TODO",
|
|
70
|
+
)
|
|
71
|
+
no_cfunc_wrapper = Option(
|
|
72
|
+
type=bool,
|
|
73
|
+
default=False,
|
|
74
|
+
doc="TODO",
|
|
75
|
+
)
|
|
76
|
+
auto_parallel = Option(
|
|
77
|
+
type=ParallelOptions,
|
|
78
|
+
default=ParallelOptions(False),
|
|
79
|
+
doc="""Enable automatic parallel optimization, can be fine-tuned by
|
|
80
|
+
taking a dictionary of sub-options instead of a boolean, see parfor.py for
|
|
81
|
+
detail""",
|
|
82
|
+
)
|
|
83
|
+
nrt = Option(
|
|
84
|
+
type=bool,
|
|
85
|
+
default=False,
|
|
86
|
+
doc="TODO",
|
|
87
|
+
)
|
|
88
|
+
no_rewrites = Option(
|
|
89
|
+
type=bool,
|
|
90
|
+
default=False,
|
|
91
|
+
doc="TODO",
|
|
92
|
+
)
|
|
93
|
+
error_model = Option(
|
|
94
|
+
type=str,
|
|
95
|
+
default="python",
|
|
96
|
+
doc="TODO",
|
|
97
|
+
)
|
|
98
|
+
fastmath = Option(
|
|
99
|
+
type=FastMathOptions,
|
|
100
|
+
default=FastMathOptions(False),
|
|
101
|
+
doc="TODO",
|
|
102
|
+
)
|
|
103
|
+
noalias = Option(
|
|
104
|
+
type=bool,
|
|
105
|
+
default=False,
|
|
106
|
+
doc="TODO",
|
|
107
|
+
)
|
|
108
|
+
inline = Option(
|
|
109
|
+
type=InlineOptions,
|
|
110
|
+
default=InlineOptions("never"),
|
|
111
|
+
doc="TODO",
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
dbg_extend_lifetimes = Option(
|
|
115
|
+
type=bool,
|
|
116
|
+
default=False,
|
|
117
|
+
doc=(
|
|
118
|
+
"Extend variable lifetime for debugging. "
|
|
119
|
+
"This automatically turns on with debug=True."
|
|
120
|
+
),
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
dbg_optnone = Option(
|
|
124
|
+
type=bool,
|
|
125
|
+
default=False,
|
|
126
|
+
doc=(
|
|
127
|
+
"Disable optimization for debug. "
|
|
128
|
+
"Equivalent to adding optnone attribute in the LLVM Function."
|
|
129
|
+
),
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
dbg_directives_only = Option(
|
|
133
|
+
type=bool,
|
|
134
|
+
default=False,
|
|
135
|
+
doc=(
|
|
136
|
+
"Make debug emissions directives-only. "
|
|
137
|
+
"Used when generating lineinfo."
|
|
138
|
+
),
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
DEFAULT_FLAGS = Flags()
|
|
143
|
+
DEFAULT_FLAGS.nrt = True
|
|
4
144
|
|
|
5
145
|
|
|
6
146
|
def _nvvm_options_type(x):
|
numba_cuda/numba/cuda/fp16.py
CHANGED
|
@@ -73,7 +73,6 @@ from numba.cuda._internal.cuda_fp16 import (
|
|
|
73
73
|
__hadd,
|
|
74
74
|
__hadd_rn,
|
|
75
75
|
__hadd_sat,
|
|
76
|
-
__hcmadd,
|
|
77
76
|
__hdiv as hdiv,
|
|
78
77
|
__hdiv,
|
|
79
78
|
__heq as heq,
|
|
@@ -287,7 +286,6 @@ __all__ = [
|
|
|
287
286
|
"__hadd",
|
|
288
287
|
"__hadd_rn",
|
|
289
288
|
"__hadd_sat",
|
|
290
|
-
"__hcmadd",
|
|
291
289
|
"hdiv",
|
|
292
290
|
"__hdiv",
|
|
293
291
|
"heq",
|