numba-cuda 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +4 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +2 -1
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/RECORD +170 -115
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
from types import ModuleType
|
|
4
|
+
|
|
5
|
+
import weakref
|
|
6
|
+
|
|
7
|
+
from numba.core.errors import ConstantInferenceError, NumbaError
|
|
8
|
+
from numba.core import ir
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ConstantInference(object):
    """
    Compile-time constant resolver bound to one function IR.

    Given a variable name, the engine fetches the variable's definition from
    the IR and tries to evaluate it to a Python value.  Outcomes (successes
    and failures alike) are memoised per variable name.

    Not meant to be used directly; call Interpreter.infer_constant() instead.
    """

    def __init__(self, func_ir):
        # Outcome cache: name -> (True, value) or (False, (exc_class, args)).
        self._cache = {}
        # Hold the IR through a weak proxy only: the cache may keep
        # user-visible objects alive and a strong reference would form a
        # reference cycle.
        self._func_ir = weakref.proxy(func_ir)

    def infer_constant(self, name, loc=None):
        """
        Return the constant value bound to the variable called *name*.

        The first query for a name performs the inference; later queries are
        answered from the cache.  When no constant can be inferred the
        recorded numba.errors.ConstantInferenceError is raised again, with
        *loc* as its location.
        """
        outcome = self._cache.get(name)
        if outcome is None:
            try:
                outcome = (True, self._do_infer(name))
            except ConstantInferenceError as err:
                # Remember only the class and the args so the traceback
                # (and everything it references) is not kept alive.
                outcome = (False, (err.__class__, err.args))
            self._cache[name] = outcome

        succeeded, payload = outcome
        if succeeded:
            return payload

        exc_class, exc_args = payload
        if issubclass(exc_class, NumbaError):
            raise exc_class(*exc_args, loc=loc)
        raise exc_class(*exc_args)

    def _fail(self, val):
        """
        Raise ConstantInferenceError for *val*; never returns.
        """
        # No location is attached here on purpose: `val` is the ir.Var name
        # (or its definition), not the offending use.  `infer_constant`
        # catches this error, keeps its class and message, and raises it
        # again with the location of the expression that needed the constant.
        message = "Constant inference not possible for: %s" % (val,)
        raise ConstantInferenceError(message, loc=None)

    def _do_infer(self, name):
        """
        Perform the uncached inference for the variable *name*.

        Raises TypeError if *name* is not a str and ConstantInferenceError
        if the variable has no single definition or cannot be evaluated.
        """
        if not isinstance(name, str):
            raise TypeError("infer_constant() called with non-str %r" % (name,))

        try:
            definition = self._func_ir.get_definition(name)
        except KeyError:
            raise ConstantInferenceError(
                "no single definition for %r" % (name,)
            )

        try:
            # Let the IR node evaluate itself first.
            return definition.infer_constant()
        except ConstantInferenceError:
            # Expressions get a second chance through the handlers below.
            if isinstance(definition, ir.Expr):
                return self._infer_expr(definition)
            self._fail(definition)

    def _infer_expr(self, expr):
        """
        Evaluate the IR expression *expr* for the supported operations:
        call, getattr, build_list and build_tuple.
        """
        op = expr.op

        def resolve(var):
            # Operands are resolved at the location of the whole expression.
            return self.infer_constant(var.name, loc=expr.loc)

        if op == "call":
            return self._infer_call(resolve(expr.func), expr)
        if op == "getattr":
            return self._infer_getattr(resolve(expr.value), expr)
        if op == "build_list":
            return [resolve(item) for item in expr.items]
        if op == "build_tuple":
            return tuple(resolve(item) for item in expr.items)
        self._fail(expr)

    def _infer_call(self, func, expr):
        """
        Evaluate a call of the already-inferred callable *func*.

        Only positional-argument calls to `slice` or to an exception class
        are supported; anything else fails.
        """
        if expr.kws or expr.vararg:
            self._fail(expr)

        # Check supported callables
        is_slice = func in (slice,)
        is_exc_class = isinstance(func, type) and issubclass(
            func, BaseException
        )
        if not (is_slice or is_exc_class):
            self._fail(expr)

        call_args = [
            self.infer_constant(arg.name, loc=expr.loc) for arg in expr.args
        ]
        if is_slice:
            return func(*call_args)

        # A user-defined exception class may implement a constructor that
        # does not forward its args to the base class.  Hand back the raw
        # class together with the args so the exception is instantiated at
        # the call site exactly as the user source expects.
        return func, call_args

    def _infer_getattr(self, value, expr):
        """
        Evaluate an attribute lookup on the already-inferred *value*.

        Only lookups on a module or a class are supported.
        """
        if not isinstance(value, (ModuleType, type)):
            self._fail(expr)

        # Allow looking up a constant on a class or module
        try:
            return getattr(value, expr.attr)
        except AttributeError:
            pass
        self._fail(expr)
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import platform
|
|
5
|
+
|
|
6
|
+
import llvmlite.binding as ll
|
|
7
|
+
from llvmlite import ir
|
|
8
|
+
|
|
9
|
+
from numba import _dynfunc
|
|
10
|
+
from numba.core.callwrapper import PyCallWrapper
|
|
11
|
+
from numba.core.base import BaseContext
|
|
12
|
+
from numba.core import (
|
|
13
|
+
utils,
|
|
14
|
+
types,
|
|
15
|
+
config,
|
|
16
|
+
cgutils,
|
|
17
|
+
callconv,
|
|
18
|
+
codegen,
|
|
19
|
+
externals,
|
|
20
|
+
fastmathpass,
|
|
21
|
+
intrinsics,
|
|
22
|
+
)
|
|
23
|
+
from numba.core.options import TargetOptions, include_default_options
|
|
24
|
+
from numba.core.runtime import rtsys
|
|
25
|
+
from numba.core.compiler_lock import global_compiler_lock
|
|
26
|
+
import numba.core.entrypoints
|
|
27
|
+
|
|
28
|
+
# Re-export these options, they are used from the cpu module throughout the code
|
|
29
|
+
# base.
|
|
30
|
+
from numba.cuda.core.options import (
|
|
31
|
+
ParallelOptions, # noqa F401
|
|
32
|
+
FastMathOptions, # noqa F401
|
|
33
|
+
InlineOptions, # noqa F401
|
|
34
|
+
) # noqa F401
|
|
35
|
+
from numba.np import ufunc_db
|
|
36
|
+
|
|
37
|
+
# Keep those structures in sync with _dynfunc.c.
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ClosureBody(cgutils.Structure):
    """
    Structure with a single ``env`` field holding a Python object.

    The layout has to stay in sync with _dynfunc.c.
    """

    _fields = [
        ("env", types.pyobject),
    ]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class EnvBody(cgutils.Structure):
    """
    Structure with two Python-object fields: ``globals`` then ``consts``.

    The layout has to stay in sync with _dynfunc.c.
    """

    _fields = [("globals", types.pyobject), ("consts", types.pyobject)]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class CPUContext(BaseContext):
    """
    Changes BaseContext calling convention

    The calling convention is supplied by the ``call_conv`` property, and
    module creation / code generation is delegated to the codegen object
    stored in ``self._internal_codegen`` (set up by ``init()``).
    """

    # Class-level flag; enabled for this context.
    allow_dynamic_globals = True

    def __init__(self, typingctx, target="cpu"):
        # Forward both arguments to BaseContext; *target* defaults to "cpu".
        super().__init__(typingctx, target)

    # Overrides
    def create_module(self, name):
        """
        Return a new empty module called *name*, created by the internal
        codegen.
        """
        return self._internal_codegen._create_empty_module(name)

    @global_compiler_lock
    def init(self):
        """
        Set up the context state: the 32-bit flag, the JIT codegen and the
        external C math functions.  Runs under the global compiler lock.
        """
        self.is32bit = utils.MACHINE_BITS == 32
        self._internal_codegen = codegen.JITCPUCodegen("numba.exec")

        # Add ARM ABI functions from libgcc_s
        if platform.machine() == "armv7l":
            ll.load_library_permanently("libgcc_s.so.1")

        # Map external C functions.
        externals.c_math_functions.install(self)

    def load_additional_registries(self):
        """
        Initialize the NRT, import the implementation modules and install
        the target-specific registries, then load third-party extensions.
        """
        # Only initialize the NRT once something is about to be compiled. The
        # "initialized" state doesn't need to be threadsafe, there's a lock
        # around the internal compilation and the rtsys.initialize call can be
        # made multiple times, worse case init just gets called a bit more often
        # than optimal.
        rtsys.initialize(self)

        # Add implementations that work via import
        from numba.cpython import (
            enumimpl,  # noqa F401
            iterators,  # noqa F401
            rangeobj,  # noqa F401
            tupleobj,  # noqa F401
        )  # noqa F401
        from numba.core import optional, inline_closurecall  # noqa F401
        from numba.misc import gdb_hook, literal  # noqa F401
        from numba.np import linalg, arraymath, arrayobj  # noqa F401
        from numba.np.random import generator_core, generator_methods  # noqa F401
        from numba.np.polynomial import polynomial_core, polynomial_functions  # noqa F401
        from numba.typed import typeddict, dictimpl  # noqa F401
        from numba.typed import typedlist, listobject  # noqa F401
        from numba.experimental import jitclass, function_type  # noqa F401
        from numba.np import npdatetime  # noqa F401

        # Add target specific implementations
        from numba.np import npyimpl
        from numba.cpython import cmathimpl, mathimpl, printimpl, randomimpl
        from numba.misc import cffiimpl
        from numba.experimental.jitclass.base import (
            ClassBuilder as jitclassimpl,
        )

        self.install_registry(cmathimpl.registry)
        self.install_registry(cffiimpl.registry)
        self.install_registry(mathimpl.registry)
        self.install_registry(npyimpl.registry)
        self.install_registry(printimpl.registry)
        self.install_registry(randomimpl.registry)
        self.install_registry(jitclassimpl.class_impl_registry)

        # load 3rd party extensions
        numba.core.entrypoints.init_all()

        # fix for #8940
        from numba.np.unsafe import ndarray  # noqa F401

    @property
    def target_data(self):
        """The ``target_data`` of the internal codegen."""
        return self._internal_codegen.target_data

    def with_aot_codegen(self, name, **aot_options):
        """
        Return a subtarget of this context whose internal codegen is an
        ``AOTCPUCodegen`` built from *name* and *aot_options*, with
        ``aot_mode`` set to True.
        """
        aot_codegen = codegen.AOTCPUCodegen(name, **aot_options)
        return self.subtarget(_internal_codegen=aot_codegen, aot_mode=True)

    def codegen(self):
        """Return the internal codegen object."""
        return self._internal_codegen

    @property
    def call_conv(self):
        """A ``CPUCallConv`` bound to this context, built on each access."""
        return callconv.CPUCallConv(self)

    def get_env_body(self, builder, envptr):
        """
        From the given *envptr* (a pointer to a _dynfunc.Environment object),
        get a EnvBody allowing structured access to environment fields.
        """
        # The body sits at a fixed offset inside the Environment object; the
        # offset is published by _dynfunc.
        body_ptr = cgutils.pointer_add(
            builder, envptr, _dynfunc._impl_info["offsetof_env_body"]
        )
        return EnvBody(self, builder, ref=body_ptr, cast_ref=True)

    def get_env_manager(self, builder, return_pyobject=False):
        """
        Load the environment global declared for ``self.fndesc``, emit an
        environment sentry for it, and return the env manager obtained from
        the Python API for ``self.environment``.

        *return_pyobject* is passed through to the sentry emission.
        """
        envgv = self.declare_env_global(
            builder.module, self.get_env_name(self.fndesc)
        )
        envarg = builder.load(envgv)
        pyapi = self.get_python_api(builder)
        pyapi.emit_environment_sentry(
            envarg,
            return_pyobject=return_pyobject,
            debug_msg=self.fndesc.env_name,
        )
        env_body = self.get_env_body(builder, envarg)
        return pyapi.get_env_manager(self.environment, env_body, envarg)

    def get_generator_state(self, builder, genptr, return_type):
        """
        From the given *genptr* (a pointer to a _dynfunc.Generator object),
        get a pointer to its state area.
        """
        return cgutils.pointer_add(
            builder,
            genptr,
            _dynfunc._impl_info["offsetof_generator_state"],
            return_type=return_type,
        )

    def build_list(self, builder, list_type, items):
        """
        Build a list from the Numba *list_type* and its initial *items*.
        """
        from numba.cpython import listobj

        return listobj.build_list(self, builder, list_type, items)

    def build_set(self, builder, set_type, items):
        """
        Build a set from the Numba *set_type* and its initial *items*.
        """
        from numba.cpython import setobj

        return setobj.build_set(self, builder, set_type, items)

    def build_map(self, builder, dict_type, item_types, items):
        """
        Build a map from the Numba *dict_type*, its *item_types* and its
        initial *items*; delegates to ``dictobject.build_map``.
        """
        from numba.typed import dictobject

        return dictobject.build_map(self, builder, dict_type, item_types, items)

    def post_lowering(self, mod, library):
        """
        Post-process the lowered module *mod* and register the runtime
        library as a linking library of *library*.
        """
        if self.fastmath:
            fastmathpass.rewrite_module(mod, self.fastmath)

        if self.is32bit:
            # 32-bit machine needs to replace all 64-bit div/rem to avoid
            # calls to compiler-rt
            intrinsics.fix_divmod(mod)

        library.add_linking_library(rtsys.library)

    def create_cpython_wrapper(
        self, library, fndesc, env, call_helper, release_gil=False
    ):
        """
        Build the CPython wrapper for the function described by *fndesc* in
        a fresh module named "wrapper" and add that module to *library*.
        """
        wrapper_module = self.create_module("wrapper")
        fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
        # Declaration of the compiled function inside the wrapper module.
        wrapper_callee = ir.Function(
            wrapper_module, fnty, fndesc.llvm_func_name
        )
        builder = PyCallWrapper(
            self,
            wrapper_module,
            wrapper_callee,
            fndesc,
            env,
            call_helper=call_helper,
            release_gil=release_gil,
        )
        builder.build()
        library.add_ir_module(wrapper_module)

    def create_cfunc_wrapper(self, library, fndesc, env, call_helper):
        """
        Build the cfunc wrapper for the function described by *fndesc* in a
        fresh module named "cfunc_wrapper" and add that module to *library*.

        The wrapper takes and returns plain value types, calls the compiled
        function through the context calling convention and, on error
        status, reports the exception as unraisable before returning.
        *env* and *call_helper* are not used by this method.
        """
        wrapper_module = self.create_module("cfunc_wrapper")
        fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
        # Declaration of the compiled function inside the wrapper module.
        wrapper_callee = ir.Function(
            wrapper_module, fnty, fndesc.llvm_func_name
        )

        # Signature of the wrapper itself: value types for args and return.
        ll_argtypes = [self.get_value_type(ty) for ty in fndesc.argtypes]
        ll_return_type = self.get_value_type(fndesc.restype)
        wrapty = ir.FunctionType(ll_return_type, ll_argtypes)
        wrapfn = ir.Function(
            wrapper_module, wrapty, fndesc.llvm_cfunc_wrapper_name
        )
        builder = ir.IRBuilder(wrapfn.append_basic_block("entry"))

        status, out = self.call_conv.call_function(
            builder,
            wrapper_callee,
            fndesc.restype,
            fndesc.argtypes,
            wrapfn.args,
            attrs=("noinline",),
        )

        with builder.if_then(status.is_error, likely=False):
            # If (and only if) an error occurred, acquire the GIL
            # and use the interpreter to write out the exception.
            pyapi = self.get_python_api(builder)
            gil_state = pyapi.gil_ensure()
            self.call_conv.raise_error(builder, pyapi, status)
            cstr = self.insert_const_string(builder.module, repr(self))
            strobj = pyapi.string_from_string(cstr)
            pyapi.err_write_unraisable(strobj)
            pyapi.decref(strobj)
            pyapi.gil_release(gil_state)

        builder.ret(out)
        library.add_ir_module(wrapper_module)

    def get_executable(self, library, fndesc, env):
        """
        Create the Python-callable function for the compiled *fndesc*.

        Parameters
        ----------
        library
            code library holding the CPython wrapper; kept alive together
            with the returned function
        fndesc
            function descriptor
        env
            an execution environment (from _dynfunc)

        Returns
        -------
        cfunc
            callable function made by ``_dynfunc.make_function`` from the
            address of the CPython wrapper

        As a side effect, *env* is registered with the library's codegen
        under the environment name of *fndesc*.
        """
        # Code generation
        fnptr = library.get_pointer_to_function(
            fndesc.llvm_cpython_wrapper_name
        )

        # Note: we avoid reusing the original docstring to avoid encoding
        # issues on Python 2, see issue #1908
        doc = "compiled wrapper for %r" % (fndesc.qualname,)
        cfunc = _dynfunc.make_function(
            fndesc.lookup_module(),
            fndesc.qualname.split(".")[-1],
            doc,
            fnptr,
            env,
            # objects to keepalive with the function
            (library,),
        )
        library.codegen.set_env(self.get_env_name(fndesc), env)
        return cfunc

    def calc_array_sizeof(self, ndim):
        """
        Calculate the size of an array struct on the CPU target
        """
        # The element type (int32) and layout ("A") are fixed here; presumably
        # the struct size depends only on *ndim* -- TODO confirm.
        aryty = types.Array(types.int32, ndim, "A")
        return self.get_abi_sizeof(self.get_value_type(aryty))

    # Overrides
    def get_ufunc_info(self, ufunc_key):
        """Return the ``ufunc_db`` information registered for *ufunc_key*."""
        return ufunc_db.get_ufunc_info(ufunc_key)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
# ----------------------------------------------------------------------------
|
|
312
|
+
# TargetOptions
|
|
313
|
+
|
|
314
|
+
# Names of the default options accepted by the CPU target.  The object
# returned by include_default_options() is used as a base class of
# CPUTargetOptions.
_options_mixin = include_default_options(
    *(
        # compilation mode
        "nopython",
        "forceobj",
        "looplift",
        # runtime / debugging
        "_nrt",
        "debug",
        "boundscheck",
        "nogil",
        # pipeline and wrappers
        "no_rewrites",
        "no_cpython_wrapper",
        "no_cfunc_wrapper",
        # code generation
        "parallel",
        "fastmath",
        "error_model",
        "inline",
        "forceinline",
        # debugging internals
        "_dbg_extend_lifetimes",
        "_dbg_optnone",
    )
)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
class CPUTargetOptions(_options_mixin, TargetOptions):
    """Target options for the CPU target."""

    def finalize(self, flags, options):
        """
        Fill in every flag on *flags* that has not been set explicitly.

        *options* is accepted but not consulted here.
        """
        # Flags that are switched on unless the caller set them already.
        for flag_name in ("enable_pyobject", "enable_looplift"):
            if not flags.is_set(flag_name):
                setattr(flags, flag_name, True)

        flags.inherit_if_not_set("nrt", default=True)

        if not flags.is_set("debuginfo"):
            flags.debuginfo = config.DEBUGINFO_DEFAULT

        if not flags.is_set("dbg_extend_lifetimes"):
            # With debuginfo on, extended lifetimes are turned on
            # automatically; otherwise the env-var config decides.
            flags.dbg_extend_lifetimes = (
                True if flags.debuginfo else config.EXTEND_VARIABLE_LIFETIMES
            )

        if not flags.is_set("boundscheck"):
            flags.boundscheck = flags.debuginfo

        flags.enable_pyobject_looplift = True

        flags.inherit_if_not_set("fastmath")
        flags.inherit_if_not_set("error_model", default="python")
        flags.inherit_if_not_set("forceinline")

        # forceinline turns off optnone, just like clang.
        if flags.forceinline:
            flags.dbg_optnone = False
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
import weakref
|
|
5
|
+
import importlib
|
|
6
|
+
|
|
7
|
+
from numba import _dynfunc
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Environment(_dynfunc.Environment):
    """Runtime holder of the globals and constant pyobjects of a function.

    Live instances are registered in a class-level weak-value dictionary
    keyed by ``env_name`` so an existing environment can be reused.
    """

    __slots__ = ("env_name", "__weakref__")
    # env_name -> live Environment; entries disappear with their instance.
    _memo = weakref.WeakValueDictionary()

    @classmethod
    def from_fndesc(cls, fndesc):
        """Return the environment for *fndesc*, reusing a live one if any."""
        name = fndesc.env_name
        # Avoid creating new Env
        existing = cls._memo.get(name)
        if existing is not None:
            return existing

        created = cls(fndesc.lookup_globals())
        created.env_name = name
        cls._memo[name] = created
        return created

    def can_cache(self):
        """Return True when the globals contain a ``__name__`` entry."""
        return "__name__" in self.globals

    def __reduce__(self):
        """Pickle as a call to ``_rebuild_env(modname, consts, env_name)``."""
        module_name = self.globals.get("__name__")
        state = (module_name, self.consts, self.env_name)
        return _rebuild_env, state

    def __del__(self):
        # Intentionally does nothing.
        pass

    def __repr__(self):
        return f"<Environment {self.env_name!r} >"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _rebuild_env(modname, consts, env_name):
    """Return the Environment called *env_name*, creating it if needed.

    A live environment registered under *env_name* is returned as is.
    Otherwise a new one is built on the ``__dict__`` of the module
    *modname*, filled with *consts* and registered under *env_name*.
    """
    rebuilt = lookup_environment(env_name)
    if rebuilt is None:
        module = importlib.import_module(modname)
        rebuilt = Environment(module.__dict__)
        rebuilt.consts[:] = consts
        rebuilt.env_name = env_name
        # Cache loaded object
        Environment._memo[env_name] = rebuilt
    return rebuilt
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def lookup_environment(env_name):
    """Look up the live Environment registered under *env_name*.

    Returns None when no such environment is found.
    """
    live_environments = Environment._memo
    return live_environments.get(env_name)
|