numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (172)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,747 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import platform
5
+ import sys
6
+ import os
7
+ import re
8
+ import shutil
9
+ import warnings
10
+ import traceback
11
+
12
+ # YAML needed to use file based Numba config
13
+ try:
14
+ import yaml
15
+
16
+ _HAVE_YAML = True
17
+ except ImportError:
18
+ _HAVE_YAML = False
19
+
20
+
21
+ import llvmlite.binding as ll
22
+
23
+
24
# Platform detection flags used throughout the configuration logic.
IS_WIN32 = sys.platform.startswith("win32")
IS_OSX = sys.platform.startswith("darwin")
# Pointer width in bits of the running interpreter: tuple.__itemsize__ is
# the per-item (pointer) size in bytes.
MACHINE_BITS = tuple.__itemsize__ * 8
IS_32BITS = MACHINE_BITS == 32
# Python version in (major, minor) tuple
PYVERSION = sys.version_info[:2]

# this is the name of the user supplied configuration file
_config_fname = ".numba_config.yaml"
33
+
34
+
35
+ def _parse_cc(text):
36
+ """
37
+ Parse CUDA compute capability version string.
38
+ """
39
+ if not text:
40
+ return None
41
+ else:
42
+ m = re.match(r"(\d+)\.(\d+)", text)
43
+ if not m:
44
+ raise ValueError(
45
+ "Compute capability must be specified as a "
46
+ 'string of "major.minor" where major '
47
+ "and minor are decimals"
48
+ )
49
+ grp = m.groups()
50
+ return int(grp[0]), int(grp[1])
51
+
52
+
53
+ def _os_supports_avx():
54
+ """
55
+ Whether the current OS supports AVX, regardless of the CPU.
56
+
57
+ This is necessary because the user may be running a very old Linux
58
+ kernel (e.g. CentOS 5) on a recent CPU.
59
+ """
60
+ if not sys.platform.startswith("linux") or platform.machine() not in (
61
+ "i386",
62
+ "i586",
63
+ "i686",
64
+ "x86_64",
65
+ ):
66
+ return True
67
+ # Executing the CPUID instruction may report AVX available even though
68
+ # the kernel doesn't support it, so parse /proc/cpuinfo instead.
69
+ try:
70
+ f = open("/proc/cpuinfo", "r")
71
+ except OSError:
72
+ # If /proc isn't available, assume yes
73
+ return True
74
+ with f:
75
+ for line in f:
76
+ head, _, body = line.partition(":")
77
+ if head.strip() == "flags" and "avx" in body.split():
78
+ return True
79
+ else:
80
+ return False
81
+
82
+
83
+ class _OptLevel(int):
84
+ """This class holds the "optimisation level" set in `NUMBA_OPT`. As this env
85
+ var can be an int or a string, but is almost always interpreted as an int,
86
+ this class subclasses int so as to get the common behaviour but stores the
87
+ actual value as a `_raw_value` member. The value "max" is a special case
88
+ and the property `is_opt_max` can be queried to find if the optimisation
89
+ level (supplied value at construction time) is "max"."""
90
+
91
+ def __new__(cls, *args, **kwargs):
92
+ assert len(args) == 1
93
+ (value,) = args
94
+ _int_value = 3 if value == "max" else int(value)
95
+ # the int ctor is always called with an appropriate integer value
96
+ new = super().__new__(cls, _int_value, **kwargs)
97
+ # raw value is max or int
98
+ new._raw_value = value if value == "max" else _int_value
99
+ return new
100
+
101
+ @property
102
+ def is_opt_max(self):
103
+ """Returns True if the the optimisation level is "max" False
104
+ otherwise."""
105
+ return self._raw_value == "max"
106
+
107
+ def __repr__(self):
108
+ if isinstance(self._raw_value, str):
109
+ arg = f"'{self._raw_value}'"
110
+ else:
111
+ arg = self._raw_value
112
+ return f"_OptLevel({arg})"
113
+
114
+
115
def _process_opt_level(opt_level):
    """Validate a ``NUMBA_OPT`` setting and wrap it in an ``_OptLevel``.

    Accepts exactly "0", "1", "2", "3" or "max"; raises ValueError for
    anything else.
    """
    supported = ("0", "1", "2", "3", "max")
    if opt_level in supported:
        return _OptLevel(opt_level)
    msg = (
        "Environment variable `NUMBA_OPT` is set to an unsupported "
        f"value '{opt_level}', supported values are 0, 1, 2, 3, and "
        "'max'"
    )
    raise ValueError(msg)
125
+
126
+
127
+ class _EnvVar(object):
128
+ """Descriptor for configuration values that checks numba.config on access."""
129
+
130
+ def __init__(self, value, name):
131
+ self.name = name
132
+ if isinstance(value, _EnvVar):
133
+ self.value = value.__get__()
134
+ else:
135
+ self.value = value
136
+ self.check_numba_config()
137
+
138
+ def check_numba_config(self):
139
+ """Check for conflicting value in numba.config and emit deprecation warning."""
140
+ try:
141
+ from numba import config as numba_config
142
+
143
+ if hasattr(numba_config, self.name):
144
+ config_value = getattr(numba_config, self.name)
145
+ if config_value != self.value:
146
+ msg = (
147
+ f"Configuration value '{self.name}' is explicitly set "
148
+ f"to `{config_value}` in numba.config. "
149
+ "numba.config is deprecated for numba-cuda "
150
+ "and support for configuration values from it "
151
+ "will be removed in a future release. "
152
+ "Please use numba.cuda.config."
153
+ )
154
+ warnings.warn(msg, category=DeprecationWarning)
155
+ self.value = config_value
156
+ else:
157
+ # Initialize any missing variables in numba.config
158
+ setattr(numba_config, self.name, self.value)
159
+ except ImportError:
160
+ pass
161
+
162
+ def __get__(self):
163
+ self.check_numba_config()
164
+ return self.value
165
+
166
+ def __set__(self, value):
167
+ self.value = value
168
+
169
+
170
class _EnvReloader(object):
    """Reads NUMBA_* settings from an optional ``.numba_config.yaml`` file
    and the process environment, parses them, and installs the results as
    ``_EnvVar`` objects in ``self._descriptors`` (consumed by the
    module-level ``__getattr__``/``__setattr__``)."""

    def __init__(self):
        self.reset()

    def reset(self):
        # Clearing old_environ and forcing an update performs a full
        # (re)load of every configuration value.
        self.old_environ = {}
        self.update(force=True)

    def update(self, force=False):
        """Re-read configuration sources and reprocess them if anything
        changed (or unconditionally when *force* is true)."""
        new_environ = {}

        # first check if there's a .numba_config.yaml and use values from that
        if os.path.exists(_config_fname) and os.path.isfile(_config_fname):
            if not _HAVE_YAML:
                msg = (
                    "A Numba config file is found but YAML parsing "
                    "capabilities appear to be missing. "
                    "To use this feature please install `pyyaml`. e.g. "
                    "`conda install pyyaml`."
                )
                warnings.warn(msg)
            else:
                with open(_config_fname, "rt") as f:
                    y_conf = yaml.safe_load(f)
                if y_conf is not None:
                    # File keys are lowercase setting names without the
                    # NUMBA_ prefix; normalise to env-var form.
                    for k, v in y_conf.items():
                        new_environ["NUMBA_" + k.upper()] = v

        # clobber file based config with any locally defined env vars
        for name, value in os.environ.items():
            if name.startswith("NUMBA_"):
                new_environ[name] = value
        # We update the config variables if at least one NUMBA environment
        # variable was modified. This lets the user modify values
        # directly in the config module without having them clobbered when
        # reload_config() is called by the compiler.
        if force or self.old_environ != new_environ:
            self.process_environ(new_environ)
            # Store a copy
            self.old_environ = dict(new_environ)

        self.validate()

    def validate(self):
        """Sanity-check mutually dependent settings after processing."""
        current_module = sys.modules[__name__]
        # The module attributes may not exist yet on a partial load, hence
        # the AttributeError fallbacks to the defaults (0).
        try:
            CUDA_USE_NVIDIA_BINDING = current_module.CUDA_USE_NVIDIA_BINDING
        except AttributeError:
            CUDA_USE_NVIDIA_BINDING = 0

        try:
            CUDA_PER_THREAD_DEFAULT_STREAM = (
                current_module.CUDA_PER_THREAD_DEFAULT_STREAM
            )
        except AttributeError:
            CUDA_PER_THREAD_DEFAULT_STREAM = 0

        if CUDA_USE_NVIDIA_BINDING:  # noqa: F821
            try:
                import cuda  # noqa: F401
            except ImportError as ie:
                msg = (
                    "CUDA Python bindings requested (the environment "
                    "variable NUMBA_CUDA_USE_NVIDIA_BINDING is set), "
                    f"but they are not importable: {ie.msg}."
                )
                warnings.warn(msg)

                # Fall back to the ctypes binding when cuda-python is absent.
                current_module.CUDA_USE_NVIDIA_BINDING = 0

            if CUDA_PER_THREAD_DEFAULT_STREAM:  # noqa: F821
                warnings.warn(
                    "PTDS support is handled by CUDA Python when "
                    "using the NVIDIA binding. Please set the "
                    "environment variable "
                    "CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM to 1 "
                    "instead."
                )

    def process_environ(self, environ):
        """Parse every supported setting from *environ* and record the
        resulting ``_EnvVar`` objects in ``self._descriptors``."""

        def _readenv(name, ctor, default):
            # Read `name` from environ, parse with `ctor`; on a missing or
            # unparsable value fall back to `default` (called if callable).
            value = environ.get(name)
            if value is None:
                result = default() if callable(default) else default
            else:
                try:
                    result = ctor(value)
                except Exception:
                    warnings.warn(
                        f"Environment variable '{name}' is defined but "
                        f"its associated value '{value}' could not be "
                        "parsed.\nThe parse failed with exception:\n"
                        f"{traceback.format_exc()}",
                        RuntimeWarning,
                    )
                    result = default() if callable(default) else default
            # Descriptor names drop the NUMBA_ prefix.
            var_name = name
            if name.startswith("NUMBA_"):
                var_name = name[6:]
            return _EnvVar(result, var_name)

        def optional_str(x):
            return str(x) if x is not None else None

        # Type casting rules selection
        USE_LEGACY_TYPE_SYSTEM = _readenv(
            "NUMBA_USE_LEGACY_TYPE_SYSTEM", int, 1
        )

        # developer mode produces full tracebacks, disables help instructions
        DEVELOPER_MODE = _readenv("NUMBA_DEVELOPER_MODE", int, 0)

        # disable performance warnings, will switch off the generation of
        # warnings of the class NumbaPerformanceWarning
        DISABLE_PERFORMANCE_WARNINGS = _readenv(
            "NUMBA_DISABLE_PERFORMANCE_WARNINGS", int, 0
        )

        # Flag to enable full exception reporting
        FULL_TRACEBACKS = _readenv("NUMBA_FULL_TRACEBACKS", int, DEVELOPER_MODE)

        # Show help text when an error occurs
        SHOW_HELP = _readenv("NUMBA_SHOW_HELP", int, 0)

        # The color scheme to use for error messages, default is no color
        # just bold fonts in use.
        COLOR_SCHEME = _readenv("NUMBA_COLOR_SCHEME", str, "no_color")

        # Whether to globally enable bounds checking. The default None means
        # to use the value of the flag to @njit. 0 or 1 overrides the flag
        # globally.
        BOUNDSCHECK = _readenv("NUMBA_BOUNDSCHECK", int, None)

        # Whether to always warn about potential uninitialized variables
        # because static controlflow analysis cannot find a definition
        # in one or more of the incoming paths.
        ALWAYS_WARN_UNINIT_VAR = _readenv(
            "NUMBA_ALWAYS_WARN_UNINIT_VAR",
            int,
            0,
        )

        # Whether to warn about kernel launches where the grid size will
        # under utilize the GPU due to low occupancy. On by default.
        CUDA_LOW_OCCUPANCY_WARNINGS = _readenv(
            "NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS", int, 1
        )

        # Whether to use the official CUDA Python API Bindings
        CUDA_USE_NVIDIA_BINDING = _readenv(
            "NUMBA_CUDA_USE_NVIDIA_BINDING", int, 0
        )

        # Debug flag to control compiler debug print
        DEBUG = _readenv("NUMBA_DEBUG", int, 0)

        # DEBUG print IR after pass names
        DEBUG_PRINT_AFTER = _readenv("NUMBA_DEBUG_PRINT_AFTER", str, "none")

        # DEBUG print IR before pass names
        DEBUG_PRINT_BEFORE = _readenv("NUMBA_DEBUG_PRINT_BEFORE", str, "none")

        # DEBUG print IR before and after pass names
        DEBUG_PRINT_WRAP = _readenv("NUMBA_DEBUG_PRINT_WRAP", str, "none")

        # Highlighting in intermediate dumps
        HIGHLIGHT_DUMPS = _readenv("NUMBA_HIGHLIGHT_DUMPS", int, 0)

        # JIT Debug flag to trigger IR instruction print
        DEBUG_JIT = _readenv("NUMBA_DEBUG_JIT", int, 0)

        # Enable debugging of front-end operation
        # (up to and including IR generation)
        DEBUG_FRONTEND = _readenv("NUMBA_DEBUG_FRONTEND", int, 0)

        # Enable debug prints in nrtdynmod and use of "safe" API functions
        DEBUG_NRT = _readenv("NUMBA_DEBUG_NRT", int, 0)

        # Enable NRT statistics counters
        NRT_STATS = _readenv("NUMBA_NRT_STATS", int, 0)

        # Enable NRT statistics
        CUDA_NRT_STATS = _readenv("NUMBA_CUDA_NRT_STATS", int, 0)

        # Enable NRT
        CUDA_ENABLE_NRT = _readenv("NUMBA_CUDA_ENABLE_NRT", int, 0)

        # How many recently deserialized functions to retain regardless
        # of external references
        FUNCTION_CACHE_SIZE = _readenv("NUMBA_FUNCTION_CACHE_SIZE", int, 128)

        # Maximum tuple size that parfors will unpack and pass to
        # internal gufunc.
        PARFOR_MAX_TUPLE_SIZE = _readenv(
            "NUMBA_PARFOR_MAX_TUPLE_SIZE", int, 100
        )

        # Enable logging of cache operation
        DEBUG_CACHE = _readenv("NUMBA_DEBUG_CACHE", int, DEBUG)

        # Redirect cache directory
        # Contains path to the directory
        CACHE_DIR = _readenv("NUMBA_CACHE_DIR", str, "")

        # Enable tracing support
        TRACE = _readenv("NUMBA_TRACE", int, 0)

        # Enable chrome tracing support
        CHROME_TRACE = _readenv("NUMBA_CHROME_TRACE", str, "")

        # Enable debugging of type inference
        DEBUG_TYPEINFER = _readenv("NUMBA_DEBUG_TYPEINFER", int, 0)

        # Configure compilation target to use the specified CPU name
        # and CPU feature as the host information.
        # Note: this overrides "host" option for AOT compilation.
        CPU_NAME = _readenv("NUMBA_CPU_NAME", optional_str, None)
        # NOTE(review): CPU_NAME here is an _EnvVar, so str(CPU_NAME) is its
        # repr and never compares equal to "generic" — the "" branch looks
        # unreachable; confirm intent.
        CPU_FEATURES = _readenv(
            "NUMBA_CPU_FEATURES",
            optional_str,
            ("" if str(CPU_NAME).lower() == "generic" else None),
        )
        # Optimization level
        OPT = _readenv("NUMBA_OPT", _process_opt_level, _OptLevel(3))

        # Force dump of Python bytecode
        DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND)

        # Force dump of control flow graph
        DUMP_CFG = _readenv("NUMBA_DUMP_CFG", int, DEBUG_FRONTEND)

        # Force dump of Numba IR
        DUMP_IR = _readenv("NUMBA_DUMP_IR", int, DEBUG_FRONTEND)

        # Force dump of Numba IR in SSA form
        # NOTE(review): DEBUG_FRONTEND is an _EnvVar object and therefore
        # always truthy, so `or DEBUG_TYPEINFER` is never selected here —
        # verify this is intended.
        DUMP_SSA = _readenv(
            "NUMBA_DUMP_SSA", int, DEBUG_FRONTEND or DEBUG_TYPEINFER
        )

        # print debug info of analysis and optimization on array operations
        DEBUG_ARRAY_OPT = _readenv("NUMBA_DEBUG_ARRAY_OPT", int, 0)

        # insert debug stmts to print information at runtime
        DEBUG_ARRAY_OPT_RUNTIME = _readenv(
            "NUMBA_DEBUG_ARRAY_OPT_RUNTIME", int, 0
        )

        # print stats about parallel for-loops
        DEBUG_ARRAY_OPT_STATS = _readenv("NUMBA_DEBUG_ARRAY_OPT_STATS", int, 0)

        # prints user friendly information about parallel
        PARALLEL_DIAGNOSTICS = _readenv("NUMBA_PARALLEL_DIAGNOSTICS", int, 0)

        # print debug info of inline closure pass
        DEBUG_INLINE_CLOSURE = _readenv("NUMBA_DEBUG_INLINE_CLOSURE", int, 0)

        # Force dump of LLVM IR
        DUMP_LLVM = _readenv("NUMBA_DUMP_LLVM", int, DEBUG)

        # Force dump of Function optimized LLVM IR
        DUMP_FUNC_OPT = _readenv("NUMBA_DUMP_FUNC_OPT", int, DEBUG)

        # Force dump of Optimized LLVM IR
        DUMP_OPTIMIZED = _readenv("NUMBA_DUMP_OPTIMIZED", int, DEBUG)

        # Force disable loop vectorize
        LOOP_VECTORIZE = _readenv("NUMBA_LOOP_VECTORIZE", int, 1)

        # Enable superword-level parallelism vectorization, default is off
        # since #8705 (miscompilation).
        SLP_VECTORIZE = _readenv("NUMBA_SLP_VECTORIZE", int, 0)

        # Force dump of generated assembly
        DUMP_ASSEMBLY = _readenv("NUMBA_DUMP_ASSEMBLY", int, DEBUG)

        # Force dump of type annotation
        ANNOTATE = _readenv("NUMBA_DUMP_ANNOTATION", int, 0)

        # Dump IR in such as way as to aid in "diff"ing.
        DIFF_IR = _readenv("NUMBA_DIFF_IR", int, 0)

        # Dump type annotation in html format
        def fmt_html_path(path):
            if path is None:
                return path
            else:
                return os.path.abspath(path)

        HTML = _readenv("NUMBA_DUMP_HTML", fmt_html_path, None)

        # x86-64 specific
        # Enable AVX on supported platforms where it won't degrade performance.
        def avx_default():
            if not _os_supports_avx():
                return False
            else:
                # There are various performance issues with AVX and LLVM
                # on some CPUs (list at
                # http://llvm.org/bugs/buglist.cgi?quicksearch=avx).
                # For now we'd rather disable it, since it can pessimize code
                # NOTE(review): CPU_NAME is an _EnvVar (always truthy), so
                # the ll.get_host_cpu_name() fallback never fires and the
                # membership test compares an _EnvVar — confirm behaviour.
                cpu_name = CPU_NAME or ll.get_host_cpu_name()
                disabled_cpus = {
                    "corei7-avx",
                    "core-avx-i",
                    "sandybridge",
                    "ivybridge",
                }
                # Disable known baseline CPU names that virtual machines may
                # incorrectly report as having AVX support.
                # This can cause problems with the SVML-pass's use of AVX512.
                # See https://github.com/numba/numba/issues/9582
                disabled_cpus |= {"nocona"}
                return cpu_name not in disabled_cpus

        ENABLE_AVX = _readenv("NUMBA_ENABLE_AVX", int, avx_default)

        # if set and SVML is available, it will be disabled
        # By default, it's disabled on 32-bit platforms.
        DISABLE_INTEL_SVML = _readenv(
            "NUMBA_DISABLE_INTEL_SVML", int, IS_32BITS
        )

        # Disable jit for debugging
        DISABLE_JIT = _readenv("NUMBA_DISABLE_JIT", int, 0)

        # choose parallel backend to use
        THREADING_LAYER_PRIORITY = _readenv(
            "NUMBA_THREADING_LAYER_PRIORITY",
            lambda string: string.split(),
            ["tbb", "omp", "workqueue"],
        )
        THREADING_LAYER = _readenv("NUMBA_THREADING_LAYER", str, "default")

        # CUDA Configs

        # Whether to warn about kernel launches where a host array
        # is used as a parameter, forcing a copy to and from the device.
        # On by default.
        CUDA_WARN_ON_IMPLICIT_COPY = _readenv(
            "NUMBA_CUDA_WARN_ON_IMPLICIT_COPY", int, 1
        )

        # Force CUDA compute capability to a specific version
        FORCE_CUDA_CC = _readenv("NUMBA_FORCE_CUDA_CC", _parse_cc, None)

        # The default compute capability to target when compiling to PTX.
        CUDA_DEFAULT_PTX_CC = _readenv(
            "NUMBA_CUDA_DEFAULT_PTX_CC", _parse_cc, (5, 0)
        )

        # Disable CUDA support
        DISABLE_CUDA = _readenv(
            "NUMBA_DISABLE_CUDA", int, int(MACHINE_BITS == 32)
        )

        # Enable CUDA simulator
        ENABLE_CUDASIM = _readenv("NUMBA_ENABLE_CUDASIM", int, 0)

        # CUDA logging level
        # Any level name from the *logging* module. Case insensitive.
        # Defaults to CRITICAL if not set or invalid.
        # Note: This setting only applies when logging is not configured.
        # Any existing logging configuration is preserved.
        CUDA_LOG_LEVEL = _readenv("NUMBA_CUDA_LOG_LEVEL", str, "")

        # Include argument values in the CUDA Driver API logs
        CUDA_LOG_API_ARGS = _readenv("NUMBA_CUDA_LOG_API_ARGS", int, 0)

        # Maximum number of pending CUDA deallocations (default: 10)
        CUDA_DEALLOCS_COUNT = _readenv(
            "NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT", int, 10
        )

        # Maximum ratio of pending CUDA deallocations to capacity (default: 0.2)
        CUDA_DEALLOCS_RATIO = _readenv(
            "NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO", float, 0.2
        )

        CUDA_ARRAY_INTERFACE_SYNC = _readenv(
            "NUMBA_CUDA_ARRAY_INTERFACE_SYNC", int, 1
        )

        # Path of the directory that the CUDA driver libraries are located
        CUDA_DRIVER = _readenv("NUMBA_CUDA_DRIVER", str, "")

        # Buffer size for logs produced by CUDA driver operations (e.g.
        # linking)
        CUDA_LOG_SIZE = _readenv("NUMBA_CUDA_LOG_SIZE", int, 1024)

        # Whether to generate verbose log messages when JIT linking
        CUDA_VERBOSE_JIT_LOG = _readenv("NUMBA_CUDA_VERBOSE_JIT_LOG", int, 1)

        # Whether the default stream is the per-thread default stream
        CUDA_PER_THREAD_DEFAULT_STREAM = _readenv(
            "NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM", int, 0
        )

        CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = _readenv(
            "NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY", int, 0
        )

        # Location of the CUDA include files
        if IS_WIN32:
            cuda_path = os.environ.get("CUDA_PATH")
            if cuda_path:
                default_cuda_include_path = os.path.join(cuda_path, "include")
            else:
                default_cuda_include_path = "cuda_include_not_found"
        else:
            default_cuda_include_path = os.path.join(
                os.sep, "usr", "local", "cuda", "include"
            )
        CUDA_INCLUDE_PATH = _readenv(
            "NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path
        )

        # Threading settings

        # The default number of threads to use.
        def num_threads_default():
            # Prefer the CPU affinity mask where available (Linux),
            # fall back to the raw CPU count, then to 1.
            try:
                sched_getaffinity = os.sched_getaffinity
            except AttributeError:
                pass
            else:
                return max(1, len(sched_getaffinity(0)))

            cpu_count = os.cpu_count()
            if cpu_count is not None:
                return max(1, cpu_count)

            return 1

        NUMBA_DEFAULT_NUM_THREADS = num_threads_default()

        # Numba thread pool size (defaults to number of CPUs on the system).
        _NUMBA_NUM_THREADS = _readenv(
            "NUMBA_NUM_THREADS", int, NUMBA_DEFAULT_NUM_THREADS
        )
        # Changing the thread count after the pool has launched is an error.
        if (
            "NUMBA_NUM_THREADS" in globals()
            and globals()["NUMBA_NUM_THREADS"] != _NUMBA_NUM_THREADS
        ):
            from numba.np.ufunc import parallel

            if parallel._is_initialized:
                raise RuntimeError(
                    "Cannot set NUMBA_NUM_THREADS to a "
                    "different value once the threads have been "
                    "launched (currently have %s, "
                    "trying to set %s)"
                    % (_NUMBA_NUM_THREADS, globals()["NUMBA_NUM_THREADS"])
                )

        NUMBA_NUM_THREADS = _NUMBA_NUM_THREADS
        del _NUMBA_NUM_THREADS

        # sys.monitoring support
        ENABLE_SYS_MONITORING = _readenv("NUMBA_ENABLE_SYS_MONITORING", int, 0)

        # Profiling support

        # Indicates if a profiler detected. Only VTune can be detected for now
        RUNNING_UNDER_PROFILER = "VS_PROFILER" in os.environ

        # Enables jit events in LLVM to support profiling of dynamic code
        ENABLE_PROFILING = _readenv(
            "NUMBA_ENABLE_PROFILING", int, int(RUNNING_UNDER_PROFILER)
        )

        # Debug Info

        # The default value for the `debug` flag
        DEBUGINFO_DEFAULT = _readenv("NUMBA_DEBUGINFO", int, ENABLE_PROFILING)
        CUDA_DEBUGINFO_DEFAULT = _readenv("NUMBA_CUDA_DEBUGINFO", int, 0)

        EXTEND_VARIABLE_LIFETIMES = _readenv(
            "NUMBA_EXTEND_VARIABLE_LIFETIMES", int, 0
        )

        # gdb binary location
        def which_gdb(path_or_bin):
            # Resolve on PATH; keep the raw value if not found.
            gdb = shutil.which(path_or_bin)
            return gdb if gdb is not None else path_or_bin

        GDB_BINARY = _readenv("NUMBA_GDB_BINARY", which_gdb, "gdb")

        # CUDA Memory management
        CUDA_MEMORY_MANAGER = _readenv(
            "NUMBA_CUDA_MEMORY_MANAGER", str, "default"
        )

        # Experimental refprune pass
        LLVM_REFPRUNE_PASS = _readenv(
            "NUMBA_LLVM_REFPRUNE_PASS",
            int,
            1,
        )
        # NOTE(review): LLVM_REFPRUNE_PASS is an _EnvVar (always truthy), so
        # the "" branch below looks unreachable — confirm intent.
        LLVM_REFPRUNE_FLAGS = _readenv(
            "NUMBA_LLVM_REFPRUNE_FLAGS",
            str,
            "all" if LLVM_REFPRUNE_PASS else "",
        )

        # llvmlite memory manager
        USE_LLVMLITE_MEMORY_MANAGER = _readenv(
            "NUMBA_USE_LLVMLITE_MEMORY_MANAGER", int, None
        )

        # Timing support.

        # LLVM_PASS_TIMINGS enables LLVM recording of pass timings.
        LLVM_PASS_TIMINGS = _readenv(
            "NUMBA_LLVM_PASS_TIMINGS",
            int,
            0,
        )

        # Coverage support.

        # JIT_COVERAGE (bool) controls whether the compiler report compiled
        # lines to coverage tools. Defaults to off.
        JIT_COVERAGE = _readenv(
            "NUMBA_JIT_COVERAGE",
            int,
            0,
        )

        # Inject the configuration values into _descriptors
        if not hasattr(self, "_descriptors"):
            self._descriptors = {}

        # Every UPPERCASE local defined above is a configuration setting.
        for name, value in locals().copy().items():
            if name.isupper():
                self._descriptors[name] = value
705
+
706

# Singleton reloader; constructing it performs the initial read of the
# environment (reset() -> update(force=True)).
_env_reloader = _EnvReloader()
708
+
709
+
710
def __getattr__(name):
    """Module-level __getattr__ provides dynamic behavior for _EnvVar descriptors."""
    # Names present as ordinary module globals are returned as-is.
    module_ns = globals()
    if name in module_ns:
        return module_ns[name]

    # Otherwise look the name up in the reloader's descriptor table.
    descriptors = getattr(_env_reloader, "_descriptors", None)
    if descriptors and name in descriptors:
        return descriptors[name].__get__()

    raise AttributeError(f"module {__name__} has no attribute {name}")
723
+
724
+
725
def __setattr__(name, value):
    """Module-level __setattr__ provides dynamic behavior for _EnvVar descriptors."""
    # Ordinary module globals are updated directly.
    module_ns = globals()
    if name in module_ns:
        module_ns[name] = value
        return

    # Lazily create the descriptor table, then update an existing entry or
    # install a fresh _EnvVar for an unknown name.
    descriptors = getattr(_env_reloader, "_descriptors", None)
    if descriptors is None:
        descriptors = {}
        _env_reloader._descriptors = descriptors
    if name in descriptors:
        descriptors[name].__set__(value)
    else:
        descriptors[name] = _EnvVar(value, name)
741
+
742
+
743
def reload_config():
    """
    Reload the configuration from environment variables, if necessary.

    Delegates to the module singleton's update(), which only reprocesses
    when the NUMBA_* environment actually changed since the last read.
    """
    _env_reloader.update()