numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (172)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,747 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import platform
5
+ import sys
6
+ import os
7
+ import re
8
+ import shutil
9
+ import warnings
10
+ import traceback
11
+
12
+ # YAML needed to use file based Numba config
13
+ try:
14
+ import yaml
15
+
16
+ _HAVE_YAML = True
17
+ except ImportError:
18
+ _HAVE_YAML = False
19
+
20
+
21
+ import llvmlite.binding as ll
22
+
23
+
24
# Platform detection flags used throughout the configuration logic.
IS_WIN32 = sys.platform.startswith("win32")
IS_OSX = sys.platform.startswith("darwin")
# Pointer width in bits of the running interpreter: tuple.__itemsize__ is
# the per-item (pointer) size in bytes.
MACHINE_BITS = tuple.__itemsize__ * 8
IS_32BITS = MACHINE_BITS == 32
# Python version in (major, minor) tuple
PYVERSION = sys.version_info[:2]

# this is the name of the user supplied configuration file
_config_fname = ".numba_config.yaml"
33
+
34
+
35
+ def _parse_cc(text):
36
+ """
37
+ Parse CUDA compute capability version string.
38
+ """
39
+ if not text:
40
+ return None
41
+ else:
42
+ m = re.match(r"(\d+)\.(\d+)", text)
43
+ if not m:
44
+ raise ValueError(
45
+ "Compute capability must be specified as a "
46
+ 'string of "major.minor" where major '
47
+ "and minor are decimals"
48
+ )
49
+ grp = m.groups()
50
+ return int(grp[0]), int(grp[1])
51
+
52
+
53
+ def _os_supports_avx():
54
+ """
55
+ Whether the current OS supports AVX, regardless of the CPU.
56
+
57
+ This is necessary because the user may be running a very old Linux
58
+ kernel (e.g. CentOS 5) on a recent CPU.
59
+ """
60
+ if not sys.platform.startswith("linux") or platform.machine() not in (
61
+ "i386",
62
+ "i586",
63
+ "i686",
64
+ "x86_64",
65
+ ):
66
+ return True
67
+ # Executing the CPUID instruction may report AVX available even though
68
+ # the kernel doesn't support it, so parse /proc/cpuinfo instead.
69
+ try:
70
+ f = open("/proc/cpuinfo", "r")
71
+ except OSError:
72
+ # If /proc isn't available, assume yes
73
+ return True
74
+ with f:
75
+ for line in f:
76
+ head, _, body = line.partition(":")
77
+ if head.strip() == "flags" and "avx" in body.split():
78
+ return True
79
+ else:
80
+ return False
81
+
82
+
83
+ class _OptLevel(int):
84
+ """This class holds the "optimisation level" set in `NUMBA_OPT`. As this env
85
+ var can be an int or a string, but is almost always interpreted as an int,
86
+ this class subclasses int so as to get the common behaviour but stores the
87
+ actual value as a `_raw_value` member. The value "max" is a special case
88
+ and the property `is_opt_max` can be queried to find if the optimisation
89
+ level (supplied value at construction time) is "max"."""
90
+
91
+ def __new__(cls, *args, **kwargs):
92
+ assert len(args) == 1
93
+ (value,) = args
94
+ _int_value = 3 if value == "max" else int(value)
95
+ # the int ctor is always called with an appropriate integer value
96
+ new = super().__new__(cls, _int_value, **kwargs)
97
+ # raw value is max or int
98
+ new._raw_value = value if value == "max" else _int_value
99
+ return new
100
+
101
+ @property
102
+ def is_opt_max(self):
103
+ """Returns True if the the optimisation level is "max" False
104
+ otherwise."""
105
+ return self._raw_value == "max"
106
+
107
+ def __repr__(self):
108
+ if isinstance(self._raw_value, str):
109
+ arg = f"'{self._raw_value}'"
110
+ else:
111
+ arg = self._raw_value
112
+ return f"_OptLevel({arg})"
113
+
114
+
115
def _process_opt_level(opt_level):
    """Validate a ``NUMBA_OPT`` setting and wrap it in an ``_OptLevel``.

    Accepts exactly "0", "1", "2", "3" or "max"; raises ValueError for
    anything else.
    """
    supported = ("0", "1", "2", "3", "max")
    if opt_level in supported:
        return _OptLevel(opt_level)
    msg = (
        "Environment variable `NUMBA_OPT` is set to an unsupported "
        f"value '{opt_level}', supported values are 0, 1, 2, 3, and "
        "'max'"
    )
    raise ValueError(msg)
125
+
126
+
127
+ class _EnvVar(object):
128
+ """Descriptor for configuration values that checks numba.config on access."""
129
+
130
+ def __init__(self, value, name):
131
+ self.name = name
132
+ if isinstance(value, _EnvVar):
133
+ self.value = value.__get__()
134
+ else:
135
+ self.value = value
136
+ self.check_numba_config()
137
+
138
+ def check_numba_config(self):
139
+ """Check for conflicting value in numba.config and emit deprecation warning."""
140
+ try:
141
+ from numba import config as numba_config
142
+
143
+ if hasattr(numba_config, self.name):
144
+ config_value = getattr(numba_config, self.name)
145
+ if config_value != self.value:
146
+ msg = (
147
+ f"Configuration value '{self.name}' is explicitly set "
148
+ f"to `{config_value}` in numba.config. "
149
+ "numba.config is deprecated for numba-cuda "
150
+ "and support for configuration values from it "
151
+ "will be removed in a future release. "
152
+ "Please use numba.cuda.config."
153
+ )
154
+ warnings.warn(msg, category=DeprecationWarning)
155
+ self.value = config_value
156
+ else:
157
+ # Initialize any missing variables in numba.config
158
+ setattr(numba_config, self.name, self.value)
159
+ except ImportError:
160
+ pass
161
+
162
+ def __get__(self):
163
+ self.check_numba_config()
164
+ return self.value
165
+
166
+ def __set__(self, value):
167
+ self.value = value
168
+
169
+
170
class _EnvReloader(object):
    """Reads NUMBA_* settings from an optional ``.numba_config.yaml`` file
    and the process environment, parses them, and installs the results as
    ``_EnvVar`` objects in ``self._descriptors`` (consumed by the
    module-level ``__getattr__``/``__setattr__``)."""

    def __init__(self):
        self.reset()

    def reset(self):
        # Clearing old_environ and forcing an update performs a full
        # (re)load of every configuration value.
        self.old_environ = {}
        self.update(force=True)

    def update(self, force=False):
        """Re-read configuration sources and reprocess them if anything
        changed (or unconditionally when *force* is true)."""
        new_environ = {}

        # first check if there's a .numba_config.yaml and use values from that
        if os.path.exists(_config_fname) and os.path.isfile(_config_fname):
            if not _HAVE_YAML:
                msg = (
                    "A Numba config file is found but YAML parsing "
                    "capabilities appear to be missing. "
                    "To use this feature please install `pyyaml`. e.g. "
                    "`conda install pyyaml`."
                )
                warnings.warn(msg)
            else:
                with open(_config_fname, "rt") as f:
                    y_conf = yaml.safe_load(f)
                if y_conf is not None:
                    # File keys are lowercase setting names without the
                    # NUMBA_ prefix; normalise to env-var form.
                    for k, v in y_conf.items():
                        new_environ["NUMBA_" + k.upper()] = v

        # clobber file based config with any locally defined env vars
        for name, value in os.environ.items():
            if name.startswith("NUMBA_"):
                new_environ[name] = value
        # We update the config variables if at least one NUMBA environment
        # variable was modified. This lets the user modify values
        # directly in the config module without having them clobbered when
        # reload_config() is called by the compiler.
        if force or self.old_environ != new_environ:
            self.process_environ(new_environ)
            # Store a copy
            self.old_environ = dict(new_environ)

        self.validate()

    def validate(self):
        """Sanity-check mutually dependent settings after processing."""
        current_module = sys.modules[__name__]
        # The module attributes may not exist yet on a partial load, hence
        # the AttributeError fallbacks to the defaults (0).
        try:
            CUDA_USE_NVIDIA_BINDING = current_module.CUDA_USE_NVIDIA_BINDING
        except AttributeError:
            CUDA_USE_NVIDIA_BINDING = 0

        try:
            CUDA_PER_THREAD_DEFAULT_STREAM = (
                current_module.CUDA_PER_THREAD_DEFAULT_STREAM
            )
        except AttributeError:
            CUDA_PER_THREAD_DEFAULT_STREAM = 0

        if CUDA_USE_NVIDIA_BINDING:  # noqa: F821
            try:
                import cuda  # noqa: F401
            except ImportError as ie:
                msg = (
                    "CUDA Python bindings requested (the environment "
                    "variable NUMBA_CUDA_USE_NVIDIA_BINDING is set), "
                    f"but they are not importable: {ie.msg}."
                )
                warnings.warn(msg)

                # Fall back to the ctypes binding when cuda-python is absent.
                current_module.CUDA_USE_NVIDIA_BINDING = 0

            if CUDA_PER_THREAD_DEFAULT_STREAM:  # noqa: F821
                warnings.warn(
                    "PTDS support is handled by CUDA Python when "
                    "using the NVIDIA binding. Please set the "
                    "environment variable "
                    "CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM to 1 "
                    "instead."
                )

    def process_environ(self, environ):
        """Parse every supported setting from *environ* and record the
        resulting ``_EnvVar`` objects in ``self._descriptors``."""

        def _readenv(name, ctor, default):
            # Read `name` from environ, parse with `ctor`; on a missing or
            # unparsable value fall back to `default` (called if callable).
            value = environ.get(name)
            if value is None:
                result = default() if callable(default) else default
            else:
                try:
                    result = ctor(value)
                except Exception:
                    warnings.warn(
                        f"Environment variable '{name}' is defined but "
                        f"its associated value '{value}' could not be "
                        "parsed.\nThe parse failed with exception:\n"
                        f"{traceback.format_exc()}",
                        RuntimeWarning,
                    )
                    result = default() if callable(default) else default
            # Descriptor names drop the NUMBA_ prefix.
            var_name = name
            if name.startswith("NUMBA_"):
                var_name = name[6:]
            return _EnvVar(result, var_name)

        def optional_str(x):
            return str(x) if x is not None else None

        # Type casting rules selection
        USE_LEGACY_TYPE_SYSTEM = _readenv(
            "NUMBA_USE_LEGACY_TYPE_SYSTEM", int, 1
        )

        # developer mode produces full tracebacks, disables help instructions
        DEVELOPER_MODE = _readenv("NUMBA_DEVELOPER_MODE", int, 0)

        # disable performance warnings, will switch off the generation of
        # warnings of the class NumbaPerformanceWarning
        DISABLE_PERFORMANCE_WARNINGS = _readenv(
            "NUMBA_DISABLE_PERFORMANCE_WARNINGS", int, 0
        )

        # Flag to enable full exception reporting
        FULL_TRACEBACKS = _readenv("NUMBA_FULL_TRACEBACKS", int, DEVELOPER_MODE)

        # Show help text when an error occurs
        SHOW_HELP = _readenv("NUMBA_SHOW_HELP", int, 0)

        # The color scheme to use for error messages, default is no color
        # just bold fonts in use.
        COLOR_SCHEME = _readenv("NUMBA_COLOR_SCHEME", str, "no_color")

        # Whether to globally enable bounds checking. The default None means
        # to use the value of the flag to @njit. 0 or 1 overrides the flag
        # globally.
        BOUNDSCHECK = _readenv("NUMBA_BOUNDSCHECK", int, None)

        # Whether to always warn about potential uninitialized variables
        # because static controlflow analysis cannot find a definition
        # in one or more of the incoming paths.
        ALWAYS_WARN_UNINIT_VAR = _readenv(
            "NUMBA_ALWAYS_WARN_UNINIT_VAR",
            int,
            0,
        )

        # Whether to warn about kernel launches where the grid size will
        # under utilize the GPU due to low occupancy. On by default.
        CUDA_LOW_OCCUPANCY_WARNINGS = _readenv(
            "NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS", int, 1
        )

        # Whether to use the official CUDA Python API Bindings
        CUDA_USE_NVIDIA_BINDING = _readenv(
            "NUMBA_CUDA_USE_NVIDIA_BINDING", int, 0
        )

        # Debug flag to control compiler debug print
        DEBUG = _readenv("NUMBA_DEBUG", int, 0)

        # DEBUG print IR after pass names
        DEBUG_PRINT_AFTER = _readenv("NUMBA_DEBUG_PRINT_AFTER", str, "none")

        # DEBUG print IR before pass names
        DEBUG_PRINT_BEFORE = _readenv("NUMBA_DEBUG_PRINT_BEFORE", str, "none")

        # DEBUG print IR before and after pass names
        DEBUG_PRINT_WRAP = _readenv("NUMBA_DEBUG_PRINT_WRAP", str, "none")

        # Highlighting in intermediate dumps
        HIGHLIGHT_DUMPS = _readenv("NUMBA_HIGHLIGHT_DUMPS", int, 0)

        # JIT Debug flag to trigger IR instruction print
        DEBUG_JIT = _readenv("NUMBA_DEBUG_JIT", int, 0)

        # Enable debugging of front-end operation
        # (up to and including IR generation)
        DEBUG_FRONTEND = _readenv("NUMBA_DEBUG_FRONTEND", int, 0)

        # Enable debug prints in nrtdynmod and use of "safe" API functions
        DEBUG_NRT = _readenv("NUMBA_DEBUG_NRT", int, 0)

        # Enable NRT statistics counters
        NRT_STATS = _readenv("NUMBA_NRT_STATS", int, 0)

        # Enable NRT statistics
        CUDA_NRT_STATS = _readenv("NUMBA_CUDA_NRT_STATS", int, 0)

        # Enable NRT
        CUDA_ENABLE_NRT = _readenv("NUMBA_CUDA_ENABLE_NRT", int, 0)

        # How many recently deserialized functions to retain regardless
        # of external references
        FUNCTION_CACHE_SIZE = _readenv("NUMBA_FUNCTION_CACHE_SIZE", int, 128)

        # Maximum tuple size that parfors will unpack and pass to
        # internal gufunc.
        PARFOR_MAX_TUPLE_SIZE = _readenv(
            "NUMBA_PARFOR_MAX_TUPLE_SIZE", int, 100
        )

        # Enable logging of cache operation
        DEBUG_CACHE = _readenv("NUMBA_DEBUG_CACHE", int, DEBUG)

        # Redirect cache directory
        # Contains path to the directory
        CACHE_DIR = _readenv("NUMBA_CACHE_DIR", str, "")

        # Enable tracing support
        TRACE = _readenv("NUMBA_TRACE", int, 0)

        # Enable chrome tracing support
        CHROME_TRACE = _readenv("NUMBA_CHROME_TRACE", str, "")

        # Enable debugging of type inference
        DEBUG_TYPEINFER = _readenv("NUMBA_DEBUG_TYPEINFER", int, 0)

        # Configure compilation target to use the specified CPU name
        # and CPU feature as the host information.
        # Note: this overrides "host" option for AOT compilation.
        CPU_NAME = _readenv("NUMBA_CPU_NAME", optional_str, None)
        # NOTE(review): CPU_NAME here is an _EnvVar, so str(CPU_NAME) is its
        # repr and never compares equal to "generic" — the "" branch looks
        # unreachable; confirm intent.
        CPU_FEATURES = _readenv(
            "NUMBA_CPU_FEATURES",
            optional_str,
            ("" if str(CPU_NAME).lower() == "generic" else None),
        )
        # Optimization level
        OPT = _readenv("NUMBA_OPT", _process_opt_level, _OptLevel(3))

        # Force dump of Python bytecode
        DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND)

        # Force dump of control flow graph
        DUMP_CFG = _readenv("NUMBA_DUMP_CFG", int, DEBUG_FRONTEND)

        # Force dump of Numba IR
        DUMP_IR = _readenv("NUMBA_DUMP_IR", int, DEBUG_FRONTEND)

        # Force dump of Numba IR in SSA form
        # NOTE(review): DEBUG_FRONTEND is an _EnvVar object and therefore
        # always truthy, so `or DEBUG_TYPEINFER` is never selected here —
        # verify this is intended.
        DUMP_SSA = _readenv(
            "NUMBA_DUMP_SSA", int, DEBUG_FRONTEND or DEBUG_TYPEINFER
        )

        # print debug info of analysis and optimization on array operations
        DEBUG_ARRAY_OPT = _readenv("NUMBA_DEBUG_ARRAY_OPT", int, 0)

        # insert debug stmts to print information at runtime
        DEBUG_ARRAY_OPT_RUNTIME = _readenv(
            "NUMBA_DEBUG_ARRAY_OPT_RUNTIME", int, 0
        )

        # print stats about parallel for-loops
        DEBUG_ARRAY_OPT_STATS = _readenv("NUMBA_DEBUG_ARRAY_OPT_STATS", int, 0)

        # prints user friendly information about parallel
        PARALLEL_DIAGNOSTICS = _readenv("NUMBA_PARALLEL_DIAGNOSTICS", int, 0)

        # print debug info of inline closure pass
        DEBUG_INLINE_CLOSURE = _readenv("NUMBA_DEBUG_INLINE_CLOSURE", int, 0)

        # Force dump of LLVM IR
        DUMP_LLVM = _readenv("NUMBA_DUMP_LLVM", int, DEBUG)

        # Force dump of Function optimized LLVM IR
        DUMP_FUNC_OPT = _readenv("NUMBA_DUMP_FUNC_OPT", int, DEBUG)

        # Force dump of Optimized LLVM IR
        DUMP_OPTIMIZED = _readenv("NUMBA_DUMP_OPTIMIZED", int, DEBUG)

        # Force disable loop vectorize
        LOOP_VECTORIZE = _readenv("NUMBA_LOOP_VECTORIZE", int, 1)

        # Enable superword-level parallelism vectorization, default is off
        # since #8705 (miscompilation).
        SLP_VECTORIZE = _readenv("NUMBA_SLP_VECTORIZE", int, 0)

        # Force dump of generated assembly
        DUMP_ASSEMBLY = _readenv("NUMBA_DUMP_ASSEMBLY", int, DEBUG)

        # Force dump of type annotation
        ANNOTATE = _readenv("NUMBA_DUMP_ANNOTATION", int, 0)

        # Dump IR in such as way as to aid in "diff"ing.
        DIFF_IR = _readenv("NUMBA_DIFF_IR", int, 0)

        # Dump type annotation in html format
        def fmt_html_path(path):
            if path is None:
                return path
            else:
                return os.path.abspath(path)

        HTML = _readenv("NUMBA_DUMP_HTML", fmt_html_path, None)

        # x86-64 specific
        # Enable AVX on supported platforms where it won't degrade performance.
        def avx_default():
            if not _os_supports_avx():
                return False
            else:
                # There are various performance issues with AVX and LLVM
                # on some CPUs (list at
                # http://llvm.org/bugs/buglist.cgi?quicksearch=avx).
                # For now we'd rather disable it, since it can pessimize code
                # NOTE(review): CPU_NAME is an _EnvVar (always truthy), so
                # the ll.get_host_cpu_name() fallback never fires and the
                # membership test compares an _EnvVar — confirm behaviour.
                cpu_name = CPU_NAME or ll.get_host_cpu_name()
                disabled_cpus = {
                    "corei7-avx",
                    "core-avx-i",
                    "sandybridge",
                    "ivybridge",
                }
                # Disable known baseline CPU names that virtual machines may
                # incorrectly report as having AVX support.
                # This can cause problems with the SVML-pass's use of AVX512.
                # See https://github.com/numba/numba/issues/9582
                disabled_cpus |= {"nocona"}
                return cpu_name not in disabled_cpus

        ENABLE_AVX = _readenv("NUMBA_ENABLE_AVX", int, avx_default)

        # if set and SVML is available, it will be disabled
        # By default, it's disabled on 32-bit platforms.
        DISABLE_INTEL_SVML = _readenv(
            "NUMBA_DISABLE_INTEL_SVML", int, IS_32BITS
        )

        # Disable jit for debugging
        DISABLE_JIT = _readenv("NUMBA_DISABLE_JIT", int, 0)

        # choose parallel backend to use
        THREADING_LAYER_PRIORITY = _readenv(
            "NUMBA_THREADING_LAYER_PRIORITY",
            lambda string: string.split(),
            ["tbb", "omp", "workqueue"],
        )
        THREADING_LAYER = _readenv("NUMBA_THREADING_LAYER", str, "default")

        # CUDA Configs

        # Whether to warn about kernel launches where a host array
        # is used as a parameter, forcing a copy to and from the device.
        # On by default.
        CUDA_WARN_ON_IMPLICIT_COPY = _readenv(
            "NUMBA_CUDA_WARN_ON_IMPLICIT_COPY", int, 1
        )

        # Force CUDA compute capability to a specific version
        FORCE_CUDA_CC = _readenv("NUMBA_FORCE_CUDA_CC", _parse_cc, None)

        # The default compute capability to target when compiling to PTX.
        CUDA_DEFAULT_PTX_CC = _readenv(
            "NUMBA_CUDA_DEFAULT_PTX_CC", _parse_cc, (5, 0)
        )

        # Disable CUDA support
        DISABLE_CUDA = _readenv(
            "NUMBA_DISABLE_CUDA", int, int(MACHINE_BITS == 32)
        )

        # Enable CUDA simulator
        ENABLE_CUDASIM = _readenv("NUMBA_ENABLE_CUDASIM", int, 0)

        # CUDA logging level
        # Any level name from the *logging* module. Case insensitive.
        # Defaults to CRITICAL if not set or invalid.
        # Note: This setting only applies when logging is not configured.
        # Any existing logging configuration is preserved.
        CUDA_LOG_LEVEL = _readenv("NUMBA_CUDA_LOG_LEVEL", str, "")

        # Include argument values in the CUDA Driver API logs
        CUDA_LOG_API_ARGS = _readenv("NUMBA_CUDA_LOG_API_ARGS", int, 0)

        # Maximum number of pending CUDA deallocations (default: 10)
        CUDA_DEALLOCS_COUNT = _readenv(
            "NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT", int, 10
        )

        # Maximum ratio of pending CUDA deallocations to capacity (default: 0.2)
        CUDA_DEALLOCS_RATIO = _readenv(
            "NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO", float, 0.2
        )

        CUDA_ARRAY_INTERFACE_SYNC = _readenv(
            "NUMBA_CUDA_ARRAY_INTERFACE_SYNC", int, 1
        )

        # Path of the directory that the CUDA driver libraries are located
        CUDA_DRIVER = _readenv("NUMBA_CUDA_DRIVER", str, "")

        # Buffer size for logs produced by CUDA driver operations (e.g.
        # linking)
        CUDA_LOG_SIZE = _readenv("NUMBA_CUDA_LOG_SIZE", int, 1024)

        # Whether to generate verbose log messages when JIT linking
        CUDA_VERBOSE_JIT_LOG = _readenv("NUMBA_CUDA_VERBOSE_JIT_LOG", int, 1)

        # Whether the default stream is the per-thread default stream
        CUDA_PER_THREAD_DEFAULT_STREAM = _readenv(
            "NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM", int, 0
        )

        CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = _readenv(
            "NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY", int, 0
        )

        # Location of the CUDA include files
        if IS_WIN32:
            cuda_path = os.environ.get("CUDA_PATH")
            if cuda_path:
                default_cuda_include_path = os.path.join(cuda_path, "include")
            else:
                default_cuda_include_path = "cuda_include_not_found"
        else:
            default_cuda_include_path = os.path.join(
                os.sep, "usr", "local", "cuda", "include"
            )
        CUDA_INCLUDE_PATH = _readenv(
            "NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path
        )

        # Threading settings

        # The default number of threads to use.
        def num_threads_default():
            # Prefer the CPU affinity mask where available (Linux),
            # fall back to the raw CPU count, then to 1.
            try:
                sched_getaffinity = os.sched_getaffinity
            except AttributeError:
                pass
            else:
                return max(1, len(sched_getaffinity(0)))

            cpu_count = os.cpu_count()
            if cpu_count is not None:
                return max(1, cpu_count)

            return 1

        NUMBA_DEFAULT_NUM_THREADS = num_threads_default()

        # Numba thread pool size (defaults to number of CPUs on the system).
        _NUMBA_NUM_THREADS = _readenv(
            "NUMBA_NUM_THREADS", int, NUMBA_DEFAULT_NUM_THREADS
        )
        # Changing the thread count after the pool has launched is an error.
        if (
            "NUMBA_NUM_THREADS" in globals()
            and globals()["NUMBA_NUM_THREADS"] != _NUMBA_NUM_THREADS
        ):
            from numba.np.ufunc import parallel

            if parallel._is_initialized:
                raise RuntimeError(
                    "Cannot set NUMBA_NUM_THREADS to a "
                    "different value once the threads have been "
                    "launched (currently have %s, "
                    "trying to set %s)"
                    % (_NUMBA_NUM_THREADS, globals()["NUMBA_NUM_THREADS"])
                )

        NUMBA_NUM_THREADS = _NUMBA_NUM_THREADS
        del _NUMBA_NUM_THREADS

        # sys.monitoring support
        ENABLE_SYS_MONITORING = _readenv("NUMBA_ENABLE_SYS_MONITORING", int, 0)

        # Profiling support

        # Indicates if a profiler detected. Only VTune can be detected for now
        RUNNING_UNDER_PROFILER = "VS_PROFILER" in os.environ

        # Enables jit events in LLVM to support profiling of dynamic code
        ENABLE_PROFILING = _readenv(
            "NUMBA_ENABLE_PROFILING", int, int(RUNNING_UNDER_PROFILER)
        )

        # Debug Info

        # The default value for the `debug` flag
        DEBUGINFO_DEFAULT = _readenv("NUMBA_DEBUGINFO", int, ENABLE_PROFILING)
        CUDA_DEBUGINFO_DEFAULT = _readenv("NUMBA_CUDA_DEBUGINFO", int, 0)

        EXTEND_VARIABLE_LIFETIMES = _readenv(
            "NUMBA_EXTEND_VARIABLE_LIFETIMES", int, 0
        )

        # gdb binary location
        def which_gdb(path_or_bin):
            # Resolve on PATH; keep the raw value if not found.
            gdb = shutil.which(path_or_bin)
            return gdb if gdb is not None else path_or_bin

        GDB_BINARY = _readenv("NUMBA_GDB_BINARY", which_gdb, "gdb")

        # CUDA Memory management
        CUDA_MEMORY_MANAGER = _readenv(
            "NUMBA_CUDA_MEMORY_MANAGER", str, "default"
        )

        # Experimental refprune pass
        LLVM_REFPRUNE_PASS = _readenv(
            "NUMBA_LLVM_REFPRUNE_PASS",
            int,
            1,
        )
        # NOTE(review): LLVM_REFPRUNE_PASS is an _EnvVar (always truthy), so
        # the "" branch below looks unreachable — confirm intent.
        LLVM_REFPRUNE_FLAGS = _readenv(
            "NUMBA_LLVM_REFPRUNE_FLAGS",
            str,
            "all" if LLVM_REFPRUNE_PASS else "",
        )

        # llvmlite memory manager
        USE_LLVMLITE_MEMORY_MANAGER = _readenv(
            "NUMBA_USE_LLVMLITE_MEMORY_MANAGER", int, None
        )

        # Timing support.

        # LLVM_PASS_TIMINGS enables LLVM recording of pass timings.
        LLVM_PASS_TIMINGS = _readenv(
            "NUMBA_LLVM_PASS_TIMINGS",
            int,
            0,
        )

        # Coverage support.

        # JIT_COVERAGE (bool) controls whether the compiler report compiled
        # lines to coverage tools. Defaults to off.
        JIT_COVERAGE = _readenv(
            "NUMBA_JIT_COVERAGE",
            int,
            0,
        )

        # Inject the configuration values into _descriptors
        if not hasattr(self, "_descriptors"):
            self._descriptors = {}

        # Every UPPERCASE local defined above is a configuration setting.
        for name, value in locals().copy().items():
            if name.isupper():
                self._descriptors[name] = value
705
+
706

# Singleton reloader; constructing it performs the initial read of the
# environment (reset() -> update(force=True)).
_env_reloader = _EnvReloader()
708
+
709
+
710
def __getattr__(name):
    """Module-level __getattr__ provides dynamic behavior for _EnvVar descriptors."""
    # Names present as ordinary module globals are returned as-is.
    module_ns = globals()
    if name in module_ns:
        return module_ns[name]

    # Otherwise look the name up in the reloader's descriptor table.
    descriptors = getattr(_env_reloader, "_descriptors", None)
    if descriptors and name in descriptors:
        return descriptors[name].__get__()

    raise AttributeError(f"module {__name__} has no attribute {name}")
723
+
724
+
725
def __setattr__(name, value):
    """Module-level __setattr__ provides dynamic behavior for _EnvVar descriptors."""
    # Ordinary module globals are updated directly.
    module_ns = globals()
    if name in module_ns:
        module_ns[name] = value
        return

    # Lazily create the descriptor table, then update an existing entry or
    # install a fresh _EnvVar for an unknown name.
    descriptors = getattr(_env_reloader, "_descriptors", None)
    if descriptors is None:
        descriptors = {}
        _env_reloader._descriptors = descriptors
    if name in descriptors:
        descriptors[name].__set__(value)
    else:
        descriptors[name] = _EnvVar(value, name)
741
+
742
+
743
def reload_config():
    """
    Reload the configuration from environment variables, if necessary.

    Delegates to the module singleton's update(), which only reprocesses
    when the NUMBA_* environment actually changed since the last read.
    """
    _env_reloader.update()