numba-cuda 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +0 -8
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
- numba_cuda/numba/cuda/api_util.py +6 -0
- numba_cuda/numba/cuda/cgutils.py +1291 -0
- numba_cuda/numba/cuda/codegen.py +32 -14
- numba_cuda/numba/cuda/compiler.py +113 -10
- numba_cuda/numba/cuda/core/caching.py +741 -0
- numba_cuda/numba/cuda/core/callconv.py +338 -0
- numba_cuda/numba/cuda/core/codegen.py +168 -0
- numba_cuda/numba/cuda/core/compiler.py +205 -0
- numba_cuda/numba/cuda/core/typed_passes.py +139 -0
- numba_cuda/numba/cuda/cudadecl.py +0 -268
- numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +2 -1
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +4 -178
- numba_cuda/numba/cuda/debuginfo.py +469 -3
- numba_cuda/numba/cuda/device_init.py +0 -1
- numba_cuda/numba/cuda/dispatcher.py +309 -11
- numba_cuda/numba/cuda/extending.py +2 -1
- numba_cuda/numba/cuda/fp16.py +348 -0
- numba_cuda/numba/cuda/intrinsics.py +1 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
- numba_cuda/numba/cuda/lowering.py +1833 -8
- numba_cuda/numba/cuda/mathimpl.py +2 -90
- numba_cuda/numba/cuda/nvvmutils.py +2 -1
- numba_cuda/numba/cuda/printimpl.py +2 -1
- numba_cuda/numba/cuda/serialize.py +264 -0
- numba_cuda/numba/cuda/simulator/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
- numba_cuda/numba/cuda/stubs.py +0 -308
- numba_cuda/numba/cuda/target.py +13 -5
- numba_cuda/numba/cuda/testing.py +156 -5
- numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
- numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
- numba_cuda/numba/cuda/utils.py +785 -0
- numba_cuda/numba/cuda/vector_types.py +1 -1
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +61 -48
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/utils.py
CHANGED
|
@@ -3,6 +3,791 @@ import warnings
|
|
|
3
3
|
import traceback
|
|
4
4
|
import functools
|
|
5
5
|
|
|
6
|
+
import atexit
|
|
7
|
+
import builtins
|
|
8
|
+
import inspect
|
|
9
|
+
import operator
|
|
10
|
+
import timeit
|
|
11
|
+
import math
|
|
12
|
+
import sys
|
|
13
|
+
import weakref
|
|
14
|
+
import threading
|
|
15
|
+
import contextlib
|
|
16
|
+
import json
|
|
17
|
+
import typing as _tp
|
|
18
|
+
from pprint import pformat
|
|
19
|
+
|
|
20
|
+
from types import ModuleType
|
|
21
|
+
from importlib import import_module
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
from inspect import signature as pysignature # noqa: F401
|
|
25
|
+
from inspect import Signature as pySignature # noqa: F401
|
|
26
|
+
from inspect import Parameter as pyParameter # noqa: F401
|
|
27
|
+
|
|
28
|
+
from numba.core.config import (
|
|
29
|
+
MACHINE_BITS, # noqa: F401
|
|
30
|
+
DEVELOPER_MODE,
|
|
31
|
+
) # noqa: F401
|
|
32
|
+
from numba.core import types, config
|
|
33
|
+
|
|
34
|
+
from collections.abc import Mapping, Sequence, MutableSet, MutableMapping
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def erase_traceback(exc_value):
    """
    Detach the traceback from *exc_value* and return the same exception.

    When a traceback is attached, the locals held by its frames are cleared
    first (via ``traceback.clear_frames``); the traceback is then dropped
    with ``with_traceback(None)``.
    """
    tb = exc_value.__traceback__
    if tb is not None:
        traceback.clear_frames(tb)
    return exc_value.with_traceback(None)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def safe_relpath(path, start=os.curdir):
    """
    Return *path* relative to *start* when that is possible, otherwise the
    absolute form of *path*.

    On Windows ``os.path.relpath`` cannot relate paths that live on
    different drives because they do not share a root
    (see https://bugs.python.org/issue7195), so in that case the absolute
    path is returned instead.
    """

    def _drive_of(location):
        # Drive component of the absolute form of `location`.
        return os.path.splitdrive(os.path.abspath(location))[0]

    if _drive_of(path) == _drive_of(start):
        return os.path.relpath(path, start=start)
    # Different drives: relpath must not be used.
    return os.path.abspath(path)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Mapping between operator module functions and the corresponding built-in
# operators.

# Binary operator spelling -> function from the `operator` module.
BINOPS_TO_OPERATORS = {
    "+": operator.add,
    "-": operator.sub,
    "*": operator.mul,
    "//": operator.floordiv,
    "/": operator.truediv,
    "%": operator.mod,
    "**": operator.pow,
    "&": operator.and_,
    "|": operator.or_,
    "^": operator.xor,
    "<<": operator.lshift,
    ">>": operator.rshift,
    "==": operator.eq,
    "!=": operator.ne,
    "<": operator.lt,
    "<=": operator.le,
    ">": operator.gt,
    ">=": operator.ge,
    "is": operator.is_,
    "is not": operator.is_not,
    # This one has its args reversed!
    # (operator.contains(a, b) evaluates `b in a`.)
    "in": operator.contains,
    "@": operator.matmul,
}

# In-place (augmented assignment) operator spelling -> `operator` function.
INPLACE_BINOPS_TO_OPERATORS = {
    "+=": operator.iadd,
    "-=": operator.isub,
    "*=": operator.imul,
    "//=": operator.ifloordiv,
    "/=": operator.itruediv,
    "%=": operator.imod,
    "**=": operator.ipow,
    "&=": operator.iand,
    "|=": operator.ior,
    "^=": operator.ixor,
    "<<=": operator.ilshift,
    ">>=": operator.irshift,
    "@=": operator.imatmul,
}


# Union of the two tables above; the key sets are disjoint, so no entry is
# overridden by the merge.
ALL_BINOPS_TO_OPERATORS = {**BINOPS_TO_OPERATORS, **INPLACE_BINOPS_TO_OPERATORS}


# Unary operator spelling -> `operator` function. "is_true" is not Python
# syntax; it is the key used here for operator.truth.
# NOTE: the name is spelled "BUITINS" (sic); it is a module-level name, so
# renaming it could break importers.
UNARY_BUITINS_TO_OPERATORS = {
    "+": operator.pos,
    "-": operator.neg,
    "~": operator.invert,
    "not": operator.not_,
    "is_true": operator.truth,
}

# Reverse direction: `operator` function -> operator spelling. Binary and
# unary functions share this table, so "+" and "-" each appear as the value
# of two different keys.
# NOTE(review): operator.matmul / operator.imatmul have no entry here even
# though "@" and "@=" are present in the forward tables -- confirm whether
# that omission is intentional.
OPERATORS_TO_BUILTINS = {
    operator.add: "+",
    operator.iadd: "+=",
    operator.sub: "-",
    operator.isub: "-=",
    operator.mul: "*",
    operator.imul: "*=",
    operator.floordiv: "//",
    operator.ifloordiv: "//=",
    operator.truediv: "/",
    operator.itruediv: "/=",
    operator.mod: "%",
    operator.imod: "%=",
    operator.pow: "**",
    operator.ipow: "**=",
    operator.and_: "&",
    operator.iand: "&=",
    operator.or_: "|",
    operator.ior: "|=",
    operator.xor: "^",
    operator.ixor: "^=",
    operator.lshift: "<<",
    operator.ilshift: "<<=",
    operator.rshift: ">>",
    operator.irshift: ">>=",
    operator.eq: "==",
    operator.ne: "!=",
    operator.lt: "<",
    operator.le: "<=",
    operator.gt: ">",
    operator.ge: ">=",
    operator.is_: "is",
    operator.is_not: "is not",
    # This one has its args reversed!
    operator.contains: "in",
    # Unary
    operator.pos: "+",
    operator.neg: "-",
    operator.invert: "~",
    operator.not_: "not",
    operator.truth: "is_true",
}
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# Module-level flag read by shutting_down(); flipped to True by
# _at_shutdown().
_shutting_down = False


def _at_shutdown():
    """Record that interpreter shutdown has begun by setting the module
    flag ``_shutting_down`` to True."""
    global _shutting_down
    _shutting_down = True
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def shutting_down(globals=globals):
    """
    Report whether the interpreter is currently shutting down.

    Intended for finalizers, ``__del__`` methods and similar. Callers are
    advised to bind this function early instead of looking it up at call
    time, because module globals may already have been cleared during
    shutdown. The ``globals`` parameter is a callable returning the
    namespace that holds the ``_shutting_down`` flag.
    """
    # A missing entry or a value reset to None both mean the module has
    # been torn down, which only happens at shutdown.
    flag = globals().get("_shutting_down")
    return flag is None or flag is True
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# weakref.finalize registers an exit function that runs all finalizers for
# which atexit is True. Some of these finalizers may call shutting_down() to
# check whether the interpreter is shutting down. For this to behave correctly,
# we need to make sure that _at_shutdown is called before the finalizer exit
# function. Since atexit operates as a LIFO stack, we first construct a dummy
# finalizer then register atexit to ensure this ordering.
#
# The order of the next two statements is therefore significant: do not
# swap them.
weakref.finalize(lambda: None, lambda: None)
atexit.register(_at_shutdown)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class ThreadLocalStack:
    """A stack whose contents live in thread-local storage.

    The class is only usable through subclasses, each declared with a
    unique ``stack_name`` class keyword. All instances of one subclass
    created on the same thread operate on the same underlying list
    (BORG pattern); different threads see different lists.
    """

    # Thread-local namespace shared by every subclass; each subclass uses
    # the attribute "stack_<stack_name>" inside it.
    _tls = threading.local()
    stack_name: str
    # stack_name -> subclass, used to reject duplicate names.
    _registered = {}

    def __init_subclass__(cls, *, stack_name, **kwargs):
        super().__init_subclass__(**kwargs)
        registry = cls._registered
        # Each stack_name may be claimed by a single subclass only.
        assert stack_name not in registry, (
            f"stack_name: '{stack_name}' already in use"
        )
        cls.stack_name = stack_name
        registry[stack_name] = cls

    def __init__(self):
        # This class must not be used directly.
        assert type(self) is not ThreadLocalStack
        storage = self._tls
        slot = f"stack_{self.stack_name}"
        # Create the per-thread list on first use, then bind to it.
        if not hasattr(storage, slot):
            setattr(storage, slot, [])
        self._stack = getattr(storage, slot)

    def push(self, state):
        """Push to the stack"""
        self._stack.append(state)

    def pop(self):
        """Pop from the stack"""
        return self._stack.pop()

    def top(self):
        """Return the most recently pushed item without removing it.

        Raises IndexError if the stack is empty. Users should check the
        size of the stack beforehand.
        """
        return self._stack[-1]

    def __len__(self):
        return len(self._stack)

    @contextlib.contextmanager
    def enter(self, state):
        """Context manager that keeps ``state`` on the stack for the
        duration of the ``with`` block and pops it on exit, including when
        the block raises.
        """
        self.push(state)
        try:
            yield
        finally:
            self.pop()
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class ConfigOptions:
    """A bag of named options restricted to the keys of ``OPTIONS``.

    Subclasses provide ``OPTIONS`` (name -> default value). Each instance
    starts with a copy of those defaults; options can be read and written
    as attributes or through ``set`` / ``unset``. Unknown names are
    rejected.
    """

    OPTIONS = {}

    def __init__(self):
        self._values = self.OPTIONS.copy()

    def set(self, name, value=True):
        """Assign ``value`` to option ``name``; NameError if unknown."""
        if name in self.OPTIONS:
            self._values[name] = value
            return
        raise NameError("Invalid flag: %s" % name)

    def unset(self, name):
        """Set option ``name`` to False."""
        self.set(name, False)

    def _check_attr(self, name):
        # Attribute-style access reports unknown options as AttributeError.
        if name not in self.OPTIONS:
            raise AttributeError("Invalid flag: %s" % name)

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.
        self._check_attr(name)
        return self._values[name]

    def __setattr__(self, name, value):
        if not name.startswith("_"):
            # Public names are options and are stored in `_values`.
            self._check_attr(name)
            self._values[name] = value
        else:
            # Underscore-prefixed names are ordinary instance attributes.
            super().__setattr__(name, value)

    def __repr__(self):
        # Options whose value is exactly False are left out.
        shown = [
            "%s=%s" % (key, val)
            for key, val in self._values.items()
            if val is not False
        ]
        return "Flags(%s)" % ", ".join(shown)

    def copy(self):
        """Return a new instance of the same type with copied values."""
        clone = type(self)()
        clone._values = self._values.copy()
        return clone

    def __eq__(self, other):
        if not isinstance(other, ConfigOptions):
            return False
        return other._values == self._values

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(tuple(sorted(self._values.items())))
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def order_by_target_specificity(target, templates, fnkey=""):
    """Return the templates usable for ``target``, most specific first.

    A template is usable when the target class named by its
    ``metadata["target"]`` entry (default ``"generic"``) is one that
    ``target`` inherits from. Usable templates are ordered by the position
    of that class in ``target.__mro__``. ``fnkey`` is an indicative typing
    key that only appears in the error message raised when no template is
    usable for ``target``.
    """
    # No templates... return early!
    if templates == []:
        return []

    from numba.core.target_extension import target_registry

    DEFAULT_TARGET = "generic"

    # Collect (template, target class) pairs for the templates that apply
    # to the current target.
    candidates = []
    for template in templates:
        # ? Need to do something about this next line
        metadata = getattr(template, "metadata", {})
        hw_name = metadata.get("target", DEFAULT_TARGET)
        if hw_name is None:
            continue
        hw_class = target_registry[hw_name]
        if target.inherits_from(hw_class):
            candidates.append((template, hw_class))

    # Earlier in the MRO means more specific; the sort is stable, so
    # templates of equal specificity keep their input order.
    ranked = sorted(candidates, key=lambda pair: target.__mro__.index(pair[1]))
    order = [template for template, _ in ranked]

    if order:
        return order

    msg = (
        f"Function resolution cannot find any matches for function "
        f"'{fnkey}' for the current target: '{target}'."
    )
    from numba.core.errors import UnsupportedError

    raise UnsupportedError(msg)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
T = _tp.TypeVar("T")


class OrderedSet(MutableSet[T]):
    """A mutable set that iterates in insertion order.

    Elements are stored as the keys of a plain ``dict`` (all values are
    ``None``), which is what preserves the insertion order.
    """

    def __init__(self, iterable: _tp.Iterable[T] = ()):
        self._data = {element: None for element in iterable}

    def __contains__(self, key):
        return key in self._data

    def __iter__(self):
        return iter(self._data)

    def __len__(self):
        return len(self._data)

    def add(self, item):
        # Re-adding an existing item leaves its position unchanged.
        self._data[item] = None

    def discard(self, item):
        # Removing an absent item is a no-op.
        if item in self._data:
            del self._data[item]
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
class MutableSortedSet(MutableSet[T], _tp.Generic[T]):
    """A mutable set whose iteration order is the sorted order of its
    elements.

    Elements are kept in a plain ``set``; sorting happens each time the
    container is iterated.
    """

    def __init__(self, values: _tp.Iterable[T] = ()):
        self._values = set(values)

    def __len__(self):
        return len(self._values)

    def __iter__(self):
        # Sort a snapshot of the current contents.
        return iter(sorted(self._values))

    def __contains__(self, x: T) -> bool:
        return x in self._values

    def add(self, x: T):
        self._values.add(x)

    def discard(self, value: T):
        self._values.discard(value)

    def update(self, values):
        """Add every element of ``values``."""
        self._values.update(values)
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
Tk = _tp.TypeVar("Tk")
Tv = _tp.TypeVar("Tv")


class SortedMap(Mapping[Tk, Tv], _tp.Generic[Tk, Tv]):
    """Immutable mapping that iterates its keys in sorted order.

    Built from an iterable of ``(key, value)`` pairs, which is sorted once
    at construction time.
    """

    def __init__(self, seq):
        ordered = sorted(seq)
        # (key, value) pairs in sorted order.
        self._values = [(key, value) for key, value in ordered]
        # key -> position of that key's pair in `_values`.
        self._index = {key: pos for pos, (key, _) in enumerate(ordered)}

    def __getitem__(self, k):
        position = self._index[k]
        return self._values[position][1]

    def __len__(self):
        return len(self._values)

    def __iter__(self):
        return (key for key, _ in self._values)
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
class MutableSortedMap(MutableMapping[Tk, Tv], _tp.Generic[Tk, Tv]):
    """Mutable mapping whose iteration order is the sorted order of its
    keys.

    Entries live in a plain ``dict``; keys are sorted each time the mapping
    is iterated.

    Parameters
    ----------
    dct : dict, optional
        Backing dictionary. It is stored by reference (not copied), so
        changes made through this object are visible in ``dct`` and vice
        versa. A fresh empty dict is used when omitted.
    """

    def __init__(self, dct=None):
        if dct is None:
            dct = {}
        self._dct: dict[Tk, Tv] = dct

    def __getitem__(self, k: Tk) -> Tv:
        return self._dct[k]

    def __setitem__(self, k: Tk, v: Tv) -> None:
        self._dct[k] = v

    def __delitem__(self, k: Tk) -> None:
        del self._dct[k]

    def __len__(self) -> int:
        return len(self._dct)

    def __iter__(self) -> _tp.Iterator[Tk]:
        # Yields keys (not an int, as previously annotated), sorted from a
        # snapshot taken when iteration starts.
        return iter(sorted(self._dct))
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
class UniqueDict(dict):
    """A dict in which item assignment to an existing key is an error."""

    def __setitem__(self, key, value):
        if key not in self:
            super().__setitem__(key, value)
            return
        raise AssertionError("key already in dictionary: %r" % (key,))
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def runonce(fn):
    """Decorate the zero-argument callable *fn* so that it executes at
    most once; later calls return the result of the first call.

    The wrapper exposes its state as the attributes ``_ran`` and, after
    the first successful call, ``_result``.
    """

    @functools.wraps(fn)
    def inner():
        if inner._ran:
            return inner._result
        # First successful call: remember the result before marking as run,
        # so a raising `fn` leaves the wrapper un-run.
        inner._result = fn()
        inner._ran = True
        return inner._result

    inner._ran = False
    return inner
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def bit_length(intval):
    """
    Return the number of bits necessary to represent integer `intval`.

    Computed as the number of binary digits of `intval` when it is
    non-negative and of ``-intval - 1`` when it is negative, so the result
    is at least 1 (both 0 and -1 give 1).
    """
    assert isinstance(intval, int)
    magnitude = intval if intval >= 0 else -intval - 1
    # bin() prefixes its digits with "0b"; drop those two characters.
    return len(bin(magnitude)) - 2
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def stream_list(lst):
    """
    Given a list, return an infinite iterator of iterators.

    Every item produced is itself an iterator over the elements of `lst`
    from the point where the previous one stopped up to the end of the
    list at the moment it is produced. In effect each one yields the
    elements appended since the previous one was created.
    """
    consumed = 0
    while True:
        end = len(lst)
        # The slice copies, so later appends do not affect this iterator.
        yield iter(lst[consumed:end])
        consumed = end
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
class BenchmarkResult:
    """Timing results for one benchmarked callable.

    Attributes
    ----------
    func : the benchmarked callable
    loop : number of loops per recorded run
    records : the given ``records`` divided by ``loop``
    best : the smallest value in ``records``
    """

    def __init__(self, func, records, loop):
        self.func = func
        self.loop = loop
        per_loop = np.array(records) / loop
        self.records = per_loop
        self.best = np.min(per_loop)

    def __repr__(self):
        # Fall back to the callable itself when it has no __name__.
        label = getattr(self.func, "__name__", self.func)
        return "%20s: %10d loops, best of %d: %s per loop" % (
            label,
            self.loop,
            self.records.size,
            format_time(self.best),
        )
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def format_time(tm):
    """Format the duration *tm* (in seconds) with one decimal place.

    The unit is the largest of s, ms, us and ns whose size does not exceed
    *tm*; values below that fall through to ps.
    """
    scale = 1
    for label in ("s", "ms", "us", "ns"):
        if tm >= scale:
            return "%.1f%s" % (tm / scale, label)
        scale /= 1000
    # Smaller than one nanosecond (or not comparable as >=): picoseconds.
    return "%.1f%s" % (tm / scale, "ps")
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def benchmark(func, maxsec=1):
    """Time the zero-argument callable *func* with ``timeit``.

    The loop count is first grown tenfold at a time until a run takes a
    measurable amount of time. It is then scaled from ``maxsec`` and
    rounded up to a power of ten before the final three repeats.

    Returns a ``BenchmarkResult``.
    """
    timer = timeit.Timer(func)
    loops = 1
    timings = timer.repeat(1, loops)
    # Too fast to be measured
    while min(timings) / loops == 0:
        loops *= 10
        timings = timer.repeat(3, loops)
    fastest = min(timings) / loops
    if fastest >= maxsec:
        # A single loop already uses up the time allowance.
        return BenchmarkResult(func, timings, loops)
    # Scale it up to make it close the maximum time
    budget_per_run = maxsec / 3 / loops
    loops = max(budget_per_run / fastest / 3, 1)
    # Round to the next power of 10
    loops = int(10 ** math.ceil(math.log10(loops)))
    return BenchmarkResult(func, timer.repeat(3, loops), loops)
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
# A dummy module for dynamically-generated functions
# The module object is created directly with ModuleType and given the
# standard `builtins` module as its `__builtins__`.
_dynamic_modname = "<dynamic>"
_dynamic_module = ModuleType(_dynamic_modname)
_dynamic_module.__builtins__ = builtins
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def chain_exception(new_exc, old_exc):
    """Explicitly chain *old_exc* as the ``__cause__`` of *new_exc*.

    The cause is only attached when ``DEVELOPER_MODE`` is enabled;
    otherwise *new_exc* is left untouched. Returns *new_exc*, which is
    modified in place.
    """
    if not DEVELOPER_MODE:
        return new_exc
    new_exc.__cause__ = old_exc
    return new_exc
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
def get_nargs_range(pyfunc):
    """Return the minimal and maximal number of Python function
    positional arguments.

    Every parameter in the signature of *pyfunc* counts towards the
    maximum, whatever its kind; parameters without a default value count
    towards the minimum.

    Returns
    -------
    (min_nargs, max_nargs) : tuple of int
    """
    params = pysignature(pyfunc).parameters.values()
    max_nargs = len(params)
    # Use an identity test against the "no default" sentinel: `==` would
    # invoke the default value's own __eq__, which may return a non-bool
    # (e.g. an array) or compare equal to anything.
    min_nargs = sum(1 for p in params if p.default is inspect.Parameter.empty)
    return min_nargs, max_nargs
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def unify_function_types(numba_types):
    """Return a normalized tuple of Numba function types so that

        Tuple(numba_types)

    becomes

        UniTuple(dtype=<unified function type>, count=len(numba_types))

    When ``unified_function_type`` finds no unified type (for instance
    because the input contains types that are neither function nor
    dispatcher types), the original input is returned as given.
    """
    unified = unified_function_type(numba_types)
    return numba_types if unified is None else (unified,) * len(numba_types)
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def unified_function_type(numba_types, require_precise=True):
    """Returns a unified Numba function type if possible.

    Parameters
    ----------
    numba_types : Sequence of numba Type instances.
    require_precise : bool
      If True, the returned Numba function type must be precise.

    Returns
    -------
    typ : {numba.core.types.Type, None}
      A unified Numba function type. Or ``None`` when the Numba types
      cannot be unified, e.g. when the ``numba_types`` contains at
      least two different Numba function type instances.

    If ``numba_types`` contains a Numba dispatcher type, the unified
    Numba function type will be an imprecise ``UndefinedFunctionType``
    instance, or None when ``require_precise=True`` is specified.

    Specifying ``require_precise=False`` enables unifying imprecise
    Numba dispatcher instances when used in tuples or if-then branches
    when the precise Numba function cannot be determined on the first
    occurrence that is not a call expression.

    Notes
    -----
    A ``NumbaExperimentalFeatureWarning`` is issued whenever the input
    passes the initial check (a non-empty ``Sequence`` whose first element
    is a dispatcher or function type).
    """
    from numba.core.errors import NumbaExperimentalFeatureWarning

    # Only non-empty sequences that start with a dispatcher/function type
    # are considered; anything else yields None without a warning.
    if not (
        isinstance(numba_types, Sequence)
        and len(numba_types) > 0
        and isinstance(numba_types[0], (types.Dispatcher, types.FunctionType))
    ):
        return

    warnings.warn(
        "First-class function type feature is experimental",
        category=NumbaExperimentalFeatureWarning,
    )

    # State accumulated over the scan:
    #   mnargs/mxargs      - argument-count range all members must share
    #   dispatchers        - dispatchers seen directly or via undefined types
    #   function           - the single precise function type seen, if any
    #   undefined_function - the single UndefinedFunctionType seen, if any
    mnargs, mxargs = None, None
    dispatchers = set()
    function = None
    undefined_function = None

    for t in numba_types:
        if isinstance(t, types.Dispatcher):
            mnargs1, mxargs1 = get_nargs_range(t.dispatcher.py_func)
            if mnargs is None:
                mnargs, mxargs = mnargs1, mxargs1
            elif not (mnargs, mxargs) == (mnargs1, mxargs1):
                # Argument-count ranges disagree: cannot unify.
                return
            dispatchers.add(t.dispatcher)
            # From here on `t` is the dispatcher's function type; a
            # dispatcher that reports None is only recorded in
            # `dispatchers`.
            t = t.dispatcher.get_function_type()
            if t is None:
                continue
        if isinstance(t, types.FunctionType):
            if mnargs is None:
                mnargs = mxargs = t.nargs
            elif not (mnargs == mxargs == t.nargs):
                # A function type needs an exact, matching argument count.
                return
            if isinstance(t, types.UndefinedFunctionType):
                if undefined_function is None:
                    undefined_function = t
                else:
                    # Refuse to unify using function type
                    return
                dispatchers.update(t.dispatchers)
            else:
                if function is None:
                    function = t
                else:
                    # NOTE: a second, different precise function type
                    # trips this assertion rather than returning None.
                    assert function == t
        else:
            # Neither a dispatcher nor a function type: cannot unify.
            return
    if require_precise and (function is None or undefined_function is not None):
        return
    if function is not None:
        if undefined_function is not None:
            assert function.nargs == undefined_function.nargs
            function = undefined_function
    elif undefined_function is not None:
        # The existing undefined type is updated in place with the
        # dispatchers collected during the scan.
        undefined_function.dispatchers.update(dispatchers)
        function = undefined_function
    else:
        function = types.UndefinedFunctionType(mnargs, dispatchers)

    return function
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
class _RedirectSubpackage(ModuleType):
    """Redirect an old subpackage name to a new subpackage.

    An instance is a module object named after the old module that exposes
    the attributes of the new module. This allows all references like:

    >>> from numba.old_subpackage import module
    >>> module.item

    >>> import numba.old_subpackage.module
    >>> numba.old_subpackage.module.item

    >>> from numba.old_subpackage.module import item

    Parameters
    ----------
    old_module_locals : dict
        Namespace of the old module; must contain ``"__name__"``. Its
        dunder entries (except ``__builtins__``) are copied onto the
        instance.
    new_module : str
        Importable name of the module to redirect to.
    """

    def __init__(self, old_module_locals, new_module):
        old_module = old_module_locals["__name__"]
        super().__init__(old_module)

        # State kept so that __reduce__ can rebuild an equivalent object.
        self.__old_module_states = {}
        self.__new_module = new_module

        new_mod_obj = import_module(new_module)

        # Map all sub-modules over
        for k, v in new_mod_obj.__dict__.items():
            # Get attributes so that `subpackage.xyz` and
            # `from subpackage import xyz` work
            setattr(self, k, v)
            if isinstance(v, ModuleType):
                # Map modules into the interpreter so that
                # `import subpackage.xyz` works
                sys.modules[f"{old_module}.{k}"] = sys.modules[v.__name__]

        # copy across dunders so that package imports work too
        # This runs after the loop above, so the old module's dunders
        # replace any same-named values copied from the new module.
        for attr, value in old_module_locals.items():
            if attr.startswith("__") and attr.endswith("__"):
                if attr != "__builtins__":
                    setattr(self, attr, value)
                    self.__old_module_states[attr] = value

    def __reduce__(self):
        # Pickle as a constructor call using the saved dunders (which
        # include "__name__") and the new module's name.
        args = (self.__old_module_states, self.__new_module)
        return _RedirectSubpackage, args
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def get_hashable_key(value):
    """
    Return a key for *value* that can be hashed.

    Hashable values are returned unchanged; for values whose ``hash()``
    raises ``TypeError`` the key is ``id(value)``.

    See discussion in gh #6957
    """
    try:
        hash(value)
    except TypeError:
        key = id(value)
    else:
        key = value
    return key
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
class threadsafe_cached_property(functools.cached_property):
    """A ``functools.cached_property`` whose ``__get__`` runs while
    holding a re-entrant lock owned by the descriptor."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One lock per descriptor, i.e. shared by all instances of the
        # owning class.
        self._lock = threading.RLock()

    def __get__(self, *args, **kwargs):
        self._lock.acquire()
        try:
            return super().__get__(*args, **kwargs)
        finally:
            self._lock.release()
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
def dump_llvm(fndesc, module):
    """Print *module* to stdout between a header naming *fndesc* and a
    footer rule.

    When ``config.HIGHLIGHT_DUMPS`` is set the text is syntax-highlighted
    with pygments; a ``ValueError`` is raised if the required imports are
    unavailable.
    """
    print(("LLVM DUMP %s" % fndesc).center(80, "-"))
    if not config.HIGHLIGHT_DUMPS:
        print(module)
    else:
        try:
            from pygments import highlight
            from pygments.lexers import LlvmLexer as lexer
            from pygments.formatters import Terminal256Formatter
            from numba.misc.dump_style import by_colorscheme

            formatter = Terminal256Formatter(style=by_colorscheme())
            print(highlight(module.__repr__(), lexer(), formatter))
        except ImportError:
            msg = "Please install pygments to see highlighted dumps"
            raise ValueError(msg)
    print("=" * 80)
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
class _lazy_pformat:
    """Defer a ``pprint.pformat`` call until the object is converted to
    ``str``; the positional and keyword arguments are stored and passed
    through unchanged."""

    def __init__(self, *args, **kwargs):
        self.args, self.kwargs = args, kwargs

    def __str__(self):
        return pformat(*self.args, **self.kwargs)
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
class _LazyJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``_lazy_pformat`` objects as their
    string form; every other unsupported object is handled by the base
    class."""

    def default(self, obj):
        if not isinstance(obj, _lazy_pformat):
            return super().default(obj)
        return str(obj)
|
|
790
|
+
|
|
6
791
|
|
|
7
792
|
def _readenv(name, ctor, default):
|
|
8
793
|
value = os.environ.get(name)
|