numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
|
@@ -8,28 +8,29 @@ import operator
|
|
|
8
8
|
import logging
|
|
9
9
|
import textwrap
|
|
10
10
|
|
|
11
|
-
from numba.core import errors, ir
|
|
11
|
+
from numba.core import errors, ir
|
|
12
|
+
from numba.cuda.core import config
|
|
12
13
|
from numba.cuda.errors import UnsupportedBytecodeError
|
|
13
14
|
from numba.core.errors import (
|
|
14
15
|
NotDefinedError,
|
|
15
16
|
error_extras,
|
|
16
17
|
)
|
|
17
18
|
from numba.cuda.core import ir_utils
|
|
18
|
-
from numba.core.utils import PYVERSION
|
|
19
19
|
from numba.cuda.utils import (
|
|
20
|
+
PYVERSION,
|
|
20
21
|
BINOPS_TO_OPERATORS,
|
|
21
22
|
INPLACE_BINOPS_TO_OPERATORS,
|
|
22
|
-
_lazy_pformat,
|
|
23
23
|
)
|
|
24
|
+
from numba.cuda.utils import _lazy_pformat
|
|
24
25
|
from numba.core.byteflow import Flow, AdaptDFA, AdaptCFA, BlockKind
|
|
25
|
-
from numba.core.unsafe import eh
|
|
26
|
+
from numba.cuda.core.unsafe import eh
|
|
26
27
|
from numba.cpython.unsafe.tuple import unpack_single_tuple
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
if PYVERSION in ((3, 12), (3, 13)):
|
|
30
31
|
# Operands for CALL_INTRINSIC_1
|
|
31
32
|
from numba.core.byteflow import CALL_INTRINSIC_1_Operand as ci1op
|
|
32
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
33
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
33
34
|
pass
|
|
34
35
|
else:
|
|
35
36
|
raise NotImplementedError(PYVERSION)
|
|
@@ -1395,7 +1396,7 @@ class Interpreter(object):
|
|
|
1395
1396
|
if entry.start < self.last_active_offset
|
|
1396
1397
|
]
|
|
1397
1398
|
)
|
|
1398
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
1399
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
1399
1400
|
pass
|
|
1400
1401
|
else:
|
|
1401
1402
|
raise NotImplementedError(PYVERSION)
|
|
@@ -1410,7 +1411,10 @@ class Interpreter(object):
|
|
|
1410
1411
|
if PYVERSION in ((3, 11), (3, 12), (3, 13)):
|
|
1411
1412
|
# Insert end of try markers
|
|
1412
1413
|
self._end_try_blocks()
|
|
1413
|
-
elif PYVERSION in (
|
|
1414
|
+
elif PYVERSION in (
|
|
1415
|
+
(3, 9),
|
|
1416
|
+
(3, 10),
|
|
1417
|
+
):
|
|
1414
1418
|
pass
|
|
1415
1419
|
else:
|
|
1416
1420
|
raise NotImplementedError(PYVERSION)
|
|
@@ -1434,7 +1438,7 @@ class Interpreter(object):
|
|
|
1434
1438
|
peepholes = []
|
|
1435
1439
|
if PYVERSION in ((3, 11), (3, 12), (3, 13)):
|
|
1436
1440
|
peepholes.append(peep_hole_split_at_pop_block)
|
|
1437
|
-
if PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
|
|
1441
|
+
if PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12), (3, 13)):
|
|
1438
1442
|
peepholes.append(peep_hole_list_to_tuple)
|
|
1439
1443
|
peepholes.append(peep_hole_delete_with_exit)
|
|
1440
1444
|
if PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
|
|
@@ -1592,7 +1596,10 @@ class Interpreter(object):
|
|
|
1592
1596
|
if newtryblk is not None:
|
|
1593
1597
|
if newtryblk is not tryblk:
|
|
1594
1598
|
self._insert_try_block_begin()
|
|
1595
|
-
elif PYVERSION in (
|
|
1599
|
+
elif PYVERSION in (
|
|
1600
|
+
(3, 9),
|
|
1601
|
+
(3, 10),
|
|
1602
|
+
):
|
|
1596
1603
|
while self.syntax_blocks:
|
|
1597
1604
|
if offset >= self.syntax_blocks[-1].exit:
|
|
1598
1605
|
self.syntax_blocks.pop()
|
|
@@ -1829,7 +1836,10 @@ class Interpreter(object):
|
|
|
1829
1836
|
if inst.offset >= top.exit:
|
|
1830
1837
|
self.current_block.append(ir.PopBlock(loc=self.loc))
|
|
1831
1838
|
self.syntax_blocks.pop()
|
|
1832
|
-
elif PYVERSION in (
|
|
1839
|
+
elif PYVERSION in (
|
|
1840
|
+
(3, 9),
|
|
1841
|
+
(3, 10),
|
|
1842
|
+
):
|
|
1833
1843
|
pass
|
|
1834
1844
|
else:
|
|
1835
1845
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2026,7 +2036,7 @@ class Interpreter(object):
|
|
|
2026
2036
|
target = self.get(container)
|
|
2027
2037
|
expr = ir.Expr.getitem(target, index=index, loc=self.loc)
|
|
2028
2038
|
self.store(expr, res)
|
|
2029
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2039
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2030
2040
|
pass
|
|
2031
2041
|
else:
|
|
2032
2042
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2052,7 +2062,7 @@ class Interpreter(object):
|
|
|
2052
2062
|
target=target, index=index, value=value, loc=self.loc
|
|
2053
2063
|
)
|
|
2054
2064
|
self.current_block.append(stmt)
|
|
2055
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2065
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2056
2066
|
pass
|
|
2057
2067
|
else:
|
|
2058
2068
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2345,7 +2355,7 @@ class Interpreter(object):
|
|
|
2345
2355
|
dstname = self.code_locals[oparg2]
|
|
2346
2356
|
self.store(value=self.get(value2), name=dstname)
|
|
2347
2357
|
|
|
2348
|
-
elif PYVERSION in ((3, 10), (3, 11), (3, 12)):
|
|
2358
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
|
|
2349
2359
|
pass
|
|
2350
2360
|
else:
|
|
2351
2361
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2363,7 +2373,7 @@ class Interpreter(object):
|
|
|
2363
2373
|
undef = ir.Expr.undef(loc=self.loc)
|
|
2364
2374
|
self.store(undef, name=res)
|
|
2365
2375
|
|
|
2366
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2376
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2367
2377
|
pass
|
|
2368
2378
|
else:
|
|
2369
2379
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2403,7 +2413,7 @@ class Interpreter(object):
|
|
|
2403
2413
|
item = self.get(item)
|
|
2404
2414
|
if PYVERSION in ((3, 12), (3, 13)):
|
|
2405
2415
|
attr = self.code_names[inst.arg >> 1]
|
|
2406
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2416
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2407
2417
|
attr = self.code_names[inst.arg]
|
|
2408
2418
|
else:
|
|
2409
2419
|
raise NotImplementedError(PYVERSION)
|
|
@@ -2439,7 +2449,10 @@ class Interpreter(object):
|
|
|
2439
2449
|
value = self.get_global_value(name)
|
|
2440
2450
|
gl = ir.Global(name, value, loc=self.loc)
|
|
2441
2451
|
self.store(gl, res)
|
|
2442
|
-
elif PYVERSION in (
|
|
2452
|
+
elif PYVERSION in (
|
|
2453
|
+
(3, 9),
|
|
2454
|
+
(3, 10),
|
|
2455
|
+
):
|
|
2443
2456
|
|
|
2444
2457
|
def op_LOAD_GLOBAL(self, inst, res):
|
|
2445
2458
|
name = self.code_names[inst.arg]
|
|
@@ -2467,7 +2480,10 @@ class Interpreter(object):
|
|
|
2467
2480
|
value = self.get_closure_value(idx)
|
|
2468
2481
|
gl = ir.FreeVar(idx, name, value, loc=self.loc)
|
|
2469
2482
|
self.store(gl, res)
|
|
2470
|
-
elif PYVERSION in (
|
|
2483
|
+
elif PYVERSION in (
|
|
2484
|
+
(3, 9),
|
|
2485
|
+
(3, 10),
|
|
2486
|
+
):
|
|
2471
2487
|
|
|
2472
2488
|
def op_LOAD_DEREF(self, inst, res):
|
|
2473
2489
|
n_cellvars = len(self.code_cellvars)
|
|
@@ -2494,7 +2510,10 @@ class Interpreter(object):
|
|
|
2494
2510
|
name = self.func_id.func.__code__._varname_from_oparg(inst.arg)
|
|
2495
2511
|
value = self.get(value)
|
|
2496
2512
|
self.store(value=value, name=name)
|
|
2497
|
-
elif PYVERSION in (
|
|
2513
|
+
elif PYVERSION in (
|
|
2514
|
+
(3, 9),
|
|
2515
|
+
(3, 10),
|
|
2516
|
+
):
|
|
2498
2517
|
|
|
2499
2518
|
def op_STORE_DEREF(self, inst, value):
|
|
2500
2519
|
n_cellvars = len(self.code_cellvars)
|
|
@@ -2546,7 +2565,7 @@ class Interpreter(object):
|
|
|
2546
2565
|
if ex.target == end
|
|
2547
2566
|
]
|
|
2548
2567
|
)
|
|
2549
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
2568
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
2550
2569
|
pass
|
|
2551
2570
|
else:
|
|
2552
2571
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3107,7 +3126,7 @@ class Interpreter(object):
|
|
|
3107
3126
|
self.store(ir.Expr.cast(self.get(retval), loc=self.loc), castval)
|
|
3108
3127
|
ret = ir.Return(self.get(castval), loc=self.loc)
|
|
3109
3128
|
self.current_block.append(ret)
|
|
3110
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3129
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3111
3130
|
pass
|
|
3112
3131
|
else:
|
|
3113
3132
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3117,7 +3136,7 @@ class Interpreter(object):
|
|
|
3117
3136
|
def op_TO_BOOL(self, inst, val, res):
|
|
3118
3137
|
self.store(self.get(val), res) # TODO: just a lazy hack
|
|
3119
3138
|
|
|
3120
|
-
elif PYVERSION in ((3, 10), (3, 11), (3, 12)):
|
|
3139
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
|
|
3121
3140
|
pass
|
|
3122
3141
|
else:
|
|
3123
3142
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3128,7 +3147,7 @@ class Interpreter(object):
|
|
|
3128
3147
|
# TODO: fifth lowest bit now indicates a forced version to bool.
|
|
3129
3148
|
elif PYVERSION in ((3, 12),):
|
|
3130
3149
|
op = dis.cmp_op[inst.arg >> 4]
|
|
3131
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3150
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3132
3151
|
op = dis.cmp_op[inst.arg]
|
|
3133
3152
|
else:
|
|
3134
3153
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3259,7 +3278,7 @@ class Interpreter(object):
|
|
|
3259
3278
|
|
|
3260
3279
|
def op_POP_JUMP_IF_NOT_NONE(self, inst, pred):
|
|
3261
3280
|
self._jump_if_none(inst, pred, False)
|
|
3262
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3281
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3263
3282
|
pass
|
|
3264
3283
|
else:
|
|
3265
3284
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3429,7 +3448,10 @@ class Interpreter(object):
|
|
|
3429
3448
|
assert 0, "unreachable"
|
|
3430
3449
|
self.store(gl, res)
|
|
3431
3450
|
|
|
3432
|
-
elif PYVERSION in (
|
|
3451
|
+
elif PYVERSION in (
|
|
3452
|
+
(3, 9),
|
|
3453
|
+
(3, 10),
|
|
3454
|
+
):
|
|
3433
3455
|
|
|
3434
3456
|
def op_LOAD_CLOSURE(self, inst, res):
|
|
3435
3457
|
n_cellvars = len(self.code_cellvars)
|
|
@@ -3579,7 +3601,7 @@ class Interpreter(object):
|
|
|
3579
3601
|
return
|
|
3580
3602
|
else:
|
|
3581
3603
|
raise NotImplementedError(operand)
|
|
3582
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3604
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3583
3605
|
pass
|
|
3584
3606
|
else:
|
|
3585
3607
|
raise NotImplementedError(PYVERSION)
|
|
@@ -3589,7 +3611,7 @@ if PYVERSION in ((3, 12), (3, 13)):
|
|
|
3589
3611
|
|
|
3590
3612
|
class INTRINSIC_STOPITERATION_ERROR(AssertionError):
|
|
3591
3613
|
pass
|
|
3592
|
-
elif PYVERSION in ((3, 10), (3, 11)):
|
|
3614
|
+
elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
|
|
3593
3615
|
pass
|
|
3594
3616
|
else:
|
|
3595
3617
|
raise NotImplementedError(PYVERSION)
|
|
@@ -11,7 +11,9 @@ import warnings
|
|
|
11
11
|
|
|
12
12
|
import numba
|
|
13
13
|
from numba.core.extending import _Intrinsic
|
|
14
|
-
from numba.core import types,
|
|
14
|
+
from numba.core import types, ir, analysis
|
|
15
|
+
from numba.cuda import typing
|
|
16
|
+
from numba.cuda.core import postproc, rewrites, config
|
|
15
17
|
from numba.core.typing.templates import signature
|
|
16
18
|
from numba.core.analysis import (
|
|
17
19
|
compute_live_map,
|
|
@@ -248,12 +250,7 @@ def mk_range_block(typemap, start, stop, step, calltypes, scope, loc):
|
|
|
248
250
|
range_call_assign = ir.Assign(range_call, range_call_var, loc)
|
|
249
251
|
# iter_var = getiter(range_call_var)
|
|
250
252
|
iter_call = ir.Expr.getiter(range_call_var, loc)
|
|
251
|
-
|
|
252
|
-
calltype_sig = signature(
|
|
253
|
-
types.range_iter64_type, types.range_state64_type
|
|
254
|
-
)
|
|
255
|
-
else:
|
|
256
|
-
calltype_sig = signature(types.range_iter_type, types.range_state_type)
|
|
253
|
+
calltype_sig = signature(types.range_iter64_type, types.range_state64_type)
|
|
257
254
|
calltypes[iter_call] = calltype_sig
|
|
258
255
|
iter_var = ir.Var(scope, mk_unique_var("$iter_var"), loc)
|
|
259
256
|
typemap[iter_var.name] = types.iterators.RangeIteratorType(types.intp)
|
|
@@ -332,10 +329,7 @@ def mk_loop_header(typemap, phi_var, calltypes, scope, loc):
|
|
|
332
329
|
types.intp, types.boolean
|
|
333
330
|
)
|
|
334
331
|
iternext_call = ir.Expr.iternext(phi_var, loc)
|
|
335
|
-
|
|
336
|
-
range_iter_type = types.range_iter64_type
|
|
337
|
-
else:
|
|
338
|
-
range_iter_type = types.range_iter_type
|
|
332
|
+
range_iter_type = types.range_iter64_type
|
|
339
333
|
calltypes[iternext_call] = signature(
|
|
340
334
|
types.containers.Pair(types.intp, types.boolean), range_iter_type
|
|
341
335
|
)
|
|
@@ -812,8 +806,6 @@ def has_no_side_effect(rhs, lives, call_table):
|
|
|
812
806
|
"""Returns True if this expression has no side effects that
|
|
813
807
|
would prevent re-ordering.
|
|
814
808
|
"""
|
|
815
|
-
from numba.parfors import array_analysis, parfor
|
|
816
|
-
from numba.misc.special import prange
|
|
817
809
|
|
|
818
810
|
if isinstance(rhs, ir.Expr) and rhs.op == "call":
|
|
819
811
|
func_name = rhs.func.name
|
|
@@ -826,11 +818,7 @@ def has_no_side_effect(rhs, lives, call_table):
|
|
|
826
818
|
or call_list == ["stencil", numba]
|
|
827
819
|
or call_list == ["log", numpy]
|
|
828
820
|
or call_list == ["dtype", numpy]
|
|
829
|
-
or call_list == [array_analysis.wrap_index]
|
|
830
|
-
or call_list == [prange]
|
|
831
|
-
or call_list == ["prange", numba]
|
|
832
821
|
or call_list == ["pndindex", numba]
|
|
833
|
-
or call_list == [parfor.internal_prange]
|
|
834
822
|
or call_list == ["ceil", math]
|
|
835
823
|
or call_list == [max]
|
|
836
824
|
or call_list == [int]
|
|
@@ -1893,7 +1881,7 @@ def compile_to_numba_ir(
|
|
|
1893
1881
|
if typingctx and other typing inputs are available and update typemap and
|
|
1894
1882
|
calltypes.
|
|
1895
1883
|
"""
|
|
1896
|
-
from numba.core import typed_passes
|
|
1884
|
+
from numba.cuda.core import typed_passes
|
|
1897
1885
|
|
|
1898
1886
|
# mk_func can be actual function or make_function node, or a njit function
|
|
1899
1887
|
if hasattr(mk_func, "code"):
|
|
@@ -1975,7 +1963,8 @@ def get_ir_of_code(glbls, fcode):
|
|
|
1975
1963
|
fcode, func_env, func_arg, func_clo, glbls
|
|
1976
1964
|
)
|
|
1977
1965
|
|
|
1978
|
-
from numba.
|
|
1966
|
+
from numba.cuda import compiler
|
|
1967
|
+
from numba.cuda.core.compiler import StateDict
|
|
1979
1968
|
|
|
1980
1969
|
ir = compiler.run_frontend(f)
|
|
1981
1970
|
|
|
@@ -1984,7 +1973,7 @@ def get_ir_of_code(glbls, fcode):
|
|
|
1984
1973
|
# for example, Raise nodes need to become StaticRaise before type inference
|
|
1985
1974
|
class DummyPipeline(object):
|
|
1986
1975
|
def __init__(self, f_ir):
|
|
1987
|
-
self.state =
|
|
1976
|
+
self.state = StateDict()
|
|
1988
1977
|
self.state.typingctx = None
|
|
1989
1978
|
self.state.targetctx = None
|
|
1990
1979
|
self.state.args = None
|
|
@@ -1997,10 +1986,10 @@ def get_ir_of_code(glbls, fcode):
|
|
|
1997
1986
|
rewrites.rewrite_registry.apply("before-inference", state)
|
|
1998
1987
|
# call inline pass to handle cases like stencils and comprehensions
|
|
1999
1988
|
swapped = {} # TODO: get this from diagnostics store
|
|
2000
|
-
|
|
1989
|
+
from numba.cuda.core.inline_closurecall import InlineClosureCallPass
|
|
2001
1990
|
|
|
2002
|
-
inline_pass =
|
|
2003
|
-
ir, numba.core.
|
|
1991
|
+
inline_pass = InlineClosureCallPass(
|
|
1992
|
+
ir, numba.cuda.core.options.ParallelOptions(False), swapped
|
|
2004
1993
|
)
|
|
2005
1994
|
inline_pass.run()
|
|
2006
1995
|
|
|
@@ -2013,8 +2002,8 @@ def get_ir_of_code(glbls, fcode):
|
|
|
2013
2002
|
# added to create valid IR.
|
|
2014
2003
|
|
|
2015
2004
|
# rebuild IR in SSA form
|
|
2016
|
-
from numba.core.untyped_passes import ReconstructSSA
|
|
2017
|
-
from numba.core.typed_passes import PreLowerStripPhis
|
|
2005
|
+
from numba.cuda.core.untyped_passes import ReconstructSSA
|
|
2006
|
+
from numba.cuda.core.typed_passes import PreLowerStripPhis
|
|
2018
2007
|
|
|
2019
2008
|
reconstruct_ssa = ReconstructSSA()
|
|
2020
2009
|
phistrip = PreLowerStripPhis()
|
|
@@ -2494,7 +2483,7 @@ def legalize_single_scope(blocks):
|
|
|
2494
2483
|
return len({blk.scope for blk in blocks.values()}) == 1
|
|
2495
2484
|
|
|
2496
2485
|
|
|
2497
|
-
def check_and_legalize_ir(func_ir, flags: "numba.core.
|
|
2486
|
+
def check_and_legalize_ir(func_ir, flags: "numba.core.flags.Flags"):
|
|
2498
2487
|
"""
|
|
2499
2488
|
This checks that the IR presented is legal
|
|
2500
2489
|
"""
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Defines CUDA Options for use in the CUDA target
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABCMeta, abstractmethod
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AbstractOptionValue(metaclass=ABCMeta):
    """Common interface implemented by every custom option value.

    Concrete subclasses provide :meth:`encode`; the shared ``__repr__`` is
    derived from it.
    """

    @abstractmethod
    def encode(self) -> str:
        """Return a string encoding of the option's value(s)."""
        ...

    def __repr__(self) -> str:
        # Rendered as ``ClassName(<encoded value>)``.
        kind = self.__class__.__name__
        payload = self.encode()
        return f"{kind}({payload})"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FastMathOptions(AbstractOptionValue):
    """
    Options for controlling fast math optimization.

    The constructor normalises several input forms into ``self.flags``, a
    ``set`` of fast-math flag names:

    * another ``FastMathOptions`` -- its flags are copied;
    * ``True`` -- shorthand for ``{"fast"}``;
    * ``False`` -- no flags;
    * a ``set`` / ``frozenset`` of flag names -- validated, then copied;
    * a ``dict`` mapping flag name -> truthy/falsy -- the names are
      validated and only the enabled ones are kept.

    Raises
    ------
    ValueError
        If an unknown flag name is supplied, or ``value`` is of an
        unsupported type.
    """

    # https://releases.llvm.org/7.0.0/docs/LangRef.html#fast-math-flags
    _VALID_FLAGS = frozenset(
        {
            "fast",
            "nnan",
            "ninf",
            "nsz",
            "arcp",
            "contract",
            "afn",
            "reassoc",
        }
    )

    def __init__(self, value):
        valid_flags = self._VALID_FLAGS

        if isinstance(value, FastMathOptions):
            self.flags = value.flags.copy()
        elif value is True:
            self.flags = {"fast"}
        elif value is False:
            self.flags = set()
        elif isinstance(value, (set, frozenset)):
            invalid = set(value) - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            # Store a private copy so that later mutation of the caller's
            # set cannot silently change (or invalidate) these options.
            self.flags = set(value)
        elif isinstance(value, dict):
            invalid = set(value.keys()) - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            self.flags = {v for v, enable in value.items() if enable}
        else:
            msg = "Expected fastmath option(s) to be either a bool, dict or set"
            raise ValueError(msg)

    def __bool__(self):
        """Truthy when at least one fast-math flag is enabled."""
        return bool(self.flags)

    # Kept as an alias for any caller still using the legacy name.
    __nonzero__ = __bool__

    def encode(self) -> str:
        """Return the string form of the flag set."""
        return str(self.flags)

    def __eq__(self, other):
        # Only instances of exactly the same class compare by flags; anything
        # else is deferred to the other operand.
        if type(other) is type(self):
            return self.flags == other.flags
        return NotImplemented
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ParallelOptions(AbstractOptionValue):
    """
    Options for controlling auto parallelization.

    ``value`` may be:

    * a ``bool`` -- every option (including ``enabled``) is set to it;
    * a ``dict`` -- ``enabled`` is set to ``True`` and each named
      sub-option is read from the dict, defaulting to ``True`` when absent;
    * another ``ParallelOptions`` -- all options are copied from it.

    Raises
    ------
    NameError
        If a dict contains keys that are not known sub-options.
    ValueError
        If ``value`` is of an unsupported type.
    """

    __slots__ = (
        "enabled",
        "comprehension",
        "reduction",
        "inplace_binop",
        "setitem",
        "numpy",
        "stencil",
        "fusion",
        "prange",
    )

    def __init__(self, value):
        if isinstance(value, bool):
            self.enabled = value
            self.comprehension = value
            self.reduction = value
            self.inplace_binop = value
            self.setitem = value
            self.numpy = value
            self.stencil = value
            self.fusion = value
            self.prange = value
        elif isinstance(value, dict):
            # Work on a shallow copy: the pops below are only bookkeeping to
            # detect unknown keys and must not empty the caller's dict
            # (which would make a reused dict silently mean "all defaults").
            remaining = dict(value)
            self.enabled = True
            self.comprehension = remaining.pop("comprehension", True)
            self.reduction = remaining.pop("reduction", True)
            self.inplace_binop = remaining.pop("inplace_binop", True)
            self.setitem = remaining.pop("setitem", True)
            self.numpy = remaining.pop("numpy", True)
            self.stencil = remaining.pop("stencil", True)
            self.fusion = remaining.pop("fusion", True)
            self.prange = remaining.pop("prange", True)
            if remaining:
                # Anything left over was not a recognised sub-option.
                msg = "Unrecognized parallel options: %s" % remaining.keys()
                raise NameError(msg)
        elif isinstance(value, ParallelOptions):
            self.enabled = value.enabled
            self.comprehension = value.comprehension
            self.reduction = value.reduction
            self.inplace_binop = value.inplace_binop
            self.setitem = value.setitem
            self.numpy = value.numpy
            self.stencil = value.stencil
            self.fusion = value.fusion
            self.prange = value.prange
        else:
            msg = "Expect parallel option to be either a bool or a dict"
            raise ValueError(msg)

    def _get_values(self):
        """Get values as dictionary."""
        return {k: getattr(self, k) for k in self.__slots__}

    def __eq__(self, other):
        # Only instances of exactly the same class compare by value; anything
        # else is deferred to the other operand.
        if type(other) is type(self):
            return self._get_values() == other._get_values()
        return NotImplemented

    def encode(self) -> str:
        """Return ``name=value`` pairs for every option, comma separated."""
        return ", ".join(f"{k}={v}" for k, v in self._get_values().items())
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class InlineOptions(AbstractOptionValue):
    """
    Options for controlling inlining

    ``value`` must be the string ``'always'``, the string ``'never'``, or a
    callable (treated as a cost model).

    Raises
    ------
    ValueError
        If ``value`` is any other string or a non-callable object.
    """

    def __init__(self, value):
        if isinstance(value, str):
            ok = value in ("always", "never")
        else:
            ok = callable(value)

        if not ok:
            # ``value`` is wrapped in a 1-tuple so that tuple inputs are
            # formatted as a whole instead of being unpacked by ``%`` (which
            # would raise TypeError or print only the first element).
            msg = (
                "kwarg 'inline' must be one of the strings 'always' or "
                "'never', or it can be a callable that returns True/False. "
                "Found value %s" % (value,)
            )
            raise ValueError(msg)

        self._inline = value

    @property
    def is_never_inline(self):
        """
        True if never inline
        """
        return self._inline == "never"

    @property
    def is_always_inline(self):
        """
        True if always inline
        """
        return self._inline == "always"

    @property
    def has_cost_model(self):
        """
        True if a cost model is provided
        """
        return not (self.is_always_inline or self.is_never_inline)

    @property
    def value(self):
        """
        The raw value
        """
        return self._inline

    def __eq__(self, other):
        # Only instances of exactly the same class compare by value; anything
        # else is deferred to the other operand.
        if type(other) is type(self):
            return self.value == other.value
        return NotImplemented

    def encode(self) -> str:
        """Return the ``repr`` of the raw inline value."""
        return repr(self._inline)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class TargetOptions:
    """Target options maps user options from decorators to the
    ``numba.core.compiler.Flags`` used by lowering and target context.

    Subclasses declare the supported options as class attributes that are
    ``Mapping`` instances; the attribute name is the user-facing option name.
    """

    class Mapping:
        """Describes how one user option is written onto the flags object.

        Parameters
        ----------
        flag_name : str
            Name of the attribute to set on the flags object.
        apply : callable, optional
            Converts the user-supplied value before it is stored. Defaults
            to the identity function.
        """

        def __init__(self, flag_name, apply=lambda x: x):
            self.flag_name = flag_name
            self.apply = apply

    def finalize(self, flags, options):
        """Subclasses can override this method to make target specific
        customizations of default flags.

        Parameters
        ----------
        flags : Flags
        options : dict
        """
        pass

    @classmethod
    def parse_as_flags(cls, flags, options):
        """Parse target options defined in ``options`` and set ``flags``
        accordingly.

        Parameters
        ----------
        flags : Flags
        options : dict

        Returns
        -------
        The same ``flags`` object that was passed in.

        Raises
        ------
        KeyError
            If ``options`` contains a name with no matching ``Mapping``.
        """
        opt = cls()
        opt._apply(flags, options)
        opt.finalize(flags, options)
        return flags

    def _apply(self, flags, options):
        """Validate ``options`` and copy the recognised ones onto ``flags``."""
        # Find all Mapping instances in the class
        mappings = {}
        cls = type(self)
        for k in dir(cls):
            v = getattr(cls, k)
            if isinstance(v, cls.Mapping):
                mappings[k] = v

        # Reject unknown option names *before* touching ``flags`` so that a
        # failed call does not leave the flags object partially updated.
        unused = set(options).difference(mappings)
        if unused:
            m = (
                f"Unrecognized options: {unused}. "
                f"Known options are {mappings.keys()}"
            )
            raise KeyError(m)

        for k, mapping in mappings.items():
            if k in options:
                v = mapping.apply(options[k])
                setattr(flags, mapping.flag_name, v)
|