numba-cuda 0.21.1__cp313-cp313-win_amd64.whl → 0.24.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_compat.py +47 -0
- numba_cuda/numba/cuda/api.py +4 -1
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -40
- numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +56 -119
- numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
- numba_cuda/numba/cuda/codegen.py +46 -12
- numba_cuda/numba/cuda/compiler.py +15 -9
- numba_cuda/numba/cuda/core/analysis.py +29 -21
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +4 -4
- numba_cuda/numba/cuda/core/base.py +12 -11
- numba_cuda/numba/cuda/core/bytecode.py +21 -13
- numba_cuda/numba/cuda/core/byteflow.py +336 -90
- numba_cuda/numba/cuda/core/compiler.py +3 -4
- numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
- numba_cuda/numba/cuda/core/config.py +5 -7
- numba_cuda/numba/cuda/core/consts.py +1 -1
- numba_cuda/numba/cuda/core/controlflow.py +17 -9
- numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
- numba_cuda/numba/cuda/core/errors.py +4 -912
- numba_cuda/numba/cuda/core/inline_closurecall.py +82 -67
- numba_cuda/numba/cuda/core/interpreter.py +334 -160
- numba_cuda/numba/cuda/core/ir.py +191 -119
- numba_cuda/numba/cuda/core/ir_utils.py +149 -128
- numba_cuda/numba/cuda/core/postproc.py +8 -8
- numba_cuda/numba/cuda/core/pythonapi.py +3 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +6 -3
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +5 -5
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +3 -3
- numba_cuda/numba/cuda/core/ssa.py +5 -5
- numba_cuda/numba/cuda/core/transforms.py +29 -16
- numba_cuda/numba/cuda/core/typed_passes.py +10 -10
- numba_cuda/numba/cuda/core/typeinfer.py +42 -27
- numba_cuda/numba/cuda/core/untyped_passes.py +82 -65
- numba_cuda/numba/cuda/cpython/unicode.py +2 -2
- numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
- numba_cuda/numba/cuda/cudadecl.py +0 -13
- numba_cuda/numba/cuda/cudadrv/devicearray.py +10 -9
- numba_cuda/numba/cuda/cudadrv/driver.py +142 -519
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +87 -32
- numba_cuda/numba/cuda/cudaimpl.py +0 -12
- numba_cuda/numba/cuda/debuginfo.py +25 -0
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +4 -7
- numba_cuda/numba/cuda/deviceufunc.py +3 -6
- numba_cuda/numba/cuda/dispatcher.py +39 -49
- numba_cuda/numba/cuda/intrinsics.py +150 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
- numba_cuda/numba/cuda/lowering.py +36 -29
- numba_cuda/numba/cuda/memory_management/nrt.py +10 -14
- numba_cuda/numba/cuda/np/arrayobj.py +61 -9
- numba_cuda/numba/cuda/np/numpy_support.py +32 -9
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
- numba_cuda/numba/cuda/printimpl.py +20 -0
- numba_cuda/numba/cuda/serialize.py +10 -0
- numba_cuda/numba/cuda/stubs.py +0 -11
- numba_cuda/numba/cuda/testing.py +4 -8
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +21 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +195 -51
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +6 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +3 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +6 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +11 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +53 -23
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +22 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py +243 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_numba_interop.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +51 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +117 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_globals.py +111 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +61 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +31 -0
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
- numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
- numba_cuda/numba/cuda/typing/context.py +3 -1
- numba_cuda/numba/cuda/typing/typeof.py +51 -2
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +106 -105
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +0 -159
- numba_cuda/numba/cuda/cext/_devicearray.h +0 -29
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -41
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
|
@@ -350,9 +350,9 @@ class InlineInlinables(FunctionPass):
|
|
|
350
350
|
while work_list:
|
|
351
351
|
label, block = work_list.pop()
|
|
352
352
|
for i, instr in enumerate(block.body):
|
|
353
|
-
if isinstance(instr, ir.
|
|
353
|
+
if isinstance(instr, ir.assign_types):
|
|
354
354
|
expr = instr.value
|
|
355
|
-
if isinstance(expr, ir.
|
|
355
|
+
if isinstance(expr, ir.expr_types) and expr.op == "call":
|
|
356
356
|
if guard(
|
|
357
357
|
self._do_work,
|
|
358
358
|
state,
|
|
@@ -561,14 +561,14 @@ class CanonicalizeLoopEntry(FunctionPass):
|
|
|
561
561
|
|
|
562
562
|
# Find the start of loop entry statement that needs to be included.
|
|
563
563
|
startpt = None
|
|
564
|
-
list_of_insts = list(entry_block.find_insts(ir.
|
|
564
|
+
list_of_insts = list(entry_block.find_insts(ir.assign_types))
|
|
565
565
|
for assign in reversed(list_of_insts):
|
|
566
566
|
if assign.target in deps:
|
|
567
567
|
rhs = assign.value
|
|
568
|
-
if isinstance(rhs, ir.
|
|
568
|
+
if isinstance(rhs, ir.var_types):
|
|
569
569
|
if rhs.is_temp:
|
|
570
570
|
deps.add(rhs)
|
|
571
|
-
elif isinstance(rhs, ir.
|
|
571
|
+
elif isinstance(rhs, ir.expr_types):
|
|
572
572
|
expr = rhs
|
|
573
573
|
if expr.op == "getiter":
|
|
574
574
|
startpt = assign
|
|
@@ -576,11 +576,11 @@ class CanonicalizeLoopEntry(FunctionPass):
|
|
|
576
576
|
deps.add(expr.value)
|
|
577
577
|
elif expr.op == "call":
|
|
578
578
|
defn = guard(get_definition, fir, expr.func)
|
|
579
|
-
if isinstance(defn, ir.
|
|
579
|
+
if isinstance(defn, ir.global_types):
|
|
580
580
|
if expr.func.is_temp:
|
|
581
581
|
deps.add(expr.func)
|
|
582
582
|
elif (
|
|
583
|
-
isinstance(rhs, ir.
|
|
583
|
+
isinstance(rhs, ir.global_types)
|
|
584
584
|
and rhs.value in self._supported_globals
|
|
585
585
|
):
|
|
586
586
|
startpt = assign
|
|
@@ -632,32 +632,32 @@ class MakeFunctionToJitFunction(FunctionPass):
|
|
|
632
632
|
def run_pass(self, state):
|
|
633
633
|
func_ir = state.func_ir
|
|
634
634
|
mutated = False
|
|
635
|
-
for
|
|
635
|
+
for blk in func_ir.blocks.values():
|
|
636
636
|
for stmt in blk.body:
|
|
637
|
-
if isinstance(stmt, ir.
|
|
638
|
-
if isinstance(stmt.value, ir.
|
|
637
|
+
if isinstance(stmt, ir.assign_types):
|
|
638
|
+
if isinstance(stmt.value, ir.expr_types):
|
|
639
639
|
if stmt.value.op == "make_function":
|
|
640
640
|
node = stmt.value
|
|
641
641
|
getdef = func_ir.get_definition
|
|
642
642
|
kw_default = getdef(node.defaults)
|
|
643
643
|
ok = False
|
|
644
644
|
if kw_default is None or isinstance(
|
|
645
|
-
kw_default, ir.
|
|
645
|
+
kw_default, ir.const_types
|
|
646
646
|
):
|
|
647
647
|
ok = True
|
|
648
648
|
elif isinstance(kw_default, tuple):
|
|
649
649
|
ok = all(
|
|
650
650
|
[
|
|
651
|
-
isinstance(getdef(x), ir.
|
|
651
|
+
isinstance(getdef(x), ir.const_types)
|
|
652
652
|
for x in kw_default
|
|
653
653
|
]
|
|
654
654
|
)
|
|
655
|
-
elif isinstance(kw_default, ir.
|
|
655
|
+
elif isinstance(kw_default, ir.expr_types):
|
|
656
656
|
if kw_default.op != "build_tuple":
|
|
657
657
|
continue
|
|
658
658
|
ok = all(
|
|
659
659
|
[
|
|
660
|
-
isinstance(getdef(x), ir.
|
|
660
|
+
isinstance(getdef(x), ir.const_types)
|
|
661
661
|
for x in kw_default.items
|
|
662
662
|
]
|
|
663
663
|
)
|
|
@@ -696,11 +696,14 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
696
696
|
def run_pass(self, state):
|
|
697
697
|
mutated = False
|
|
698
698
|
func_ir = state.func_ir
|
|
699
|
-
for
|
|
699
|
+
for blk in func_ir.blocks.values():
|
|
700
700
|
calls = [_ for _ in blk.find_exprs("call")]
|
|
701
701
|
for call in calls:
|
|
702
702
|
glbl = guard(get_definition, func_ir, call.func)
|
|
703
|
-
if glbl and
|
|
703
|
+
if glbl and (
|
|
704
|
+
isinstance(glbl, ir.global_types)
|
|
705
|
+
or isinstance(glbl, ir.freevar_types)
|
|
706
|
+
):
|
|
704
707
|
# find a literal_unroll
|
|
705
708
|
if glbl.value is literal_unroll:
|
|
706
709
|
if len(call.args) > 1:
|
|
@@ -712,7 +715,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
712
715
|
unroll_var = call.args[0]
|
|
713
716
|
to_unroll = guard(get_definition, func_ir, unroll_var)
|
|
714
717
|
if (
|
|
715
|
-
isinstance(to_unroll, ir.
|
|
718
|
+
isinstance(to_unroll, ir.expr_types)
|
|
716
719
|
and to_unroll.op == "build_list"
|
|
717
720
|
):
|
|
718
721
|
# make sure they are all const items in the list
|
|
@@ -726,7 +729,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
726
729
|
raise errors.UnsupportedError(
|
|
727
730
|
msg % item, to_unroll.loc
|
|
728
731
|
)
|
|
729
|
-
if not isinstance(val, ir.
|
|
732
|
+
if not isinstance(val, ir.const_types):
|
|
730
733
|
msg = (
|
|
731
734
|
"Found non-constant value at "
|
|
732
735
|
"position %s in a list argument to "
|
|
@@ -777,17 +780,18 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
777
780
|
asgn.value = tup
|
|
778
781
|
mutated = True
|
|
779
782
|
elif (
|
|
780
|
-
isinstance(to_unroll, ir.
|
|
783
|
+
isinstance(to_unroll, ir.expr_types)
|
|
781
784
|
and to_unroll.op == "build_tuple"
|
|
782
785
|
):
|
|
783
786
|
# this is fine, do nothing
|
|
784
787
|
pass
|
|
785
|
-
elif
|
|
786
|
-
to_unroll,
|
|
788
|
+
elif (
|
|
789
|
+
isinstance(to_unroll, ir.global_types)
|
|
790
|
+
or isinstance(to_unroll, ir.freevar_types)
|
|
787
791
|
) and isinstance(to_unroll.value, tuple):
|
|
788
792
|
# this is fine, do nothing
|
|
789
793
|
pass
|
|
790
|
-
elif isinstance(to_unroll, ir.
|
|
794
|
+
elif isinstance(to_unroll, ir.arg_types):
|
|
791
795
|
# this is only fine if the arg is a tuple
|
|
792
796
|
ty = state.typemap[to_unroll.name]
|
|
793
797
|
if not isinstance(ty, self._accepted_types):
|
|
@@ -802,7 +806,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
802
806
|
)
|
|
803
807
|
else:
|
|
804
808
|
extra = None
|
|
805
|
-
if isinstance(to_unroll, ir.
|
|
809
|
+
if isinstance(to_unroll, ir.expr_types):
|
|
806
810
|
# probably a slice
|
|
807
811
|
if to_unroll.op == "getitem":
|
|
808
812
|
ty = state.typemap[to_unroll.value.name]
|
|
@@ -810,7 +814,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
810
814
|
if not isinstance(ty, self._accepted_types):
|
|
811
815
|
extra = "operation %s" % to_unroll.op
|
|
812
816
|
loc = to_unroll.loc
|
|
813
|
-
elif isinstance(to_unroll, ir.
|
|
817
|
+
elif isinstance(to_unroll, ir.arg_types):
|
|
814
818
|
extra = "non-const argument %s" % to_unroll.name
|
|
815
819
|
loc = to_unroll.loc
|
|
816
820
|
else:
|
|
@@ -868,10 +872,10 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
868
872
|
term = None
|
|
869
873
|
if b.body:
|
|
870
874
|
term = b.body[-1]
|
|
871
|
-
if isinstance(term, ir.
|
|
875
|
+
if isinstance(term, ir.jump_types):
|
|
872
876
|
if term.target not in ignore:
|
|
873
877
|
b.body[-1] = ir.Jump(term.target + offset, term.loc)
|
|
874
|
-
if isinstance(term, ir.
|
|
878
|
+
if isinstance(term, ir.branch_types):
|
|
875
879
|
if term.truebr not in ignore:
|
|
876
880
|
new_true = term.truebr + offset
|
|
877
881
|
else:
|
|
@@ -925,7 +929,7 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
925
929
|
sentinel_blocks = []
|
|
926
930
|
for lbl, blk in switch_ir.blocks.items():
|
|
927
931
|
for i, stmt in enumerate(blk.body):
|
|
928
|
-
if isinstance(stmt, ir.
|
|
932
|
+
if isinstance(stmt, ir.assign_types):
|
|
929
933
|
if "SENTINEL" in stmt.target.name:
|
|
930
934
|
sentinel_blocks.append(lbl)
|
|
931
935
|
sentinel_exits.add(blk.body[-1].target)
|
|
@@ -939,10 +943,10 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
939
943
|
local_lbl = [x for x in loop_ir.blocks.keys()]
|
|
940
944
|
for lbl, blk in loop_ir.blocks.items():
|
|
941
945
|
for i, stmt in enumerate(blk.body):
|
|
942
|
-
if isinstance(stmt, ir.
|
|
946
|
+
if isinstance(stmt, ir.jump_types):
|
|
943
947
|
if stmt.target not in local_lbl:
|
|
944
948
|
ignore_set.add(stmt.target)
|
|
945
|
-
if isinstance(stmt, ir.
|
|
949
|
+
if isinstance(stmt, ir.branch_types):
|
|
946
950
|
if stmt.truebr not in local_lbl:
|
|
947
951
|
ignore_set.add(stmt.truebr)
|
|
948
952
|
if stmt.falsebr not in local_lbl:
|
|
@@ -968,9 +972,9 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
968
972
|
for blk in loop_blocks.values():
|
|
969
973
|
new_body = []
|
|
970
974
|
for stmt in blk.body:
|
|
971
|
-
if isinstance(stmt, ir.
|
|
975
|
+
if isinstance(stmt, ir.assign_types):
|
|
972
976
|
if (
|
|
973
|
-
isinstance(stmt.value, ir.
|
|
977
|
+
isinstance(stmt.value, ir.expr_types)
|
|
974
978
|
and stmt.value.op == "typed_getitem"
|
|
975
979
|
):
|
|
976
980
|
if isinstance(branch_ty, types.Literal):
|
|
@@ -1119,19 +1123,20 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1119
1123
|
)
|
|
1120
1124
|
keys = [k for k in data.keys()]
|
|
1121
1125
|
|
|
1122
|
-
elifs = [
|
|
1123
|
-
|
|
1124
|
-
|
|
1126
|
+
elifs = [
|
|
1127
|
+
elif_tplt % ",".join(map(str, data[keys[i]]))
|
|
1128
|
+
for i in range(1, len(keys))
|
|
1129
|
+
]
|
|
1125
1130
|
src = b % (",".join(map(str, data[keys[0]])), "".join(elifs))
|
|
1126
1131
|
wstr = src
|
|
1127
1132
|
l = {}
|
|
1128
1133
|
exec(wstr, {}, l)
|
|
1129
1134
|
bfunc = l["foo"]
|
|
1130
1135
|
branches = compile_to_numba_ir(bfunc, {})
|
|
1131
|
-
for
|
|
1136
|
+
for blk in branches.blocks.values():
|
|
1132
1137
|
for stmt in blk.body:
|
|
1133
|
-
if isinstance(stmt, ir.
|
|
1134
|
-
if isinstance(stmt.value, ir.
|
|
1138
|
+
if isinstance(stmt, ir.assign_types):
|
|
1139
|
+
if isinstance(stmt.value, ir.global_types):
|
|
1135
1140
|
if stmt.value.name == "PLACEHOLDER_INDEX":
|
|
1136
1141
|
stmt.value = index
|
|
1137
1142
|
return branches
|
|
@@ -1154,12 +1159,12 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1154
1159
|
# call to a global function "want" and returns the arguments
|
|
1155
1160
|
# supplied to that function's call
|
|
1156
1161
|
some_call = get_definition(func_ir, init_arg)
|
|
1157
|
-
if not isinstance(some_call, ir.
|
|
1162
|
+
if not isinstance(some_call, ir.expr_types):
|
|
1158
1163
|
raise GuardException
|
|
1159
1164
|
if not some_call.op == "call":
|
|
1160
1165
|
raise GuardException
|
|
1161
1166
|
the_global = get_definition(func_ir, some_call.func)
|
|
1162
|
-
if not isinstance(the_global, ir.
|
|
1167
|
+
if not isinstance(the_global, ir.global_types):
|
|
1163
1168
|
raise GuardException
|
|
1164
1169
|
if the_global.value is not want:
|
|
1165
1170
|
raise GuardException
|
|
@@ -1169,7 +1174,7 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1169
1174
|
"""This finds loops which are compliant with the form:
|
|
1170
1175
|
for i in range(len(literal_unroll(<something>>)))"""
|
|
1171
1176
|
unroll_loops = {}
|
|
1172
|
-
for
|
|
1177
|
+
for loop in loops.values():
|
|
1173
1178
|
# TODO: check the loop head has literal_unroll, if it does but
|
|
1174
1179
|
# does not conform to the following then raise
|
|
1175
1180
|
|
|
@@ -1206,7 +1211,7 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1206
1211
|
)
|
|
1207
1212
|
if literal_unroll_call is None:
|
|
1208
1213
|
continue
|
|
1209
|
-
if not isinstance(literal_unroll_call, ir.
|
|
1214
|
+
if not isinstance(literal_unroll_call, ir.expr_types):
|
|
1210
1215
|
continue
|
|
1211
1216
|
if literal_unroll_call.op != "call":
|
|
1212
1217
|
continue
|
|
@@ -1263,9 +1268,9 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1263
1268
|
for lbli in loop.body:
|
|
1264
1269
|
blk = func_ir.blocks[lbli]
|
|
1265
1270
|
for stmt in blk.body:
|
|
1266
|
-
if isinstance(stmt, ir.
|
|
1271
|
+
if isinstance(stmt, ir.assign_types):
|
|
1267
1272
|
if (
|
|
1268
|
-
isinstance(stmt.value, ir.
|
|
1273
|
+
isinstance(stmt.value, ir.expr_types)
|
|
1269
1274
|
and stmt.value.op == "getitem"
|
|
1270
1275
|
):
|
|
1271
1276
|
# check for something like a[i]
|
|
@@ -1346,9 +1351,9 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1346
1351
|
for lbl in loop_info.loop.body:
|
|
1347
1352
|
blk = func_ir.blocks[lbl]
|
|
1348
1353
|
for stmt in blk.body:
|
|
1349
|
-
if isinstance(stmt, ir.
|
|
1354
|
+
if isinstance(stmt, ir.assign_types):
|
|
1350
1355
|
if (
|
|
1351
|
-
isinstance(stmt.value, ir.
|
|
1356
|
+
isinstance(stmt.value, ir.expr_types)
|
|
1352
1357
|
and stmt.value.op == "getitem"
|
|
1353
1358
|
):
|
|
1354
1359
|
# try a couple of spellings... a[i] and ref(a)[i]
|
|
@@ -1508,7 +1513,7 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1508
1513
|
# confident that tuple unrolling is behaving require opt-in
|
|
1509
1514
|
# guard of `literal_unroll`, remove this later!
|
|
1510
1515
|
phi_val_defn = guard(get_definition, func_ir, phi.value)
|
|
1511
|
-
if not isinstance(phi_val_defn, ir.
|
|
1516
|
+
if not isinstance(phi_val_defn, ir.expr_types):
|
|
1512
1517
|
return False
|
|
1513
1518
|
if not phi_val_defn.op == "call":
|
|
1514
1519
|
return False
|
|
@@ -1518,7 +1523,7 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1518
1523
|
func_var = guard(get_definition, func_ir, call.func)
|
|
1519
1524
|
func = guard(get_definition, func_ir, func_var)
|
|
1520
1525
|
if func is None or not isinstance(
|
|
1521
|
-
func,
|
|
1526
|
+
func, ir.global_types + ir.freevar_types
|
|
1522
1527
|
):
|
|
1523
1528
|
return False
|
|
1524
1529
|
if (
|
|
@@ -1558,9 +1563,9 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1558
1563
|
# look for iternext
|
|
1559
1564
|
idx = 0
|
|
1560
1565
|
for stmt in entry_block.body:
|
|
1561
|
-
if isinstance(stmt, ir.
|
|
1566
|
+
if isinstance(stmt, ir.assign_types):
|
|
1562
1567
|
if (
|
|
1563
|
-
isinstance(stmt.value, ir.
|
|
1568
|
+
isinstance(stmt.value, ir.expr_types)
|
|
1564
1569
|
and stmt.value.op == "getiter"
|
|
1565
1570
|
):
|
|
1566
1571
|
break
|
|
@@ -1601,7 +1606,7 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1601
1606
|
for x in induction_vars:
|
|
1602
1607
|
try: # there's not always an alias, e.g. loop from inlined closure
|
|
1603
1608
|
tmp.add(func_ir.get_assignee(x, loop.header))
|
|
1604
|
-
except ValueError:
|
|
1609
|
+
except ValueError: # noqa: PERF203
|
|
1605
1610
|
pass
|
|
1606
1611
|
induction_vars |= tmp
|
|
1607
1612
|
induction_var_names = set([x.name for x in induction_vars])
|
|
@@ -1615,7 +1620,7 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1615
1620
|
# replace RHS use of induction var with getitem
|
|
1616
1621
|
for lbl in check_blocks:
|
|
1617
1622
|
for stmt in func_ir.blocks[lbl].body:
|
|
1618
|
-
if isinstance(stmt, ir.
|
|
1623
|
+
if isinstance(stmt, ir.assign_types):
|
|
1619
1624
|
# check for aliases
|
|
1620
1625
|
try:
|
|
1621
1626
|
lookup = getattr(stmt.value, "name", None)
|
|
@@ -1635,7 +1640,7 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1635
1640
|
loops = cfg.loops()
|
|
1636
1641
|
|
|
1637
1642
|
mutated = False
|
|
1638
|
-
for
|
|
1643
|
+
for loop in loops.values():
|
|
1639
1644
|
stat = self.assess_loop(loop, func_ir, state.typemap)
|
|
1640
1645
|
if stat:
|
|
1641
1646
|
if self._DEBUG:
|
|
@@ -1675,15 +1680,20 @@ class PropagateLiterals(FunctionPass):
|
|
|
1675
1680
|
changed = False
|
|
1676
1681
|
|
|
1677
1682
|
for block in func_ir.blocks.values():
|
|
1678
|
-
for assign in block.find_insts(ir.
|
|
1683
|
+
for assign in block.find_insts(ir.assign_types):
|
|
1679
1684
|
value = assign.value
|
|
1680
|
-
if
|
|
1685
|
+
if (
|
|
1686
|
+
isinstance(value, ir.arg_types)
|
|
1687
|
+
or isinstance(value, ir.const_types)
|
|
1688
|
+
or isinstance(value, ir.freevar_types)
|
|
1689
|
+
or isinstance(value, ir.global_types)
|
|
1690
|
+
):
|
|
1681
1691
|
continue
|
|
1682
1692
|
|
|
1683
1693
|
# 1) Don't change return stmt in the form
|
|
1684
1694
|
# $return_xyz = cast(value=ABC)
|
|
1685
1695
|
# 2) Don't propagate literal values that are not primitives
|
|
1686
|
-
if isinstance(value, ir.
|
|
1696
|
+
if isinstance(value, ir.expr_types) and value.op in (
|
|
1687
1697
|
"cast",
|
|
1688
1698
|
"build_map",
|
|
1689
1699
|
"build_list",
|
|
@@ -1716,13 +1726,13 @@ class PropagateLiterals(FunctionPass):
|
|
|
1716
1726
|
# At the moment, one avoid propagating the literal
|
|
1717
1727
|
# value if the argument is a PHI node
|
|
1718
1728
|
|
|
1719
|
-
if isinstance(value, ir.
|
|
1729
|
+
if isinstance(value, ir.expr_types) and value.op == "call":
|
|
1720
1730
|
fn = guard(get_definition, func_ir, value.func.name)
|
|
1721
1731
|
if fn is None:
|
|
1722
1732
|
continue
|
|
1723
1733
|
|
|
1724
1734
|
if not (
|
|
1725
|
-
isinstance(fn, ir.
|
|
1735
|
+
isinstance(fn, ir.global_types)
|
|
1726
1736
|
and fn.name in accepted_functions
|
|
1727
1737
|
):
|
|
1728
1738
|
continue
|
|
@@ -1731,7 +1741,10 @@ class PropagateLiterals(FunctionPass):
|
|
|
1731
1741
|
# check if any of the args to isinstance is a PHI node
|
|
1732
1742
|
iv = func_ir._definitions[arg.name]
|
|
1733
1743
|
assert len(iv) == 1 # SSA!
|
|
1734
|
-
if
|
|
1744
|
+
if (
|
|
1745
|
+
isinstance(iv[0], ir.expr_types)
|
|
1746
|
+
and iv[0].op == "phi"
|
|
1747
|
+
):
|
|
1735
1748
|
msg = (
|
|
1736
1749
|
f"{fn.name}() cannot determine the "
|
|
1737
1750
|
f'type of variable "{arg.unversioned_name}" '
|
|
@@ -1741,7 +1754,7 @@ class PropagateLiterals(FunctionPass):
|
|
|
1741
1754
|
|
|
1742
1755
|
# Only propagate a PHI node if all arguments are the same
|
|
1743
1756
|
# constant
|
|
1744
|
-
if isinstance(value, ir.
|
|
1757
|
+
if isinstance(value, ir.expr_types) and value.op == "phi":
|
|
1745
1758
|
# typemap will return None in case `inc.name` not in typemap
|
|
1746
1759
|
v = [typemap.get(inc.name) for inc in value.incoming_values]
|
|
1747
1760
|
# stop if the elements in `v` do not hold the same value
|
|
@@ -1788,8 +1801,10 @@ class LiteralPropagationSubPipelinePass(FunctionPass):
|
|
|
1788
1801
|
found = False
|
|
1789
1802
|
func_ir = state.func_ir
|
|
1790
1803
|
for blk in func_ir.blocks.values():
|
|
1791
|
-
for asgn in blk.find_insts(ir.
|
|
1792
|
-
if isinstance(asgn.value,
|
|
1804
|
+
for asgn in blk.find_insts(ir.assign_types):
|
|
1805
|
+
if isinstance(asgn.value, ir.global_types) or isinstance(
|
|
1806
|
+
asgn.value, ir.freevar_types
|
|
1807
|
+
):
|
|
1793
1808
|
value = asgn.value.value
|
|
1794
1809
|
if value is isinstance or value is hasattr:
|
|
1795
1810
|
found = True
|
|
@@ -1835,8 +1850,10 @@ class LiteralUnroll(FunctionPass):
|
|
|
1835
1850
|
found = False
|
|
1836
1851
|
func_ir = state.func_ir
|
|
1837
1852
|
for blk in func_ir.blocks.values():
|
|
1838
|
-
for asgn in blk.find_insts(ir.
|
|
1839
|
-
if isinstance(asgn.value,
|
|
1853
|
+
for asgn in blk.find_insts(ir.assign_types):
|
|
1854
|
+
if isinstance(asgn.value, ir.global_types) or isinstance(
|
|
1855
|
+
asgn.value, ir.freevar_types
|
|
1856
|
+
):
|
|
1840
1857
|
if asgn.value.value is literal_unroll:
|
|
1841
1858
|
found = True
|
|
1842
1859
|
break
|
|
@@ -1953,7 +1970,7 @@ class RewriteDynamicRaises(FunctionPass):
|
|
|
1953
1970
|
changed = False
|
|
1954
1971
|
|
|
1955
1972
|
for block in func_ir.blocks.values():
|
|
1956
|
-
for raise_ in block.find_insts(
|
|
1973
|
+
for raise_ in block.find_insts(ir.raise_types + ir.tryraise_types):
|
|
1957
1974
|
call_inst = guard(get_definition, func_ir, raise_.exception)
|
|
1958
1975
|
if call_inst is None:
|
|
1959
1976
|
continue
|
|
@@ -1963,7 +1980,7 @@ class RewriteDynamicRaises(FunctionPass):
|
|
|
1963
1980
|
try:
|
|
1964
1981
|
const = func_ir.infer_constant(exc_arg)
|
|
1965
1982
|
exc_args.append(const)
|
|
1966
|
-
except consts.ConstantInferenceError:
|
|
1983
|
+
except consts.ConstantInferenceError: # noqa: PERF203
|
|
1967
1984
|
exc_args.append(exc_arg)
|
|
1968
1985
|
loc = raise_.loc
|
|
1969
1986
|
|
|
@@ -394,7 +394,7 @@ def _set_code_point(a, i, ch):
|
|
|
394
394
|
)
|
|
395
395
|
|
|
396
396
|
|
|
397
|
-
if PYVERSION in ((3, 12), (3, 13)):
|
|
397
|
+
if PYVERSION in ((3, 12), (3, 13), (3, 14)):
|
|
398
398
|
|
|
399
399
|
@register_jitable
|
|
400
400
|
def _pick_kind(kind1, kind2):
|
|
@@ -442,7 +442,7 @@ def _pick_ascii(is_ascii1, is_ascii2):
|
|
|
442
442
|
return types.uint32(0)
|
|
443
443
|
|
|
444
444
|
|
|
445
|
-
if PYVERSION in ((3, 12), (3, 13)):
|
|
445
|
+
if PYVERSION in ((3, 12), (3, 13), (3, 14)):
|
|
446
446
|
|
|
447
447
|
@register_jitable
|
|
448
448
|
def _kind_to_byte_width(kind):
|
|
@@ -125,9 +125,7 @@ def _gettyperecord_impl(typingctx, codepoint):
|
|
|
125
125
|
|
|
126
126
|
byref = [upper, lower, title, decimal, digit, flags]
|
|
127
127
|
builder.call(fn, [args[0]] + byref)
|
|
128
|
-
buf =
|
|
129
|
-
for x in byref:
|
|
130
|
-
buf.append(builder.load(x))
|
|
128
|
+
buf = list(map(builder.load, byref))
|
|
131
129
|
|
|
132
130
|
res = context.make_tuple(builder, signature.return_type, tuple(buf))
|
|
133
131
|
return impl_ret_untracked(context, builder, signature.return_type, res)
|
|
@@ -103,16 +103,6 @@ class Cuda_syncwarp(ConcreteTemplate):
|
|
|
103
103
|
cases = [signature(types.none), signature(types.none, types.i4)]
|
|
104
104
|
|
|
105
105
|
|
|
106
|
-
@register
|
|
107
|
-
class Cuda_vote_sync_intrinsic(ConcreteTemplate):
|
|
108
|
-
key = cuda.vote_sync_intrinsic
|
|
109
|
-
cases = [
|
|
110
|
-
signature(
|
|
111
|
-
types.Tuple((types.i4, types.b1)), types.i4, types.i4, types.b1
|
|
112
|
-
)
|
|
113
|
-
]
|
|
114
|
-
|
|
115
|
-
|
|
116
106
|
@register
|
|
117
107
|
class Cuda_match_any_sync(ConcreteTemplate):
|
|
118
108
|
key = cuda.match_any_sync
|
|
@@ -522,9 +512,6 @@ class CudaModuleTemplate(AttributeTemplate):
|
|
|
522
512
|
def resolve_syncwarp(self, mod):
|
|
523
513
|
return types.Function(Cuda_syncwarp)
|
|
524
514
|
|
|
525
|
-
def resolve_vote_sync_intrinsic(self, mod):
|
|
526
|
-
return types.Function(Cuda_vote_sync_intrinsic)
|
|
527
|
-
|
|
528
515
|
def resolve_match_any_sync(self, mod):
|
|
529
516
|
return types.Function(Cuda_match_any_sync)
|
|
530
517
|
|
|
@@ -15,7 +15,6 @@ from ctypes import c_void_p
|
|
|
15
15
|
|
|
16
16
|
import numpy as np
|
|
17
17
|
|
|
18
|
-
from numba.cuda.cext import _devicearray
|
|
19
18
|
from numba.cuda.cudadrv import devices, dummyarray
|
|
20
19
|
from numba.cuda.cudadrv import driver as _driver
|
|
21
20
|
from numba.cuda import types
|
|
@@ -55,7 +54,7 @@ def require_cuda_ndarray(obj):
|
|
|
55
54
|
raise ValueError("require an cuda ndarray object")
|
|
56
55
|
|
|
57
56
|
|
|
58
|
-
class DeviceNDArrayBase
|
|
57
|
+
class DeviceNDArrayBase:
|
|
59
58
|
"""A on GPU NDArray representation"""
|
|
60
59
|
|
|
61
60
|
__cuda_memory__ = True
|
|
@@ -108,7 +107,9 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
|
|
|
108
107
|
else:
|
|
109
108
|
# Make NULL pointer for empty allocation
|
|
110
109
|
null = _driver.binding.CUdeviceptr(0)
|
|
111
|
-
gpu_data = _driver.MemoryPointer(
|
|
110
|
+
gpu_data = _driver.MemoryPointer(
|
|
111
|
+
context=devices.get_context(), pointer=null, size=0
|
|
112
|
+
)
|
|
112
113
|
self.alloc_size = 0
|
|
113
114
|
|
|
114
115
|
self.gpu_data = gpu_data
|
|
@@ -158,7 +159,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
|
|
|
158
159
|
def _default_stream(self, stream):
|
|
159
160
|
return self.stream if not stream else stream
|
|
160
161
|
|
|
161
|
-
@
|
|
162
|
+
@functools.cached_property
|
|
162
163
|
def _numba_type_(self):
|
|
163
164
|
"""
|
|
164
165
|
Magic attribute expected by Numba to get the numba type that
|
|
@@ -177,8 +178,8 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
|
|
|
177
178
|
# or 'F' does not apply for broadcast arrays, because the strides, some
|
|
178
179
|
# of which will be 0, will not match those hardcoded in for 'C' or 'F'
|
|
179
180
|
# layouts.
|
|
181
|
+
broadcast = 0 in self.strides and (self.size != 0)
|
|
180
182
|
|
|
181
|
-
broadcast = 0 in self.strides
|
|
182
183
|
if self.flags["C_CONTIGUOUS"] and not broadcast:
|
|
183
184
|
layout = "C"
|
|
184
185
|
elif self.flags["F_CONTIGUOUS"] and not broadcast:
|
|
@@ -851,10 +852,10 @@ def array_core(ary):
|
|
|
851
852
|
"""
|
|
852
853
|
if not ary.strides or not ary.size:
|
|
853
854
|
return ary
|
|
854
|
-
core_index =
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
return ary[
|
|
855
|
+
core_index = tuple(
|
|
856
|
+
0 if stride == 0 else slice(None) for stride in ary.strides
|
|
857
|
+
)
|
|
858
|
+
return ary[core_index]
|
|
858
859
|
|
|
859
860
|
|
|
860
861
|
def is_contiguous(ary):
|