numba-cuda 0.21.1__cp313-cp313-win_amd64.whl → 0.23.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/api.py +4 -1
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +0 -38
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +0 -111
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/codegen.py +42 -10
- numba_cuda/numba/cuda/compiler.py +10 -4
- numba_cuda/numba/cuda/core/analysis.py +29 -21
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +4 -4
- numba_cuda/numba/cuda/core/base.py +6 -1
- numba_cuda/numba/cuda/core/consts.py +1 -1
- numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
- numba_cuda/numba/cuda/core/errors.py +4 -912
- numba_cuda/numba/cuda/core/inline_closurecall.py +71 -57
- numba_cuda/numba/cuda/core/interpreter.py +79 -64
- numba_cuda/numba/cuda/core/ir.py +191 -119
- numba_cuda/numba/cuda/core/ir_utils.py +142 -112
- numba_cuda/numba/cuda/core/postproc.py +8 -8
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +6 -3
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +5 -5
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +3 -3
- numba_cuda/numba/cuda/core/ssa.py +3 -3
- numba_cuda/numba/cuda/core/transforms.py +25 -10
- numba_cuda/numba/cuda/core/typed_passes.py +9 -9
- numba_cuda/numba/cuda/core/typeinfer.py +39 -24
- numba_cuda/numba/cuda/core/untyped_passes.py +71 -55
- numba_cuda/numba/cuda/cudadecl.py +0 -13
- numba_cuda/numba/cuda/cudadrv/devicearray.py +6 -5
- numba_cuda/numba/cuda/cudadrv/driver.py +132 -511
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +16 -0
- numba_cuda/numba/cuda/cudaimpl.py +0 -12
- numba_cuda/numba/cuda/debuginfo.py +104 -10
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +4 -7
- numba_cuda/numba/cuda/dispatcher.py +36 -32
- numba_cuda/numba/cuda/intrinsics.py +150 -1
- numba_cuda/numba/cuda/lowering.py +64 -29
- numba_cuda/numba/cuda/memory_management/nrt.py +10 -14
- numba_cuda/numba/cuda/np/arrayobj.py +54 -0
- numba_cuda/numba/cuda/np/numpy_support.py +26 -0
- numba_cuda/numba/cuda/printimpl.py +20 -0
- numba_cuda/numba/cuda/serialize.py +10 -0
- numba_cuda/numba/cuda/stubs.py +0 -11
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +21 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +130 -48
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +6 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +3 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +5 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +11 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +27 -19
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +10 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +89 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py +243 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_numba_interop.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +51 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +116 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_globals.py +111 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +61 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +31 -0
- numba_cuda/numba/cuda/typing/context.py +3 -1
- numba_cuda/numba/cuda/typing/typeof.py +56 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/METADATA +1 -1
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/RECORD +74 -74
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +0 -159
- numba_cuda/numba/cuda/cext/_devicearray.h +0 -29
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -41
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.23.0.dist-info}/top_level.txt +0 -0
|
@@ -350,9 +350,9 @@ class InlineInlinables(FunctionPass):
|
|
|
350
350
|
while work_list:
|
|
351
351
|
label, block = work_list.pop()
|
|
352
352
|
for i, instr in enumerate(block.body):
|
|
353
|
-
if isinstance(instr, ir.
|
|
353
|
+
if isinstance(instr, ir.assign_types):
|
|
354
354
|
expr = instr.value
|
|
355
|
-
if isinstance(expr, ir.
|
|
355
|
+
if isinstance(expr, ir.expr_types) and expr.op == "call":
|
|
356
356
|
if guard(
|
|
357
357
|
self._do_work,
|
|
358
358
|
state,
|
|
@@ -561,14 +561,14 @@ class CanonicalizeLoopEntry(FunctionPass):
|
|
|
561
561
|
|
|
562
562
|
# Find the start of loop entry statement that needs to be included.
|
|
563
563
|
startpt = None
|
|
564
|
-
list_of_insts = list(entry_block.find_insts(ir.
|
|
564
|
+
list_of_insts = list(entry_block.find_insts(ir.assign_types))
|
|
565
565
|
for assign in reversed(list_of_insts):
|
|
566
566
|
if assign.target in deps:
|
|
567
567
|
rhs = assign.value
|
|
568
|
-
if isinstance(rhs, ir.
|
|
568
|
+
if isinstance(rhs, ir.var_types):
|
|
569
569
|
if rhs.is_temp:
|
|
570
570
|
deps.add(rhs)
|
|
571
|
-
elif isinstance(rhs, ir.
|
|
571
|
+
elif isinstance(rhs, ir.expr_types):
|
|
572
572
|
expr = rhs
|
|
573
573
|
if expr.op == "getiter":
|
|
574
574
|
startpt = assign
|
|
@@ -576,11 +576,11 @@ class CanonicalizeLoopEntry(FunctionPass):
|
|
|
576
576
|
deps.add(expr.value)
|
|
577
577
|
elif expr.op == "call":
|
|
578
578
|
defn = guard(get_definition, fir, expr.func)
|
|
579
|
-
if isinstance(defn, ir.
|
|
579
|
+
if isinstance(defn, ir.global_types):
|
|
580
580
|
if expr.func.is_temp:
|
|
581
581
|
deps.add(expr.func)
|
|
582
582
|
elif (
|
|
583
|
-
isinstance(rhs, ir.
|
|
583
|
+
isinstance(rhs, ir.global_types)
|
|
584
584
|
and rhs.value in self._supported_globals
|
|
585
585
|
):
|
|
586
586
|
startpt = assign
|
|
@@ -634,30 +634,30 @@ class MakeFunctionToJitFunction(FunctionPass):
|
|
|
634
634
|
mutated = False
|
|
635
635
|
for idx, blk in func_ir.blocks.items():
|
|
636
636
|
for stmt in blk.body:
|
|
637
|
-
if isinstance(stmt, ir.
|
|
638
|
-
if isinstance(stmt.value, ir.
|
|
637
|
+
if isinstance(stmt, ir.assign_types):
|
|
638
|
+
if isinstance(stmt.value, ir.expr_types):
|
|
639
639
|
if stmt.value.op == "make_function":
|
|
640
640
|
node = stmt.value
|
|
641
641
|
getdef = func_ir.get_definition
|
|
642
642
|
kw_default = getdef(node.defaults)
|
|
643
643
|
ok = False
|
|
644
644
|
if kw_default is None or isinstance(
|
|
645
|
-
kw_default, ir.
|
|
645
|
+
kw_default, ir.const_types
|
|
646
646
|
):
|
|
647
647
|
ok = True
|
|
648
648
|
elif isinstance(kw_default, tuple):
|
|
649
649
|
ok = all(
|
|
650
650
|
[
|
|
651
|
-
isinstance(getdef(x), ir.
|
|
651
|
+
isinstance(getdef(x), ir.const_types)
|
|
652
652
|
for x in kw_default
|
|
653
653
|
]
|
|
654
654
|
)
|
|
655
|
-
elif isinstance(kw_default, ir.
|
|
655
|
+
elif isinstance(kw_default, ir.expr_types):
|
|
656
656
|
if kw_default.op != "build_tuple":
|
|
657
657
|
continue
|
|
658
658
|
ok = all(
|
|
659
659
|
[
|
|
660
|
-
isinstance(getdef(x), ir.
|
|
660
|
+
isinstance(getdef(x), ir.const_types)
|
|
661
661
|
for x in kw_default.items
|
|
662
662
|
]
|
|
663
663
|
)
|
|
@@ -700,7 +700,10 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
700
700
|
calls = [_ for _ in blk.find_exprs("call")]
|
|
701
701
|
for call in calls:
|
|
702
702
|
glbl = guard(get_definition, func_ir, call.func)
|
|
703
|
-
if glbl and
|
|
703
|
+
if glbl and (
|
|
704
|
+
isinstance(glbl, ir.global_types)
|
|
705
|
+
or isinstance(glbl, ir.freevar_types)
|
|
706
|
+
):
|
|
704
707
|
# find a literal_unroll
|
|
705
708
|
if glbl.value is literal_unroll:
|
|
706
709
|
if len(call.args) > 1:
|
|
@@ -712,7 +715,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
712
715
|
unroll_var = call.args[0]
|
|
713
716
|
to_unroll = guard(get_definition, func_ir, unroll_var)
|
|
714
717
|
if (
|
|
715
|
-
isinstance(to_unroll, ir.
|
|
718
|
+
isinstance(to_unroll, ir.expr_types)
|
|
716
719
|
and to_unroll.op == "build_list"
|
|
717
720
|
):
|
|
718
721
|
# make sure they are all const items in the list
|
|
@@ -726,7 +729,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
726
729
|
raise errors.UnsupportedError(
|
|
727
730
|
msg % item, to_unroll.loc
|
|
728
731
|
)
|
|
729
|
-
if not isinstance(val, ir.
|
|
732
|
+
if not isinstance(val, ir.const_types):
|
|
730
733
|
msg = (
|
|
731
734
|
"Found non-constant value at "
|
|
732
735
|
"position %s in a list argument to "
|
|
@@ -777,17 +780,18 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
777
780
|
asgn.value = tup
|
|
778
781
|
mutated = True
|
|
779
782
|
elif (
|
|
780
|
-
isinstance(to_unroll, ir.
|
|
783
|
+
isinstance(to_unroll, ir.expr_types)
|
|
781
784
|
and to_unroll.op == "build_tuple"
|
|
782
785
|
):
|
|
783
786
|
# this is fine, do nothing
|
|
784
787
|
pass
|
|
785
|
-
elif
|
|
786
|
-
to_unroll,
|
|
788
|
+
elif (
|
|
789
|
+
isinstance(to_unroll, ir.global_types)
|
|
790
|
+
or isinstance(to_unroll, ir.freevar_types)
|
|
787
791
|
) and isinstance(to_unroll.value, tuple):
|
|
788
792
|
# this is fine, do nothing
|
|
789
793
|
pass
|
|
790
|
-
elif isinstance(to_unroll, ir.
|
|
794
|
+
elif isinstance(to_unroll, ir.arg_types):
|
|
791
795
|
# this is only fine if the arg is a tuple
|
|
792
796
|
ty = state.typemap[to_unroll.name]
|
|
793
797
|
if not isinstance(ty, self._accepted_types):
|
|
@@ -802,7 +806,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
802
806
|
)
|
|
803
807
|
else:
|
|
804
808
|
extra = None
|
|
805
|
-
if isinstance(to_unroll, ir.
|
|
809
|
+
if isinstance(to_unroll, ir.expr_types):
|
|
806
810
|
# probably a slice
|
|
807
811
|
if to_unroll.op == "getitem":
|
|
808
812
|
ty = state.typemap[to_unroll.value.name]
|
|
@@ -810,7 +814,7 @@ class TransformLiteralUnrollConstListToTuple(FunctionPass):
|
|
|
810
814
|
if not isinstance(ty, self._accepted_types):
|
|
811
815
|
extra = "operation %s" % to_unroll.op
|
|
812
816
|
loc = to_unroll.loc
|
|
813
|
-
elif isinstance(to_unroll, ir.
|
|
817
|
+
elif isinstance(to_unroll, ir.arg_types):
|
|
814
818
|
extra = "non-const argument %s" % to_unroll.name
|
|
815
819
|
loc = to_unroll.loc
|
|
816
820
|
else:
|
|
@@ -868,10 +872,10 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
868
872
|
term = None
|
|
869
873
|
if b.body:
|
|
870
874
|
term = b.body[-1]
|
|
871
|
-
if isinstance(term, ir.
|
|
875
|
+
if isinstance(term, ir.jump_types):
|
|
872
876
|
if term.target not in ignore:
|
|
873
877
|
b.body[-1] = ir.Jump(term.target + offset, term.loc)
|
|
874
|
-
if isinstance(term, ir.
|
|
878
|
+
if isinstance(term, ir.branch_types):
|
|
875
879
|
if term.truebr not in ignore:
|
|
876
880
|
new_true = term.truebr + offset
|
|
877
881
|
else:
|
|
@@ -925,7 +929,7 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
925
929
|
sentinel_blocks = []
|
|
926
930
|
for lbl, blk in switch_ir.blocks.items():
|
|
927
931
|
for i, stmt in enumerate(blk.body):
|
|
928
|
-
if isinstance(stmt, ir.
|
|
932
|
+
if isinstance(stmt, ir.assign_types):
|
|
929
933
|
if "SENTINEL" in stmt.target.name:
|
|
930
934
|
sentinel_blocks.append(lbl)
|
|
931
935
|
sentinel_exits.add(blk.body[-1].target)
|
|
@@ -939,10 +943,10 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
939
943
|
local_lbl = [x for x in loop_ir.blocks.keys()]
|
|
940
944
|
for lbl, blk in loop_ir.blocks.items():
|
|
941
945
|
for i, stmt in enumerate(blk.body):
|
|
942
|
-
if isinstance(stmt, ir.
|
|
946
|
+
if isinstance(stmt, ir.jump_types):
|
|
943
947
|
if stmt.target not in local_lbl:
|
|
944
948
|
ignore_set.add(stmt.target)
|
|
945
|
-
if isinstance(stmt, ir.
|
|
949
|
+
if isinstance(stmt, ir.branch_types):
|
|
946
950
|
if stmt.truebr not in local_lbl:
|
|
947
951
|
ignore_set.add(stmt.truebr)
|
|
948
952
|
if stmt.falsebr not in local_lbl:
|
|
@@ -968,9 +972,9 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
968
972
|
for blk in loop_blocks.values():
|
|
969
973
|
new_body = []
|
|
970
974
|
for stmt in blk.body:
|
|
971
|
-
if isinstance(stmt, ir.
|
|
975
|
+
if isinstance(stmt, ir.assign_types):
|
|
972
976
|
if (
|
|
973
|
-
isinstance(stmt.value, ir.
|
|
977
|
+
isinstance(stmt.value, ir.expr_types)
|
|
974
978
|
and stmt.value.op == "typed_getitem"
|
|
975
979
|
):
|
|
976
980
|
if isinstance(branch_ty, types.Literal):
|
|
@@ -1130,8 +1134,8 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1130
1134
|
branches = compile_to_numba_ir(bfunc, {})
|
|
1131
1135
|
for lbl, blk in branches.blocks.items():
|
|
1132
1136
|
for stmt in blk.body:
|
|
1133
|
-
if isinstance(stmt, ir.
|
|
1134
|
-
if isinstance(stmt.value, ir.
|
|
1137
|
+
if isinstance(stmt, ir.assign_types):
|
|
1138
|
+
if isinstance(stmt.value, ir.global_types):
|
|
1135
1139
|
if stmt.value.name == "PLACEHOLDER_INDEX":
|
|
1136
1140
|
stmt.value = index
|
|
1137
1141
|
return branches
|
|
@@ -1154,12 +1158,12 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1154
1158
|
# call to a global function "want" and returns the arguments
|
|
1155
1159
|
# supplied to that function's call
|
|
1156
1160
|
some_call = get_definition(func_ir, init_arg)
|
|
1157
|
-
if not isinstance(some_call, ir.
|
|
1161
|
+
if not isinstance(some_call, ir.expr_types):
|
|
1158
1162
|
raise GuardException
|
|
1159
1163
|
if not some_call.op == "call":
|
|
1160
1164
|
raise GuardException
|
|
1161
1165
|
the_global = get_definition(func_ir, some_call.func)
|
|
1162
|
-
if not isinstance(the_global, ir.
|
|
1166
|
+
if not isinstance(the_global, ir.global_types):
|
|
1163
1167
|
raise GuardException
|
|
1164
1168
|
if the_global.value is not want:
|
|
1165
1169
|
raise GuardException
|
|
@@ -1206,7 +1210,7 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1206
1210
|
)
|
|
1207
1211
|
if literal_unroll_call is None:
|
|
1208
1212
|
continue
|
|
1209
|
-
if not isinstance(literal_unroll_call, ir.
|
|
1213
|
+
if not isinstance(literal_unroll_call, ir.expr_types):
|
|
1210
1214
|
continue
|
|
1211
1215
|
if literal_unroll_call.op != "call":
|
|
1212
1216
|
continue
|
|
@@ -1263,9 +1267,9 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1263
1267
|
for lbli in loop.body:
|
|
1264
1268
|
blk = func_ir.blocks[lbli]
|
|
1265
1269
|
for stmt in blk.body:
|
|
1266
|
-
if isinstance(stmt, ir.
|
|
1270
|
+
if isinstance(stmt, ir.assign_types):
|
|
1267
1271
|
if (
|
|
1268
|
-
isinstance(stmt.value, ir.
|
|
1272
|
+
isinstance(stmt.value, ir.expr_types)
|
|
1269
1273
|
and stmt.value.op == "getitem"
|
|
1270
1274
|
):
|
|
1271
1275
|
# check for something like a[i]
|
|
@@ -1346,9 +1350,9 @@ class MixedContainerUnroller(FunctionPass):
|
|
|
1346
1350
|
for lbl in loop_info.loop.body:
|
|
1347
1351
|
blk = func_ir.blocks[lbl]
|
|
1348
1352
|
for stmt in blk.body:
|
|
1349
|
-
if isinstance(stmt, ir.
|
|
1353
|
+
if isinstance(stmt, ir.assign_types):
|
|
1350
1354
|
if (
|
|
1351
|
-
isinstance(stmt.value, ir.
|
|
1355
|
+
isinstance(stmt.value, ir.expr_types)
|
|
1352
1356
|
and stmt.value.op == "getitem"
|
|
1353
1357
|
):
|
|
1354
1358
|
# try a couple of spellings... a[i] and ref(a)[i]
|
|
@@ -1508,7 +1512,7 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1508
1512
|
# confident that tuple unrolling is behaving require opt-in
|
|
1509
1513
|
# guard of `literal_unroll`, remove this later!
|
|
1510
1514
|
phi_val_defn = guard(get_definition, func_ir, phi.value)
|
|
1511
|
-
if not isinstance(phi_val_defn, ir.
|
|
1515
|
+
if not isinstance(phi_val_defn, ir.expr_types):
|
|
1512
1516
|
return False
|
|
1513
1517
|
if not phi_val_defn.op == "call":
|
|
1514
1518
|
return False
|
|
@@ -1518,7 +1522,7 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1518
1522
|
func_var = guard(get_definition, func_ir, call.func)
|
|
1519
1523
|
func = guard(get_definition, func_ir, func_var)
|
|
1520
1524
|
if func is None or not isinstance(
|
|
1521
|
-
func,
|
|
1525
|
+
func, ir.global_types + ir.freevar_types
|
|
1522
1526
|
):
|
|
1523
1527
|
return False
|
|
1524
1528
|
if (
|
|
@@ -1558,9 +1562,9 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1558
1562
|
# look for iternext
|
|
1559
1563
|
idx = 0
|
|
1560
1564
|
for stmt in entry_block.body:
|
|
1561
|
-
if isinstance(stmt, ir.
|
|
1565
|
+
if isinstance(stmt, ir.assign_types):
|
|
1562
1566
|
if (
|
|
1563
|
-
isinstance(stmt.value, ir.
|
|
1567
|
+
isinstance(stmt.value, ir.expr_types)
|
|
1564
1568
|
and stmt.value.op == "getiter"
|
|
1565
1569
|
):
|
|
1566
1570
|
break
|
|
@@ -1615,7 +1619,7 @@ class IterLoopCanonicalization(FunctionPass):
|
|
|
1615
1619
|
# replace RHS use of induction var with getitem
|
|
1616
1620
|
for lbl in check_blocks:
|
|
1617
1621
|
for stmt in func_ir.blocks[lbl].body:
|
|
1618
|
-
if isinstance(stmt, ir.
|
|
1622
|
+
if isinstance(stmt, ir.assign_types):
|
|
1619
1623
|
# check for aliases
|
|
1620
1624
|
try:
|
|
1621
1625
|
lookup = getattr(stmt.value, "name", None)
|
|
@@ -1675,15 +1679,20 @@ class PropagateLiterals(FunctionPass):
|
|
|
1675
1679
|
changed = False
|
|
1676
1680
|
|
|
1677
1681
|
for block in func_ir.blocks.values():
|
|
1678
|
-
for assign in block.find_insts(ir.
|
|
1682
|
+
for assign in block.find_insts(ir.assign_types):
|
|
1679
1683
|
value = assign.value
|
|
1680
|
-
if
|
|
1684
|
+
if (
|
|
1685
|
+
isinstance(value, ir.arg_types)
|
|
1686
|
+
or isinstance(value, ir.const_types)
|
|
1687
|
+
or isinstance(value, ir.freevar_types)
|
|
1688
|
+
or isinstance(value, ir.global_types)
|
|
1689
|
+
):
|
|
1681
1690
|
continue
|
|
1682
1691
|
|
|
1683
1692
|
# 1) Don't change return stmt in the form
|
|
1684
1693
|
# $return_xyz = cast(value=ABC)
|
|
1685
1694
|
# 2) Don't propagate literal values that are not primitives
|
|
1686
|
-
if isinstance(value, ir.
|
|
1695
|
+
if isinstance(value, ir.expr_types) and value.op in (
|
|
1687
1696
|
"cast",
|
|
1688
1697
|
"build_map",
|
|
1689
1698
|
"build_list",
|
|
@@ -1716,13 +1725,13 @@ class PropagateLiterals(FunctionPass):
|
|
|
1716
1725
|
# At the moment, one avoid propagating the literal
|
|
1717
1726
|
# value if the argument is a PHI node
|
|
1718
1727
|
|
|
1719
|
-
if isinstance(value, ir.
|
|
1728
|
+
if isinstance(value, ir.expr_types) and value.op == "call":
|
|
1720
1729
|
fn = guard(get_definition, func_ir, value.func.name)
|
|
1721
1730
|
if fn is None:
|
|
1722
1731
|
continue
|
|
1723
1732
|
|
|
1724
1733
|
if not (
|
|
1725
|
-
isinstance(fn, ir.
|
|
1734
|
+
isinstance(fn, ir.global_types)
|
|
1726
1735
|
and fn.name in accepted_functions
|
|
1727
1736
|
):
|
|
1728
1737
|
continue
|
|
@@ -1731,7 +1740,10 @@ class PropagateLiterals(FunctionPass):
|
|
|
1731
1740
|
# check if any of the args to isinstance is a PHI node
|
|
1732
1741
|
iv = func_ir._definitions[arg.name]
|
|
1733
1742
|
assert len(iv) == 1 # SSA!
|
|
1734
|
-
if
|
|
1743
|
+
if (
|
|
1744
|
+
isinstance(iv[0], ir.expr_types)
|
|
1745
|
+
and iv[0].op == "phi"
|
|
1746
|
+
):
|
|
1735
1747
|
msg = (
|
|
1736
1748
|
f"{fn.name}() cannot determine the "
|
|
1737
1749
|
f'type of variable "{arg.unversioned_name}" '
|
|
@@ -1741,7 +1753,7 @@ class PropagateLiterals(FunctionPass):
|
|
|
1741
1753
|
|
|
1742
1754
|
# Only propagate a PHI node if all arguments are the same
|
|
1743
1755
|
# constant
|
|
1744
|
-
if isinstance(value, ir.
|
|
1756
|
+
if isinstance(value, ir.expr_types) and value.op == "phi":
|
|
1745
1757
|
# typemap will return None in case `inc.name` not in typemap
|
|
1746
1758
|
v = [typemap.get(inc.name) for inc in value.incoming_values]
|
|
1747
1759
|
# stop if the elements in `v` do not hold the same value
|
|
@@ -1788,8 +1800,10 @@ class LiteralPropagationSubPipelinePass(FunctionPass):
|
|
|
1788
1800
|
found = False
|
|
1789
1801
|
func_ir = state.func_ir
|
|
1790
1802
|
for blk in func_ir.blocks.values():
|
|
1791
|
-
for asgn in blk.find_insts(ir.
|
|
1792
|
-
if isinstance(asgn.value,
|
|
1803
|
+
for asgn in blk.find_insts(ir.assign_types):
|
|
1804
|
+
if isinstance(asgn.value, ir.global_types) or isinstance(
|
|
1805
|
+
asgn.value, ir.freevar_types
|
|
1806
|
+
):
|
|
1793
1807
|
value = asgn.value.value
|
|
1794
1808
|
if value is isinstance or value is hasattr:
|
|
1795
1809
|
found = True
|
|
@@ -1835,8 +1849,10 @@ class LiteralUnroll(FunctionPass):
|
|
|
1835
1849
|
found = False
|
|
1836
1850
|
func_ir = state.func_ir
|
|
1837
1851
|
for blk in func_ir.blocks.values():
|
|
1838
|
-
for asgn in blk.find_insts(ir.
|
|
1839
|
-
if isinstance(asgn.value,
|
|
1852
|
+
for asgn in blk.find_insts(ir.assign_types):
|
|
1853
|
+
if isinstance(asgn.value, ir.global_types) or isinstance(
|
|
1854
|
+
asgn.value, ir.freevar_types
|
|
1855
|
+
):
|
|
1840
1856
|
if asgn.value.value is literal_unroll:
|
|
1841
1857
|
found = True
|
|
1842
1858
|
break
|
|
@@ -1953,7 +1969,7 @@ class RewriteDynamicRaises(FunctionPass):
|
|
|
1953
1969
|
changed = False
|
|
1954
1970
|
|
|
1955
1971
|
for block in func_ir.blocks.values():
|
|
1956
|
-
for raise_ in block.find_insts(
|
|
1972
|
+
for raise_ in block.find_insts(ir.raise_types + ir.tryraise_types):
|
|
1957
1973
|
call_inst = guard(get_definition, func_ir, raise_.exception)
|
|
1958
1974
|
if call_inst is None:
|
|
1959
1975
|
continue
|
|
@@ -103,16 +103,6 @@ class Cuda_syncwarp(ConcreteTemplate):
|
|
|
103
103
|
cases = [signature(types.none), signature(types.none, types.i4)]
|
|
104
104
|
|
|
105
105
|
|
|
106
|
-
@register
|
|
107
|
-
class Cuda_vote_sync_intrinsic(ConcreteTemplate):
|
|
108
|
-
key = cuda.vote_sync_intrinsic
|
|
109
|
-
cases = [
|
|
110
|
-
signature(
|
|
111
|
-
types.Tuple((types.i4, types.b1)), types.i4, types.i4, types.b1
|
|
112
|
-
)
|
|
113
|
-
]
|
|
114
|
-
|
|
115
|
-
|
|
116
106
|
@register
|
|
117
107
|
class Cuda_match_any_sync(ConcreteTemplate):
|
|
118
108
|
key = cuda.match_any_sync
|
|
@@ -522,9 +512,6 @@ class CudaModuleTemplate(AttributeTemplate):
|
|
|
522
512
|
def resolve_syncwarp(self, mod):
|
|
523
513
|
return types.Function(Cuda_syncwarp)
|
|
524
514
|
|
|
525
|
-
def resolve_vote_sync_intrinsic(self, mod):
|
|
526
|
-
return types.Function(Cuda_vote_sync_intrinsic)
|
|
527
|
-
|
|
528
515
|
def resolve_match_any_sync(self, mod):
|
|
529
516
|
return types.Function(Cuda_match_any_sync)
|
|
530
517
|
|
|
@@ -15,7 +15,6 @@ from ctypes import c_void_p
|
|
|
15
15
|
|
|
16
16
|
import numpy as np
|
|
17
17
|
|
|
18
|
-
from numba.cuda.cext import _devicearray
|
|
19
18
|
from numba.cuda.cudadrv import devices, dummyarray
|
|
20
19
|
from numba.cuda.cudadrv import driver as _driver
|
|
21
20
|
from numba.cuda import types
|
|
@@ -55,7 +54,7 @@ def require_cuda_ndarray(obj):
|
|
|
55
54
|
raise ValueError("require an cuda ndarray object")
|
|
56
55
|
|
|
57
56
|
|
|
58
|
-
class DeviceNDArrayBase
|
|
57
|
+
class DeviceNDArrayBase:
|
|
59
58
|
"""A on GPU NDArray representation"""
|
|
60
59
|
|
|
61
60
|
__cuda_memory__ = True
|
|
@@ -108,7 +107,9 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
|
|
|
108
107
|
else:
|
|
109
108
|
# Make NULL pointer for empty allocation
|
|
110
109
|
null = _driver.binding.CUdeviceptr(0)
|
|
111
|
-
gpu_data = _driver.MemoryPointer(
|
|
110
|
+
gpu_data = _driver.MemoryPointer(
|
|
111
|
+
context=devices.get_context(), pointer=null, size=0
|
|
112
|
+
)
|
|
112
113
|
self.alloc_size = 0
|
|
113
114
|
|
|
114
115
|
self.gpu_data = gpu_data
|
|
@@ -158,7 +159,7 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
|
|
|
158
159
|
def _default_stream(self, stream):
|
|
159
160
|
return self.stream if not stream else stream
|
|
160
161
|
|
|
161
|
-
@
|
|
162
|
+
@functools.cached_property
|
|
162
163
|
def _numba_type_(self):
|
|
163
164
|
"""
|
|
164
165
|
Magic attribute expected by Numba to get the numba type that
|
|
@@ -177,8 +178,8 @@ class DeviceNDArrayBase(_devicearray.DeviceArray):
|
|
|
177
178
|
# or 'F' does not apply for broadcast arrays, because the strides, some
|
|
178
179
|
# of which will be 0, will not match those hardcoded in for 'C' or 'F'
|
|
179
180
|
# layouts.
|
|
181
|
+
broadcast = 0 in self.strides and (self.size != 0)
|
|
180
182
|
|
|
181
|
-
broadcast = 0 in self.strides
|
|
182
183
|
if self.flags["C_CONTIGUOUS"] and not broadcast:
|
|
183
184
|
layout = "C"
|
|
184
185
|
elif self.flags["F_CONTIGUOUS"] and not broadcast:
|