numba-cuda 0.21.1__cp313-cp313-win_amd64.whl → 0.24.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_compat.py +47 -0
- numba_cuda/numba/cuda/api.py +4 -1
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -40
- numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +56 -119
- numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
- numba_cuda/numba/cuda/codegen.py +46 -12
- numba_cuda/numba/cuda/compiler.py +15 -9
- numba_cuda/numba/cuda/core/analysis.py +29 -21
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +4 -4
- numba_cuda/numba/cuda/core/base.py +12 -11
- numba_cuda/numba/cuda/core/bytecode.py +21 -13
- numba_cuda/numba/cuda/core/byteflow.py +336 -90
- numba_cuda/numba/cuda/core/compiler.py +3 -4
- numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
- numba_cuda/numba/cuda/core/config.py +5 -7
- numba_cuda/numba/cuda/core/consts.py +1 -1
- numba_cuda/numba/cuda/core/controlflow.py +17 -9
- numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
- numba_cuda/numba/cuda/core/errors.py +4 -912
- numba_cuda/numba/cuda/core/inline_closurecall.py +82 -67
- numba_cuda/numba/cuda/core/interpreter.py +334 -160
- numba_cuda/numba/cuda/core/ir.py +191 -119
- numba_cuda/numba/cuda/core/ir_utils.py +149 -128
- numba_cuda/numba/cuda/core/postproc.py +8 -8
- numba_cuda/numba/cuda/core/pythonapi.py +3 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +6 -3
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +5 -5
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +3 -3
- numba_cuda/numba/cuda/core/ssa.py +5 -5
- numba_cuda/numba/cuda/core/transforms.py +29 -16
- numba_cuda/numba/cuda/core/typed_passes.py +10 -10
- numba_cuda/numba/cuda/core/typeinfer.py +42 -27
- numba_cuda/numba/cuda/core/untyped_passes.py +82 -65
- numba_cuda/numba/cuda/cpython/unicode.py +2 -2
- numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
- numba_cuda/numba/cuda/cudadecl.py +0 -13
- numba_cuda/numba/cuda/cudadrv/devicearray.py +10 -9
- numba_cuda/numba/cuda/cudadrv/driver.py +142 -519
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +87 -32
- numba_cuda/numba/cuda/cudaimpl.py +0 -12
- numba_cuda/numba/cuda/debuginfo.py +25 -0
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +4 -7
- numba_cuda/numba/cuda/deviceufunc.py +3 -6
- numba_cuda/numba/cuda/dispatcher.py +39 -49
- numba_cuda/numba/cuda/intrinsics.py +150 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
- numba_cuda/numba/cuda/lowering.py +36 -29
- numba_cuda/numba/cuda/memory_management/nrt.py +10 -14
- numba_cuda/numba/cuda/np/arrayobj.py +61 -9
- numba_cuda/numba/cuda/np/numpy_support.py +32 -9
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
- numba_cuda/numba/cuda/printimpl.py +20 -0
- numba_cuda/numba/cuda/serialize.py +10 -0
- numba_cuda/numba/cuda/stubs.py +0 -11
- numba_cuda/numba/cuda/testing.py +4 -8
- numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +21 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +195 -51
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +6 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +3 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +6 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +11 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +53 -23
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +22 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_array_capture.py +243 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_numba_interop.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +51 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +117 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_globals.py +111 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +61 -0
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +31 -0
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
- numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
- numba_cuda/numba/cuda/typing/context.py +3 -1
- numba_cuda/numba/cuda/typing/typeof.py +51 -2
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +106 -105
- numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_devicearray.cpp +0 -159
- numba_cuda/numba/cuda/cext/_devicearray.h +0 -29
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -41
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.21.1.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
|
@@ -441,7 +441,9 @@ class Lower(BaseLower):
|
|
|
441
441
|
# Ensure that the variable is not defined multiple times
|
|
442
442
|
# in the block
|
|
443
443
|
[defblk] = var_assign_map[var]
|
|
444
|
-
assign_stmts = self.blocks[defblk].find_insts(
|
|
444
|
+
assign_stmts = self.blocks[defblk].find_insts(
|
|
445
|
+
ir.assign_types
|
|
446
|
+
)
|
|
445
447
|
assigns = [
|
|
446
448
|
stmt
|
|
447
449
|
for stmt in assign_stmts
|
|
@@ -468,7 +470,7 @@ class Lower(BaseLower):
|
|
|
468
470
|
self.builder.position_at_end(bb)
|
|
469
471
|
all_names = set()
|
|
470
472
|
for block in self.blocks.values():
|
|
471
|
-
for x in block.find_insts(ir.
|
|
473
|
+
for x in block.find_insts(ir.del_types):
|
|
472
474
|
if x.value not in all_names:
|
|
473
475
|
all_names.add(x.value)
|
|
474
476
|
for name in all_names:
|
|
@@ -483,9 +485,9 @@ class Lower(BaseLower):
|
|
|
483
485
|
self.func_ir,
|
|
484
486
|
call.func,
|
|
485
487
|
)
|
|
486
|
-
if defn is not None and isinstance(defn, ir.
|
|
488
|
+
if defn is not None and isinstance(defn, ir.global_types):
|
|
487
489
|
if defn.value is eh.exception_check:
|
|
488
|
-
if isinstance(block.terminator, ir.
|
|
490
|
+
if isinstance(block.terminator, ir.branch_types):
|
|
489
491
|
targetblk = self.blkmap[block.terminator.truebr]
|
|
490
492
|
# NOTE: This hacks in an attribute for call_conv to
|
|
491
493
|
# pick up. This hack is no longer needed when
|
|
@@ -505,19 +507,19 @@ class Lower(BaseLower):
|
|
|
505
507
|
self.debuginfo.mark_location(self.builder, self.loc.line)
|
|
506
508
|
self.notify_loc(self.loc)
|
|
507
509
|
self.debug_print(str(inst))
|
|
508
|
-
if isinstance(inst, ir.
|
|
510
|
+
if isinstance(inst, ir.assign_types):
|
|
509
511
|
ty = self.typeof(inst.target.name)
|
|
510
512
|
val = self.lower_assign(ty, inst)
|
|
511
513
|
argidx = None
|
|
512
514
|
# If this is a store from an arg, like x = arg.x then tell debuginfo
|
|
513
515
|
# that this is the arg
|
|
514
|
-
if isinstance(inst.value, ir.
|
|
516
|
+
if isinstance(inst.value, ir.arg_types):
|
|
515
517
|
# NOTE: debug location is the `def <func>` line
|
|
516
518
|
self.debuginfo.mark_location(self.builder, self.defn_loc.line)
|
|
517
519
|
argidx = inst.value.index + 1 # args start at 1
|
|
518
520
|
self.storevar(val, inst.target.name, argidx=argidx)
|
|
519
521
|
|
|
520
|
-
elif isinstance(inst, ir.
|
|
522
|
+
elif isinstance(inst, ir.branch_types):
|
|
521
523
|
cond = self.loadvar(inst.cond.name)
|
|
522
524
|
tr = self.blkmap[inst.truebr]
|
|
523
525
|
fl = self.blkmap[inst.falsebr]
|
|
@@ -529,11 +531,11 @@ class Lower(BaseLower):
|
|
|
529
531
|
)
|
|
530
532
|
self.builder.cbranch(pred, tr, fl)
|
|
531
533
|
|
|
532
|
-
elif isinstance(inst, ir.
|
|
534
|
+
elif isinstance(inst, ir.jump_types):
|
|
533
535
|
target = self.blkmap[inst.target]
|
|
534
536
|
self.builder.branch(target)
|
|
535
537
|
|
|
536
|
-
elif isinstance(inst, ir.
|
|
538
|
+
elif isinstance(inst, ir.return_types):
|
|
537
539
|
if self.generator_info:
|
|
538
540
|
# StopIteration
|
|
539
541
|
self.genlower.return_from_generator(self)
|
|
@@ -551,10 +553,10 @@ class Lower(BaseLower):
|
|
|
551
553
|
retval = self.context.get_return_value(self.builder, ty, val)
|
|
552
554
|
self.call_conv.return_value(self.builder, retval)
|
|
553
555
|
|
|
554
|
-
elif isinstance(inst, ir.
|
|
556
|
+
elif isinstance(inst, ir.popblock_types):
|
|
555
557
|
pass # this is just a marker
|
|
556
558
|
|
|
557
|
-
elif isinstance(inst, ir.
|
|
559
|
+
elif isinstance(inst, ir.staticsetitem_types):
|
|
558
560
|
signature = self.fndesc.calltypes[inst]
|
|
559
561
|
assert signature is not None
|
|
560
562
|
try:
|
|
@@ -572,22 +574,22 @@ class Lower(BaseLower):
|
|
|
572
574
|
)
|
|
573
575
|
return impl(self.builder, (target, inst.index, value))
|
|
574
576
|
|
|
575
|
-
elif isinstance(inst, ir.
|
|
577
|
+
elif isinstance(inst, ir.print_types):
|
|
576
578
|
self.lower_print(inst)
|
|
577
579
|
|
|
578
|
-
elif isinstance(inst, ir.
|
|
580
|
+
elif isinstance(inst, ir.setitem_types):
|
|
579
581
|
signature = self.fndesc.calltypes[inst]
|
|
580
582
|
assert signature is not None
|
|
581
583
|
return self.lower_setitem(
|
|
582
584
|
inst.target, inst.index, inst.value, signature
|
|
583
585
|
)
|
|
584
586
|
|
|
585
|
-
elif isinstance(inst, ir.
|
|
587
|
+
elif isinstance(inst, ir.storemap_types):
|
|
586
588
|
signature = self.fndesc.calltypes[inst]
|
|
587
589
|
assert signature is not None
|
|
588
590
|
return self.lower_setitem(inst.dct, inst.key, inst.value, signature)
|
|
589
591
|
|
|
590
|
-
elif isinstance(inst, ir.
|
|
592
|
+
elif isinstance(inst, ir.delitem_types):
|
|
591
593
|
target = self.loadvar(inst.target.name)
|
|
592
594
|
index = self.loadvar(inst.index.name)
|
|
593
595
|
|
|
@@ -613,10 +615,10 @@ class Lower(BaseLower):
|
|
|
613
615
|
|
|
614
616
|
return impl(self.builder, (target, index))
|
|
615
617
|
|
|
616
|
-
elif isinstance(inst, ir.
|
|
618
|
+
elif isinstance(inst, ir.del_types):
|
|
617
619
|
self.delvar(inst.value)
|
|
618
620
|
|
|
619
|
-
elif isinstance(inst, ir.
|
|
621
|
+
elif isinstance(inst, ir.setattr_types):
|
|
620
622
|
target = self.loadvar(inst.target.name)
|
|
621
623
|
value = self.loadvar(inst.value.name)
|
|
622
624
|
signature = self.fndesc.calltypes[inst]
|
|
@@ -634,16 +636,16 @@ class Lower(BaseLower):
|
|
|
634
636
|
|
|
635
637
|
return impl(self.builder, (target, value))
|
|
636
638
|
|
|
637
|
-
elif isinstance(inst, ir.
|
|
639
|
+
elif isinstance(inst, ir.dynamicraise_types):
|
|
638
640
|
self.lower_dynamic_raise(inst)
|
|
639
641
|
|
|
640
|
-
elif isinstance(inst, ir.
|
|
642
|
+
elif isinstance(inst, ir.dynamictryraise_types):
|
|
641
643
|
self.lower_try_dynamic_raise(inst)
|
|
642
644
|
|
|
643
|
-
elif isinstance(inst, ir.
|
|
645
|
+
elif isinstance(inst, ir.staticraise_types):
|
|
644
646
|
self.lower_static_raise(inst)
|
|
645
647
|
|
|
646
|
-
elif isinstance(inst, ir.
|
|
648
|
+
elif isinstance(inst, ir.statictryraise_types):
|
|
647
649
|
self.lower_static_try_raise(inst)
|
|
648
650
|
|
|
649
651
|
else:
|
|
@@ -695,7 +697,7 @@ class Lower(BaseLower):
|
|
|
695
697
|
args = []
|
|
696
698
|
nb_types = []
|
|
697
699
|
for exc_arg in exc_args:
|
|
698
|
-
if isinstance(exc_arg, ir.
|
|
700
|
+
if isinstance(exc_arg, ir.var_types):
|
|
699
701
|
# dynamic values
|
|
700
702
|
typ = self.typeof(exc_arg.name)
|
|
701
703
|
val = self.loadvar(exc_arg.name)
|
|
@@ -727,24 +729,28 @@ class Lower(BaseLower):
|
|
|
727
729
|
def lower_assign(self, ty, inst):
|
|
728
730
|
value = inst.value
|
|
729
731
|
# In nopython mode, closure vars are frozen like globals
|
|
730
|
-
if
|
|
732
|
+
if (
|
|
733
|
+
isinstance(value, ir.const_types)
|
|
734
|
+
or isinstance(value, ir.global_types)
|
|
735
|
+
or isinstance(value, ir.freevar_types)
|
|
736
|
+
):
|
|
731
737
|
res = self.context.get_constant_generic(
|
|
732
738
|
self.builder, ty, value.value
|
|
733
739
|
)
|
|
734
740
|
self.incref(ty, res)
|
|
735
741
|
return res
|
|
736
742
|
|
|
737
|
-
elif isinstance(value, ir.
|
|
743
|
+
elif isinstance(value, ir.expr_types):
|
|
738
744
|
return self.lower_expr(ty, value)
|
|
739
745
|
|
|
740
|
-
elif isinstance(value, ir.
|
|
746
|
+
elif isinstance(value, ir.var_types):
|
|
741
747
|
val = self.loadvar(value.name)
|
|
742
748
|
oty = self.typeof(value.name)
|
|
743
749
|
res = self.context.cast(self.builder, val, oty, ty)
|
|
744
750
|
self.incref(ty, res)
|
|
745
751
|
return res
|
|
746
752
|
|
|
747
|
-
elif isinstance(value, ir.
|
|
753
|
+
elif isinstance(value, ir.arg_types):
|
|
748
754
|
# Suspend debug info else all the arg repacking ends up being
|
|
749
755
|
# associated with some line or other and it's actually just a detail
|
|
750
756
|
# of Numba's CC.
|
|
@@ -770,7 +776,7 @@ class Lower(BaseLower):
|
|
|
770
776
|
self.incref(ty, res)
|
|
771
777
|
return res
|
|
772
778
|
|
|
773
|
-
elif isinstance(value, ir.
|
|
779
|
+
elif isinstance(value, ir.yield_types):
|
|
774
780
|
res = self.lower_yield(ty, value)
|
|
775
781
|
self.incref(ty, res)
|
|
776
782
|
return res
|
|
@@ -1792,7 +1798,7 @@ class CUDALower(Lower):
|
|
|
1792
1798
|
self.dbg_val_names = set()
|
|
1793
1799
|
|
|
1794
1800
|
if self.context.enable_debuginfo and self._disable_sroa_like_opt:
|
|
1795
|
-
for x in block.find_insts(ir.
|
|
1801
|
+
for x in block.find_insts(ir.assign_types):
|
|
1796
1802
|
if x.target.name.startswith("$"):
|
|
1797
1803
|
continue
|
|
1798
1804
|
ssa_name = x.target.name
|
|
@@ -1806,6 +1812,7 @@ class CUDALower(Lower):
|
|
|
1806
1812
|
"""
|
|
1807
1813
|
super().pre_lower()
|
|
1808
1814
|
|
|
1815
|
+
# Track polymorphic variables for debug info
|
|
1809
1816
|
self.poly_var_typ_map = {}
|
|
1810
1817
|
self.poly_var_loc_map = {}
|
|
1811
1818
|
self.poly_var_set = set()
|
|
@@ -1818,7 +1825,7 @@ class CUDALower(Lower):
|
|
|
1818
1825
|
poly_map = {}
|
|
1819
1826
|
# pre-scan all blocks
|
|
1820
1827
|
for block in self.blocks.values():
|
|
1821
|
-
for x in block.find_insts(ir.
|
|
1828
|
+
for x in block.find_insts(ir.assign_types):
|
|
1822
1829
|
if x.target.name.startswith("$"):
|
|
1823
1830
|
continue
|
|
1824
1831
|
ssa_name = x.target.name
|
|
@@ -13,9 +13,10 @@ from numba.cuda import config, types
|
|
|
13
13
|
from numba.cuda.cudadrv.driver import (
|
|
14
14
|
_Linker,
|
|
15
15
|
driver,
|
|
16
|
-
|
|
16
|
+
_to_core_stream,
|
|
17
17
|
_have_nvjitlink,
|
|
18
18
|
)
|
|
19
|
+
from numba.cuda._compat import LaunchConfig, launch
|
|
19
20
|
from numba.cuda.cudadrv import devices
|
|
20
21
|
from numba.cuda.api import get_current_device
|
|
21
22
|
from numba.cuda.utils import _readenv, cached_file_read
|
|
@@ -126,7 +127,7 @@ class _Runtime:
|
|
|
126
127
|
cc = get_current_device().compute_capability
|
|
127
128
|
|
|
128
129
|
# Create a new linker instance and add the cu file
|
|
129
|
-
linker = _Linker
|
|
130
|
+
linker = _Linker(max_registers=0, cc=cc, lto=_have_nvjitlink())
|
|
130
131
|
linker.add_cu_file(memsys_mod)
|
|
131
132
|
|
|
132
133
|
# Complete the linker and create a module from it
|
|
@@ -179,20 +180,15 @@ class _Runtime:
|
|
|
179
180
|
stream = cuda.default_stream()
|
|
180
181
|
|
|
181
182
|
func = module.get_function(name)
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
1,
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
1,
|
|
188
|
-
1,
|
|
189
|
-
1,
|
|
190
|
-
0,
|
|
191
|
-
stream.handle.value,
|
|
192
|
-
params,
|
|
193
|
-
cooperative=False,
|
|
183
|
+
config = LaunchConfig(
|
|
184
|
+
grid=(1, 1, 1),
|
|
185
|
+
block=(1, 1, 1),
|
|
186
|
+
shmem_size=0,
|
|
187
|
+
cooperative_launch=False,
|
|
194
188
|
)
|
|
195
189
|
|
|
190
|
+
launch(_to_core_stream(stream), config, func.kernel, *params)
|
|
191
|
+
|
|
196
192
|
def ensure_initialized(self, stream=None):
|
|
197
193
|
"""
|
|
198
194
|
If memsys is not initialized, initialize memsys
|
|
@@ -31,6 +31,7 @@ from numba.cuda.np.numpy_support import (
|
|
|
31
31
|
type_is_scalar,
|
|
32
32
|
lt_complex,
|
|
33
33
|
lt_floats,
|
|
34
|
+
strides_from_shape,
|
|
34
35
|
)
|
|
35
36
|
from numba.cuda.np.numpy_support import (
|
|
36
37
|
type_can_asarray,
|
|
@@ -1797,10 +1798,10 @@ def numpy_broadcast_arrays(*args):
|
|
|
1797
1798
|
tup = tuple_setitem(tup, i, shape[i])
|
|
1798
1799
|
|
|
1799
1800
|
# numpy checks if the input arrays have the same shape as `shape`
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1801
|
+
return [
|
|
1802
|
+
np.broadcast_to(np.asarray(array), tup)
|
|
1803
|
+
for array in literal_unroll(args)
|
|
1804
|
+
]
|
|
1804
1805
|
|
|
1805
1806
|
return impl
|
|
1806
1807
|
|
|
@@ -3642,10 +3643,63 @@ def record_static_setitem_int(context, builder, sig, args):
|
|
|
3642
3643
|
def constant_array(context, builder, ty, pyval):
|
|
3643
3644
|
"""
|
|
3644
3645
|
Create a constant array (mechanism is target-dependent).
|
|
3646
|
+
|
|
3647
|
+
For objects implementing __cuda_array_interface__,
|
|
3648
|
+
the device pointer is embedded directly as a constant. For other arrays,
|
|
3649
|
+
the target-specific mechanism is used.
|
|
3645
3650
|
"""
|
|
3651
|
+
# Check if this is a device array (implements __cuda_array_interface__)
|
|
3652
|
+
if getattr(pyval, "__cuda_array_interface__", None) is not None:
|
|
3653
|
+
return _lower_constant_device_array(context, builder, ty, pyval)
|
|
3654
|
+
|
|
3646
3655
|
return context.make_constant_array(builder, ty, pyval)
|
|
3647
3656
|
|
|
3648
3657
|
|
|
3658
|
+
def _lower_constant_device_array(context, builder, ty, pyval):
|
|
3659
|
+
"""
|
|
3660
|
+
Lower objects with __cuda_array_interface__ by embedding the device
|
|
3661
|
+
pointer as a constant.
|
|
3662
|
+
|
|
3663
|
+
This allows device arrays captured from globals to be used in CUDA
|
|
3664
|
+
kernels and device functions.
|
|
3665
|
+
"""
|
|
3666
|
+
interface = pyval.__cuda_array_interface__
|
|
3667
|
+
|
|
3668
|
+
# Hold on to the device array to prevent garbage collection.
|
|
3669
|
+
context.active_code_library.referenced_objects[id(pyval)] = pyval
|
|
3670
|
+
|
|
3671
|
+
shape = interface["shape"]
|
|
3672
|
+
strides = interface.get("strides")
|
|
3673
|
+
data_ptr = interface["data"][0]
|
|
3674
|
+
typestr = interface["typestr"]
|
|
3675
|
+
itemsize = np.dtype(typestr).itemsize
|
|
3676
|
+
|
|
3677
|
+
# Calculate strides if not provided (C-contiguous)
|
|
3678
|
+
if strides is None:
|
|
3679
|
+
strides = strides_from_shape(shape, itemsize, order="C")
|
|
3680
|
+
|
|
3681
|
+
# Embed device pointer as constant
|
|
3682
|
+
llvoidptr = context.get_value_type(types.voidptr)
|
|
3683
|
+
data = context.get_constant(types.uintp, data_ptr).inttoptr(llvoidptr)
|
|
3684
|
+
|
|
3685
|
+
# Build array structure
|
|
3686
|
+
ary = context.make_array(ty)(context, builder)
|
|
3687
|
+
kshape = [context.get_constant(types.intp, s) for s in shape]
|
|
3688
|
+
kstrides = [context.get_constant(types.intp, s) for s in strides]
|
|
3689
|
+
|
|
3690
|
+
context.populate_array(
|
|
3691
|
+
ary,
|
|
3692
|
+
data=builder.bitcast(data, ary.data.type),
|
|
3693
|
+
shape=kshape,
|
|
3694
|
+
strides=kstrides,
|
|
3695
|
+
itemsize=context.get_constant(types.intp, itemsize),
|
|
3696
|
+
parent=None,
|
|
3697
|
+
meminfo=None,
|
|
3698
|
+
)
|
|
3699
|
+
|
|
3700
|
+
return ary._getvalue()
|
|
3701
|
+
|
|
3702
|
+
|
|
3649
3703
|
@lower_constant(types.Record)
|
|
3650
3704
|
def constant_record(context, builder, ty, pyval):
|
|
3651
3705
|
"""
|
|
@@ -4768,13 +4822,11 @@ def _parse_shape(context, builder, ty, val):
|
|
|
4768
4822
|
ndim = ty.count
|
|
4769
4823
|
passed_shapes = cgutils.unpack_tuple(builder, val, count=ndim)
|
|
4770
4824
|
|
|
4771
|
-
shapes = []
|
|
4772
|
-
for s in passed_shapes:
|
|
4773
|
-
shapes.append(safecast_intp(context, builder, s.type, s))
|
|
4825
|
+
shapes = [safecast_intp(context, builder, s.type, s) for s in passed_shapes]
|
|
4774
4826
|
|
|
4775
4827
|
zero = context.get_constant_generic(builder, types.intp, 0)
|
|
4776
|
-
for
|
|
4777
|
-
is_neg = builder.icmp_signed("<",
|
|
4828
|
+
for shape in shapes:
|
|
4829
|
+
is_neg = builder.icmp_signed("<", shape, zero)
|
|
4778
4830
|
with cgutils.if_unlikely(builder, is_neg):
|
|
4779
4831
|
context.call_conv.return_user_exc(
|
|
4780
4832
|
builder, ValueError, ("negative dimensions not allowed",)
|
|
@@ -3,7 +3,11 @@
|
|
|
3
3
|
|
|
4
4
|
import collections
|
|
5
5
|
import ctypes
|
|
6
|
+
import itertools
|
|
7
|
+
import functools
|
|
8
|
+
import operator
|
|
6
9
|
import re
|
|
10
|
+
|
|
7
11
|
import numpy as np
|
|
8
12
|
|
|
9
13
|
from numba.cuda import types
|
|
@@ -17,6 +21,30 @@ from numba.cuda.cgutils import is_nonelike # noqa: F401
|
|
|
17
21
|
|
|
18
22
|
numpy_version = tuple(map(int, np.__version__.split(".")[:2]))
|
|
19
23
|
|
|
24
|
+
|
|
25
|
+
@functools.lru_cache
|
|
26
|
+
def strides_from_shape(
|
|
27
|
+
shape: tuple[int, ...], itemsize: int, *, order: str
|
|
28
|
+
) -> tuple[int, ...]:
|
|
29
|
+
"""Compute strides for a contiguous array with given shape and order."""
|
|
30
|
+
if not shape:
|
|
31
|
+
# 0-D arrays have empty strides
|
|
32
|
+
return ()
|
|
33
|
+
limits = slice(1, None) if order == "C" else slice(None, -1)
|
|
34
|
+
transform = reversed if order == "C" else lambda x: x
|
|
35
|
+
strides = tuple(
|
|
36
|
+
map(
|
|
37
|
+
itemsize.__mul__,
|
|
38
|
+
itertools.accumulate(
|
|
39
|
+
transform(shape[limits]), operator.mul, initial=1
|
|
40
|
+
),
|
|
41
|
+
)
|
|
42
|
+
)
|
|
43
|
+
if order == "F":
|
|
44
|
+
return strides
|
|
45
|
+
return strides[::-1]
|
|
46
|
+
|
|
47
|
+
|
|
20
48
|
FROM_DTYPE = {
|
|
21
49
|
np.dtype("bool"): types.boolean,
|
|
22
50
|
np.dtype("int8"): types.int8,
|
|
@@ -92,16 +120,11 @@ def from_dtype(dtype):
|
|
|
92
120
|
elif getattr(dtype, "fields", None) is not None:
|
|
93
121
|
return from_struct_dtype(dtype)
|
|
94
122
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
pass
|
|
123
|
+
result = FROM_DTYPE.get(dtype)
|
|
124
|
+
if result is not None:
|
|
125
|
+
return result
|
|
99
126
|
|
|
100
|
-
|
|
101
|
-
char = dtype.char
|
|
102
|
-
except AttributeError:
|
|
103
|
-
pass
|
|
104
|
-
else:
|
|
127
|
+
if (char := getattr(dtype, "char", None)) is not None:
|
|
105
128
|
if char in "SU":
|
|
106
129
|
return _from_str_dtype(dtype)
|
|
107
130
|
if char in "mM":
|
|
@@ -122,9 +122,10 @@ def polyutils_as_series(alist, trim=True):
|
|
|
122
122
|
|
|
123
123
|
def impl(alist, trim=True):
|
|
124
124
|
if tuple_input:
|
|
125
|
-
arrays = [
|
|
126
|
-
|
|
127
|
-
|
|
125
|
+
arrays = [
|
|
126
|
+
np.atleast_1d(np.asarray(item)).astype(res_dtype)
|
|
127
|
+
for item in literal_unroll(alist)
|
|
128
|
+
]
|
|
128
129
|
|
|
129
130
|
elif list_input:
|
|
130
131
|
arrays = [
|
|
@@ -32,6 +32,26 @@ def print_item(ty, context, builder, val):
|
|
|
32
32
|
)
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
@print_item.register(types.Tuple)
|
|
36
|
+
@print_item.register(types.UniTuple)
|
|
37
|
+
def tuple_print_impl(ty, context, builder, val):
|
|
38
|
+
formats = []
|
|
39
|
+
values = []
|
|
40
|
+
|
|
41
|
+
for i, argtyp in enumerate(ty.types):
|
|
42
|
+
argval = builder.extract_value(val, i)
|
|
43
|
+
argfmt, argvals = print_item(argtyp, context, builder, argval)
|
|
44
|
+
formats.append(argfmt)
|
|
45
|
+
values.extend(argvals)
|
|
46
|
+
|
|
47
|
+
if len(formats) == 1:
|
|
48
|
+
base = "({},)"
|
|
49
|
+
else:
|
|
50
|
+
base = "({})"
|
|
51
|
+
rawfmt = base.format(", ".join(formats))
|
|
52
|
+
return rawfmt, values
|
|
53
|
+
|
|
54
|
+
|
|
35
55
|
@print_item.register(types.Integer)
|
|
36
56
|
@print_item.register(types.IntegerLiteral)
|
|
37
57
|
def int_print_impl(ty, context, builder, val):
|
|
@@ -197,6 +197,16 @@ class NumbaPickler(cloudpickle.CloudPickler):
|
|
|
197
197
|
# Overridden to disable pickling of certain types
|
|
198
198
|
if type(obj) in self.disabled_types:
|
|
199
199
|
_no_pickle(obj) # noreturn
|
|
200
|
+
|
|
201
|
+
# Prevent pickling of objects implementing __cuda_array_interface__
|
|
202
|
+
# These contain device pointers that would become stale after unpickling
|
|
203
|
+
if getattr(obj, "__cuda_array_interface__", None) is not None:
|
|
204
|
+
raise pickle.PicklingError(
|
|
205
|
+
"Cannot serialize kernels or device functions referencing "
|
|
206
|
+
"global device arrays. Pass the array(s) as arguments "
|
|
207
|
+
"to the kernel instead."
|
|
208
|
+
)
|
|
209
|
+
|
|
200
210
|
return super().reducer_override(obj)
|
|
201
211
|
|
|
202
212
|
|
numba_cuda/numba/cuda/stubs.py
CHANGED
|
@@ -200,17 +200,6 @@ class syncwarp(Stub):
|
|
|
200
200
|
_description_ = "<warp_sync()>"
|
|
201
201
|
|
|
202
202
|
|
|
203
|
-
class vote_sync_intrinsic(Stub):
|
|
204
|
-
"""
|
|
205
|
-
vote_sync_intrinsic(mask, mode, predictate)
|
|
206
|
-
|
|
207
|
-
Nvvm intrinsic for performing a reduce and broadcast across a warp
|
|
208
|
-
docs.nvidia.com/cuda/nvvm-ir-spec/index.html#nvvm-intrin-warp-level-vote
|
|
209
|
-
"""
|
|
210
|
-
|
|
211
|
-
_description_ = "<vote_sync()>"
|
|
212
|
-
|
|
213
|
-
|
|
214
203
|
class match_any_sync(Stub):
|
|
215
204
|
"""
|
|
216
205
|
match_any_sync(mask, value)
|
numba_cuda/numba/cuda/testing.py
CHANGED
|
@@ -276,14 +276,6 @@ def skip_if_curand_kernel_missing(fn):
|
|
|
276
276
|
return unittest.skipUnless(curand_kernel_h_file, reason)(fn)
|
|
277
277
|
|
|
278
278
|
|
|
279
|
-
def skip_if_mvc_enabled(reason):
|
|
280
|
-
"""Skip a test if Minor Version Compatibility is enabled"""
|
|
281
|
-
assert isinstance(reason, str)
|
|
282
|
-
return unittest.skipIf(
|
|
283
|
-
config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY, reason
|
|
284
|
-
)
|
|
285
|
-
|
|
286
|
-
|
|
287
279
|
def cc_X_or_above(major, minor):
|
|
288
280
|
if not config.ENABLE_CUDASIM:
|
|
289
281
|
cc = devices.get_context().device.compute_capability
|
|
@@ -308,6 +300,10 @@ def skip_unless_cc_75(fn):
|
|
|
308
300
|
return unittest.skipUnless(cc_X_or_above(7, 5), "requires cc >= 7.5")(fn)
|
|
309
301
|
|
|
310
302
|
|
|
303
|
+
def skip_unless_cc_90(fn):
|
|
304
|
+
return unittest.skipUnless(cc_X_or_above(9, 0), "requires cc >= 9.0")(fn)
|
|
305
|
+
|
|
306
|
+
|
|
311
307
|
def xfail_unless_cudasim(fn):
|
|
312
308
|
if config.ENABLE_CUDASIM:
|
|
313
309
|
return fn
|
|
@@ -36,8 +36,13 @@ pytestmark = pytest.mark.skipif(
|
|
|
36
36
|
),
|
|
37
37
|
],
|
|
38
38
|
)
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
@pytest.mark.parametrize(
|
|
40
|
+
"jit",
|
|
41
|
+
[cuda.jit, cuda.jit("void(float32[::1])")],
|
|
42
|
+
ids=["dispatch", "signature"],
|
|
43
|
+
)
|
|
44
|
+
def test_one_arg(benchmark, array_func, jit):
|
|
45
|
+
@jit
|
|
41
46
|
def one_arg(arr1):
|
|
42
47
|
return
|
|
43
48
|
|
|
@@ -78,10 +83,22 @@ def test_one_arg(benchmark, array_func):
|
|
|
78
83
|
),
|
|
79
84
|
],
|
|
80
85
|
)
|
|
81
|
-
|
|
86
|
+
@pytest.mark.parametrize(
|
|
87
|
+
"jit",
|
|
88
|
+
[
|
|
89
|
+
cuda.jit,
|
|
90
|
+
cuda.jit(
|
|
91
|
+
"void({})".format(
|
|
92
|
+
", ".join(["float32[::1]"] * len(string.ascii_lowercase))
|
|
93
|
+
)
|
|
94
|
+
),
|
|
95
|
+
],
|
|
96
|
+
ids=["dispatch", "signature"],
|
|
97
|
+
)
|
|
98
|
+
def test_many_args(benchmark, array_func, jit):
|
|
82
99
|
many_arrs = array_func()
|
|
83
100
|
|
|
84
|
-
@
|
|
101
|
+
@jit
|
|
85
102
|
def many_args(
|
|
86
103
|
a,
|
|
87
104
|
b,
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
4
|
import numbers
|
|
5
|
-
import weakref
|
|
6
5
|
|
|
7
6
|
from numba import cuda
|
|
8
7
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
@@ -90,8 +89,8 @@ class Test3rdPartyContext(CUDATestCase):
|
|
|
90
89
|
dev = driver.binding.CUdevice(0)
|
|
91
90
|
binding_hctx = the_driver.cuDevicePrimaryCtxRetain(dev)
|
|
92
91
|
hctx = driver.drvapi.cu_context(int(binding_hctx))
|
|
92
|
+
ctx = driver.Context(dev, hctx)
|
|
93
93
|
try:
|
|
94
|
-
ctx = driver.Context(weakref.proxy(self), hctx)
|
|
95
94
|
ctx.push()
|
|
96
95
|
# Check that the context from numba matches the created primary
|
|
97
96
|
# context.
|