numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +7 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
|
@@ -629,7 +629,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
629
629
|
arg1 = np.array([3.0], dtype=np.float16)
|
|
630
630
|
arg2 = np.array([4.0], dtype=np.float16)
|
|
631
631
|
compiled[1, 1](ary, arg1, arg2)
|
|
632
|
-
np.testing.assert_allclose(ary[0], arg1 + arg2)
|
|
632
|
+
np.testing.assert_allclose(ary[0], arg1 + arg2, rtol=self.FLOAT16_RTOL)
|
|
633
633
|
|
|
634
634
|
@skip_unless_cc_53
|
|
635
635
|
def test_hadd_scalar(self):
|
|
@@ -639,7 +639,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
639
639
|
arg2 = np.float16(3.0)
|
|
640
640
|
compiled[1, 1](ary, arg1, arg2)
|
|
641
641
|
ref = arg1 + arg2
|
|
642
|
-
np.testing.assert_allclose(ary[0], ref)
|
|
642
|
+
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
|
|
643
643
|
|
|
644
644
|
@skip_on_cudasim("Compilation unsupported in the simulator")
|
|
645
645
|
@skip_if_nvjitlink_missing("Numbast generated bindings")
|
|
@@ -657,7 +657,9 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
657
657
|
arg2 = np.array([3.0], dtype=np.float16)
|
|
658
658
|
arg3 = np.array([4.0], dtype=np.float16)
|
|
659
659
|
compiled[1, 1](ary, arg1, arg2, arg3)
|
|
660
|
-
np.testing.assert_allclose(
|
|
660
|
+
np.testing.assert_allclose(
|
|
661
|
+
ary[0], arg1 * arg2 + arg3, rtol=self.FLOAT16_RTOL
|
|
662
|
+
)
|
|
661
663
|
|
|
662
664
|
@skip_unless_cc_53
|
|
663
665
|
def test_hfma_scalar(self):
|
|
@@ -668,7 +670,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
668
670
|
arg3 = np.float16(4.0)
|
|
669
671
|
compiled[1, 1](ary, arg1, arg2, arg3)
|
|
670
672
|
ref = arg1 * arg2 + arg3
|
|
671
|
-
np.testing.assert_allclose(ary[0], ref)
|
|
673
|
+
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
|
|
672
674
|
|
|
673
675
|
@skip_on_cudasim("Compilation unsupported in the simulator")
|
|
674
676
|
@skip_if_nvjitlink_missing("Numbast generated bindings")
|
|
@@ -687,7 +689,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
687
689
|
arg1 = np.array([3.0], dtype=np.float16)
|
|
688
690
|
arg2 = np.array([4.0], dtype=np.float16)
|
|
689
691
|
compiled[1, 1](ary, arg1, arg2)
|
|
690
|
-
np.testing.assert_allclose(ary[0], arg1 - arg2)
|
|
692
|
+
np.testing.assert_allclose(ary[0], arg1 - arg2, rtol=self.FLOAT16_RTOL)
|
|
691
693
|
|
|
692
694
|
@skip_unless_cc_53
|
|
693
695
|
def test_hsub_scalar(self):
|
|
@@ -697,7 +699,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
697
699
|
arg2 = np.float16(1.57)
|
|
698
700
|
compiled[1, 1](ary, arg1, arg2)
|
|
699
701
|
ref = arg1 - arg2
|
|
700
|
-
np.testing.assert_allclose(ary[0], ref)
|
|
702
|
+
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
|
|
701
703
|
|
|
702
704
|
@skip_on_cudasim("Compilation unsupported in the simulator")
|
|
703
705
|
@skip_if_nvjitlink_missing("Numbast generated bindings")
|
|
@@ -714,7 +716,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
714
716
|
arg1 = np.array([3.0], dtype=np.float16)
|
|
715
717
|
arg2 = np.array([4.0], dtype=np.float16)
|
|
716
718
|
compiled[1, 1](ary, arg1, arg2)
|
|
717
|
-
np.testing.assert_allclose(ary[0], arg1 * arg2)
|
|
719
|
+
np.testing.assert_allclose(ary[0], arg1 * arg2, rtol=self.FLOAT16_RTOL)
|
|
718
720
|
|
|
719
721
|
@skip_unless_cc_53
|
|
720
722
|
def test_hmul_scalar(self):
|
|
@@ -724,7 +726,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
724
726
|
arg2 = np.float16(1.57)
|
|
725
727
|
compiled[1, 1](ary, arg1, arg2)
|
|
726
728
|
ref = arg1 * arg2
|
|
727
|
-
np.testing.assert_allclose(ary[0], ref)
|
|
729
|
+
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
|
|
728
730
|
|
|
729
731
|
@skip_on_cudasim("Compilation unsupported in the simulator")
|
|
730
732
|
@skip_if_nvjitlink_missing("Numbast generated bindings")
|
|
@@ -743,7 +745,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
743
745
|
|
|
744
746
|
compiled[1, 1](ary, arg1, arg2)
|
|
745
747
|
ref = arg1 / arg2
|
|
746
|
-
np.testing.assert_allclose(ary[0], ref)
|
|
748
|
+
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
|
|
747
749
|
|
|
748
750
|
@skip_unless_cc_53
|
|
749
751
|
def test_hdiv(self):
|
|
@@ -754,7 +756,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
754
756
|
|
|
755
757
|
compiled.forall(ary.size)(ary, arry1, arry2)
|
|
756
758
|
ref = arry1 / arry2
|
|
757
|
-
np.testing.assert_allclose(ary, ref)
|
|
759
|
+
np.testing.assert_allclose(ary, ref, rtol=self.FLOAT16_RTOL)
|
|
758
760
|
|
|
759
761
|
@skip_unless_cc_53
|
|
760
762
|
def test_hneg(self):
|
|
@@ -762,7 +764,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
762
764
|
ary = np.zeros(1, dtype=np.float16)
|
|
763
765
|
arg1 = np.array([3.0], dtype=np.float16)
|
|
764
766
|
compiled[1, 1](ary, arg1)
|
|
765
|
-
np.testing.assert_allclose(ary[0], -arg1)
|
|
767
|
+
np.testing.assert_allclose(ary[0], -arg1, rtol=self.FLOAT16_RTOL)
|
|
766
768
|
|
|
767
769
|
@skip_unless_cc_53
|
|
768
770
|
def test_hneg_scalar(self):
|
|
@@ -771,7 +773,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
771
773
|
arg1 = np.float16(3.1415926)
|
|
772
774
|
compiled[1, 1](ary, arg1)
|
|
773
775
|
ref = -arg1
|
|
774
|
-
np.testing.assert_allclose(ary[0], ref)
|
|
776
|
+
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
|
|
775
777
|
|
|
776
778
|
@skip_on_cudasim("Compilation unsupported in the simulator")
|
|
777
779
|
@skip_if_nvjitlink_missing("Numbast generated bindings")
|
|
@@ -787,7 +789,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
787
789
|
ary = np.zeros(1, dtype=np.float16)
|
|
788
790
|
arg1 = np.array([-3.0], dtype=np.float16)
|
|
789
791
|
compiled[1, 1](ary, arg1)
|
|
790
|
-
np.testing.assert_allclose(ary[0], abs(arg1))
|
|
792
|
+
np.testing.assert_allclose(ary[0], abs(arg1), rtol=self.FLOAT16_RTOL)
|
|
791
793
|
|
|
792
794
|
@skip_unless_cc_53
|
|
793
795
|
def test_habs_scalar(self):
|
|
@@ -796,7 +798,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
796
798
|
arg1 = np.float16(-3.1415926)
|
|
797
799
|
compiled[1, 1](ary, arg1)
|
|
798
800
|
ref = abs(arg1)
|
|
799
|
-
np.testing.assert_allclose(ary[0], ref)
|
|
801
|
+
np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
|
|
800
802
|
|
|
801
803
|
@skip_on_cudasim("Compilation unsupported in the simulator")
|
|
802
804
|
@skip_if_nvjitlink_missing("Numbast generated bindings")
|
|
@@ -849,7 +851,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
849
851
|
kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
|
|
850
852
|
kernel[1, N](r, x)
|
|
851
853
|
expected = fn(x, dtype=np.float16)
|
|
852
|
-
np.testing.assert_allclose(r, expected)
|
|
854
|
+
np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)
|
|
853
855
|
|
|
854
856
|
x2 = np.random.randint(1, 10, size=N).astype(np.float16)
|
|
855
857
|
for kernel, fn in zip(exp_kernels, expected_exp_functions):
|
|
@@ -857,7 +859,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
857
859
|
kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
|
|
858
860
|
kernel[1, N](r, x2)
|
|
859
861
|
expected = fn(x2, dtype=np.float16)
|
|
860
|
-
np.testing.assert_allclose(r, expected)
|
|
862
|
+
np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)
|
|
861
863
|
|
|
862
864
|
@skip_unless_cc_53
|
|
863
865
|
def test_hexp10(self):
|
|
@@ -876,7 +878,7 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
876
878
|
|
|
877
879
|
# Run the kernel
|
|
878
880
|
hexp10_vectors[1, N](r, x)
|
|
879
|
-
np.testing.assert_allclose(r, 10**x)
|
|
881
|
+
np.testing.assert_allclose(r, 10**x, rtol=self.FLOAT16_RTOL)
|
|
880
882
|
|
|
881
883
|
@skip_unless_cc_53
|
|
882
884
|
def test_fp16_comparison(self):
|
|
@@ -948,10 +950,10 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
948
950
|
arg1 = np.float16(3.0)
|
|
949
951
|
arg2 = np.float16(4.0)
|
|
950
952
|
compiled[1, 1](ary, arg1, arg2)
|
|
951
|
-
np.testing.assert_allclose(ary[0], arg2)
|
|
953
|
+
np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)
|
|
952
954
|
arg1 = np.float16(5.0)
|
|
953
955
|
compiled[1, 1](ary, arg1, arg2)
|
|
954
|
-
np.testing.assert_allclose(ary[0], arg1)
|
|
956
|
+
np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)
|
|
955
957
|
|
|
956
958
|
@skip_unless_cc_53
|
|
957
959
|
def test_hmin(self):
|
|
@@ -960,10 +962,10 @@ class TestCudaIntrinsic(CUDATestCase):
|
|
|
960
962
|
arg1 = np.float16(3.0)
|
|
961
963
|
arg2 = np.float16(4.0)
|
|
962
964
|
compiled[1, 1](ary, arg1, arg2)
|
|
963
|
-
np.testing.assert_allclose(ary[0], arg1)
|
|
965
|
+
np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)
|
|
964
966
|
arg1 = np.float16(5.0)
|
|
965
967
|
compiled[1, 1](ary, arg1, arg2)
|
|
966
|
-
np.testing.assert_allclose(ary[0], arg2)
|
|
968
|
+
np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)
|
|
967
969
|
|
|
968
970
|
def test_cbrt_f32(self):
|
|
969
971
|
compiled = cuda.jit("void(float32[:], float32)")(simple_cbrt)
|
|
@@ -2,63 +2,34 @@
|
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
4
|
from numba.cuda.testing import CUDATestCase
|
|
5
|
-
import
|
|
6
|
-
from numba.
|
|
7
|
-
from numba.core.
|
|
8
|
-
from numba.core.compiler_machinery import PassManager
|
|
5
|
+
from numba.cuda.core.compiler import CompilerBase
|
|
6
|
+
from numba.cuda.flags import Flags
|
|
7
|
+
from numba.cuda.core.compiler_machinery import PassManager
|
|
9
8
|
from numba.cuda.core import ir_utils
|
|
10
|
-
from numba.core import types, ir, bytecode,
|
|
11
|
-
from numba.
|
|
9
|
+
from numba.core import types, ir, bytecode, registry
|
|
10
|
+
from numba.cuda import compiler
|
|
11
|
+
from numba.cuda.core.untyped_passes import (
|
|
12
12
|
ExtractByteCode,
|
|
13
13
|
TranslateByteCode,
|
|
14
14
|
FixupArgs,
|
|
15
15
|
IRProcessing,
|
|
16
16
|
)
|
|
17
|
-
from numba.
|
|
18
|
-
from numba.core.typed_passes import (
|
|
17
|
+
from numba.cuda.core.typed_passes import (
|
|
19
18
|
NopythonTypeInference,
|
|
20
|
-
type_inference_stage,
|
|
21
19
|
DeadCodeElimination,
|
|
22
20
|
)
|
|
21
|
+
from numba.cuda.testing import skip_on_cudasim
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
# global constant for testing find_const
|
|
26
25
|
GLOBAL_B = 11
|
|
27
26
|
|
|
28
27
|
|
|
29
|
-
@jitclass([("val", numba.core.types.List(numba.intp))])
|
|
30
|
-
class Dummy(object):
|
|
31
|
-
def __init__(self, val):
|
|
32
|
-
self.val = val
|
|
33
|
-
|
|
34
|
-
|
|
35
28
|
class TestIrUtils(CUDATestCase):
|
|
36
29
|
"""
|
|
37
30
|
Tests ir handling utility functions like find_callname.
|
|
38
31
|
"""
|
|
39
32
|
|
|
40
|
-
def test_obj_func_match(self):
|
|
41
|
-
"""Test matching of an object method (other than Array see #3449)"""
|
|
42
|
-
|
|
43
|
-
def test_func():
|
|
44
|
-
d = Dummy([1])
|
|
45
|
-
d.val.append(2)
|
|
46
|
-
|
|
47
|
-
test_ir = compiler.run_frontend(test_func)
|
|
48
|
-
typingctx = cpu_target.typing_context
|
|
49
|
-
targetctx = cpu_target.target_context
|
|
50
|
-
typing_res = type_inference_stage(
|
|
51
|
-
typingctx, targetctx, test_ir, (), None
|
|
52
|
-
)
|
|
53
|
-
matched_call = ir_utils.find_callname(
|
|
54
|
-
test_ir, test_ir.blocks[0].body[7].value, typing_res.typemap
|
|
55
|
-
)
|
|
56
|
-
self.assertTrue(
|
|
57
|
-
isinstance(matched_call, tuple)
|
|
58
|
-
and len(matched_call) == 2
|
|
59
|
-
and matched_call[0] == "append"
|
|
60
|
-
)
|
|
61
|
-
|
|
62
33
|
def test_dead_code_elimination(self):
|
|
63
34
|
class Tester(CompilerBase):
|
|
64
35
|
@classmethod
|
|
@@ -177,6 +148,7 @@ class TestIrUtils(CUDATestCase):
|
|
|
177
148
|
len(no_dce.blocks[0].body) - len(removed), len(w_dce.blocks[0].body)
|
|
178
149
|
)
|
|
179
150
|
|
|
151
|
+
@skip_on_cudasim("Skipping ir utils tests on CUDA simulator")
|
|
180
152
|
def test_find_const_global(self):
|
|
181
153
|
"""
|
|
182
154
|
Test find_const() for values in globals (ir.Global) and freevars
|
|
@@ -203,6 +175,7 @@ class TestIrUtils(CUDATestCase):
|
|
|
203
175
|
self.assertEqual(const_b, GLOBAL_B)
|
|
204
176
|
self.assertEqual(const_c, FREEVAR_C)
|
|
205
177
|
|
|
178
|
+
@skip_on_cudasim("Skipping ir utils tests on CUDA simulator")
|
|
206
179
|
def test_flatten_labels(self):
|
|
207
180
|
"""tests flatten_labels"""
|
|
208
181
|
|
|
@@ -5,7 +5,7 @@ import numpy as np
|
|
|
5
5
|
|
|
6
6
|
from numba import cuda, float32, void
|
|
7
7
|
from numba.cuda.testing import unittest, CUDATestCase
|
|
8
|
-
from numba.core import config
|
|
8
|
+
from numba.cuda.core import config
|
|
9
9
|
|
|
10
10
|
# Ensure the test takes a reasonable amount of time in the simulator
|
|
11
11
|
if config.ENABLE_CUDASIM:
|
|
@@ -15,7 +15,7 @@ from numba.core.types import f2, b1
|
|
|
15
15
|
from numba.cuda.typing import signature
|
|
16
16
|
import operator
|
|
17
17
|
import itertools
|
|
18
|
-
from numba.np.numpy_support import from_dtype
|
|
18
|
+
from numba.cuda.np.numpy_support import from_dtype
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def simple_fp16_div_scalar(ary, a, b):
|
|
@@ -102,6 +102,21 @@ print_too_many[1, 1](np.arange(33))
|
|
|
102
102
|
cuda.synchronize()
|
|
103
103
|
"""
|
|
104
104
|
|
|
105
|
+
print_bfloat16_usecase = """\
|
|
106
|
+
from numba import cuda
|
|
107
|
+
from numba.cuda import config
|
|
108
|
+
|
|
109
|
+
@cuda.jit
|
|
110
|
+
def print_bfloat16():
|
|
111
|
+
# 0.9375 is a dyadic rational, its integer significand can expand within 7 digits.
|
|
112
|
+
# printing this should not give any rounding error.
|
|
113
|
+
a = cuda.types.bfloat16(0.9375)
|
|
114
|
+
print(a, a, a)
|
|
115
|
+
|
|
116
|
+
print_bfloat16[1, 1]()
|
|
117
|
+
cuda.synchronize()
|
|
118
|
+
"""
|
|
119
|
+
|
|
105
120
|
|
|
106
121
|
class TestPrint(CUDATestCase):
|
|
107
122
|
# Note that in these tests we generally strip the output to avoid dealing
|
|
@@ -148,6 +163,11 @@ class TestPrint(CUDATestCase):
|
|
|
148
163
|
expected = [str(i) for i in np.ndindex(2, 2, 2)]
|
|
149
164
|
self.assertEqual(sorted(lines), expected)
|
|
150
165
|
|
|
166
|
+
@skip_on_cudasim("bfloat16 on host is not yet supported.")
|
|
167
|
+
def test_bfloat16(self):
|
|
168
|
+
output, _ = self.run_code(print_bfloat16_usecase)
|
|
169
|
+
self.assertEqual(output.strip(), "0.937500 0.937500 0.937500")
|
|
170
|
+
|
|
151
171
|
@skip_on_cudasim("cudasim can print unlimited output")
|
|
152
172
|
def test_too_many_args(self):
|
|
153
173
|
# Tests that we emit the format string and warn when there are more
|
|
@@ -7,7 +7,7 @@ from numba import cuda, vectorize
|
|
|
7
7
|
from numba.core import types
|
|
8
8
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
|
9
9
|
import unittest
|
|
10
|
-
from numba.np import numpy_support
|
|
10
|
+
from numba.cuda.np import numpy_support
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
@skip_on_cudasim("pickling not supported in CUDASIM")
|
|
@@ -7,7 +7,7 @@ from numba.core import types
|
|
|
7
7
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
|
-
from numba.np import numpy_support as nps
|
|
10
|
+
from numba.cuda.np import numpy_support as nps
|
|
11
11
|
|
|
12
12
|
from .extensions_usecases import test_struct_model_type, TestStruct
|
|
13
13
|
|