numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (172)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
@@ -629,7 +629,7 @@ class TestCudaIntrinsic(CUDATestCase):
629
629
  arg1 = np.array([3.0], dtype=np.float16)
630
630
  arg2 = np.array([4.0], dtype=np.float16)
631
631
  compiled[1, 1](ary, arg1, arg2)
632
- np.testing.assert_allclose(ary[0], arg1 + arg2)
632
+ np.testing.assert_allclose(ary[0], arg1 + arg2, rtol=self.FLOAT16_RTOL)
633
633
 
634
634
  @skip_unless_cc_53
635
635
  def test_hadd_scalar(self):
@@ -639,7 +639,7 @@ class TestCudaIntrinsic(CUDATestCase):
639
639
  arg2 = np.float16(3.0)
640
640
  compiled[1, 1](ary, arg1, arg2)
641
641
  ref = arg1 + arg2
642
- np.testing.assert_allclose(ary[0], ref)
642
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
643
643
 
644
644
  @skip_on_cudasim("Compilation unsupported in the simulator")
645
645
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -657,7 +657,9 @@ class TestCudaIntrinsic(CUDATestCase):
657
657
  arg2 = np.array([3.0], dtype=np.float16)
658
658
  arg3 = np.array([4.0], dtype=np.float16)
659
659
  compiled[1, 1](ary, arg1, arg2, arg3)
660
- np.testing.assert_allclose(ary[0], arg1 * arg2 + arg3)
660
+ np.testing.assert_allclose(
661
+ ary[0], arg1 * arg2 + arg3, rtol=self.FLOAT16_RTOL
662
+ )
661
663
 
662
664
  @skip_unless_cc_53
663
665
  def test_hfma_scalar(self):
@@ -668,7 +670,7 @@ class TestCudaIntrinsic(CUDATestCase):
668
670
  arg3 = np.float16(4.0)
669
671
  compiled[1, 1](ary, arg1, arg2, arg3)
670
672
  ref = arg1 * arg2 + arg3
671
- np.testing.assert_allclose(ary[0], ref)
673
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
672
674
 
673
675
  @skip_on_cudasim("Compilation unsupported in the simulator")
674
676
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -687,7 +689,7 @@ class TestCudaIntrinsic(CUDATestCase):
687
689
  arg1 = np.array([3.0], dtype=np.float16)
688
690
  arg2 = np.array([4.0], dtype=np.float16)
689
691
  compiled[1, 1](ary, arg1, arg2)
690
- np.testing.assert_allclose(ary[0], arg1 - arg2)
692
+ np.testing.assert_allclose(ary[0], arg1 - arg2, rtol=self.FLOAT16_RTOL)
691
693
 
692
694
  @skip_unless_cc_53
693
695
  def test_hsub_scalar(self):
@@ -697,7 +699,7 @@ class TestCudaIntrinsic(CUDATestCase):
697
699
  arg2 = np.float16(1.57)
698
700
  compiled[1, 1](ary, arg1, arg2)
699
701
  ref = arg1 - arg2
700
- np.testing.assert_allclose(ary[0], ref)
702
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
701
703
 
702
704
  @skip_on_cudasim("Compilation unsupported in the simulator")
703
705
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -714,7 +716,7 @@ class TestCudaIntrinsic(CUDATestCase):
714
716
  arg1 = np.array([3.0], dtype=np.float16)
715
717
  arg2 = np.array([4.0], dtype=np.float16)
716
718
  compiled[1, 1](ary, arg1, arg2)
717
- np.testing.assert_allclose(ary[0], arg1 * arg2)
719
+ np.testing.assert_allclose(ary[0], arg1 * arg2, rtol=self.FLOAT16_RTOL)
718
720
 
719
721
  @skip_unless_cc_53
720
722
  def test_hmul_scalar(self):
@@ -724,7 +726,7 @@ class TestCudaIntrinsic(CUDATestCase):
724
726
  arg2 = np.float16(1.57)
725
727
  compiled[1, 1](ary, arg1, arg2)
726
728
  ref = arg1 * arg2
727
- np.testing.assert_allclose(ary[0], ref)
729
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
728
730
 
729
731
  @skip_on_cudasim("Compilation unsupported in the simulator")
730
732
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -743,7 +745,7 @@ class TestCudaIntrinsic(CUDATestCase):
743
745
 
744
746
  compiled[1, 1](ary, arg1, arg2)
745
747
  ref = arg1 / arg2
746
- np.testing.assert_allclose(ary[0], ref)
748
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
747
749
 
748
750
  @skip_unless_cc_53
749
751
  def test_hdiv(self):
@@ -754,7 +756,7 @@ class TestCudaIntrinsic(CUDATestCase):
754
756
 
755
757
  compiled.forall(ary.size)(ary, arry1, arry2)
756
758
  ref = arry1 / arry2
757
- np.testing.assert_allclose(ary, ref)
759
+ np.testing.assert_allclose(ary, ref, rtol=self.FLOAT16_RTOL)
758
760
 
759
761
  @skip_unless_cc_53
760
762
  def test_hneg(self):
@@ -762,7 +764,7 @@ class TestCudaIntrinsic(CUDATestCase):
762
764
  ary = np.zeros(1, dtype=np.float16)
763
765
  arg1 = np.array([3.0], dtype=np.float16)
764
766
  compiled[1, 1](ary, arg1)
765
- np.testing.assert_allclose(ary[0], -arg1)
767
+ np.testing.assert_allclose(ary[0], -arg1, rtol=self.FLOAT16_RTOL)
766
768
 
767
769
  @skip_unless_cc_53
768
770
  def test_hneg_scalar(self):
@@ -771,7 +773,7 @@ class TestCudaIntrinsic(CUDATestCase):
771
773
  arg1 = np.float16(3.1415926)
772
774
  compiled[1, 1](ary, arg1)
773
775
  ref = -arg1
774
- np.testing.assert_allclose(ary[0], ref)
776
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
775
777
 
776
778
  @skip_on_cudasim("Compilation unsupported in the simulator")
777
779
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -787,7 +789,7 @@ class TestCudaIntrinsic(CUDATestCase):
787
789
  ary = np.zeros(1, dtype=np.float16)
788
790
  arg1 = np.array([-3.0], dtype=np.float16)
789
791
  compiled[1, 1](ary, arg1)
790
- np.testing.assert_allclose(ary[0], abs(arg1))
792
+ np.testing.assert_allclose(ary[0], abs(arg1), rtol=self.FLOAT16_RTOL)
791
793
 
792
794
  @skip_unless_cc_53
793
795
  def test_habs_scalar(self):
@@ -796,7 +798,7 @@ class TestCudaIntrinsic(CUDATestCase):
796
798
  arg1 = np.float16(-3.1415926)
797
799
  compiled[1, 1](ary, arg1)
798
800
  ref = abs(arg1)
799
- np.testing.assert_allclose(ary[0], ref)
801
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
800
802
 
801
803
  @skip_on_cudasim("Compilation unsupported in the simulator")
802
804
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -849,7 +851,7 @@ class TestCudaIntrinsic(CUDATestCase):
849
851
  kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
850
852
  kernel[1, N](r, x)
851
853
  expected = fn(x, dtype=np.float16)
852
- np.testing.assert_allclose(r, expected)
854
+ np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)
853
855
 
854
856
  x2 = np.random.randint(1, 10, size=N).astype(np.float16)
855
857
  for kernel, fn in zip(exp_kernels, expected_exp_functions):
@@ -857,7 +859,7 @@ class TestCudaIntrinsic(CUDATestCase):
857
859
  kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
858
860
  kernel[1, N](r, x2)
859
861
  expected = fn(x2, dtype=np.float16)
860
- np.testing.assert_allclose(r, expected)
862
+ np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)
861
863
 
862
864
  @skip_unless_cc_53
863
865
  def test_hexp10(self):
@@ -876,7 +878,7 @@ class TestCudaIntrinsic(CUDATestCase):
876
878
 
877
879
  # Run the kernel
878
880
  hexp10_vectors[1, N](r, x)
879
- np.testing.assert_allclose(r, 10**x)
881
+ np.testing.assert_allclose(r, 10**x, rtol=self.FLOAT16_RTOL)
880
882
 
881
883
  @skip_unless_cc_53
882
884
  def test_fp16_comparison(self):
@@ -948,10 +950,10 @@ class TestCudaIntrinsic(CUDATestCase):
948
950
  arg1 = np.float16(3.0)
949
951
  arg2 = np.float16(4.0)
950
952
  compiled[1, 1](ary, arg1, arg2)
951
- np.testing.assert_allclose(ary[0], arg2)
953
+ np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)
952
954
  arg1 = np.float16(5.0)
953
955
  compiled[1, 1](ary, arg1, arg2)
954
- np.testing.assert_allclose(ary[0], arg1)
956
+ np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)
955
957
 
956
958
  @skip_unless_cc_53
957
959
  def test_hmin(self):
@@ -960,10 +962,10 @@ class TestCudaIntrinsic(CUDATestCase):
960
962
  arg1 = np.float16(3.0)
961
963
  arg2 = np.float16(4.0)
962
964
  compiled[1, 1](ary, arg1, arg2)
963
- np.testing.assert_allclose(ary[0], arg1)
965
+ np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)
964
966
  arg1 = np.float16(5.0)
965
967
  compiled[1, 1](ary, arg1, arg2)
966
- np.testing.assert_allclose(ary[0], arg2)
968
+ np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)
967
969
 
968
970
  def test_cbrt_f32(self):
969
971
  compiled = cuda.jit("void(float32[:], float32)")(simple_cbrt)
@@ -2,63 +2,34 @@
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
4
  from numba.cuda.testing import CUDATestCase
5
- import numba
6
- from numba.core.registry import cpu_target
7
- from numba.core.compiler import CompilerBase, Flags
8
- from numba.core.compiler_machinery import PassManager
5
+ from numba.cuda.core.compiler import CompilerBase
6
+ from numba.cuda.flags import Flags
7
+ from numba.cuda.core.compiler_machinery import PassManager
9
8
  from numba.cuda.core import ir_utils
10
- from numba.core import types, ir, bytecode, compiler, registry
11
- from numba.core.untyped_passes import (
9
+ from numba.core import types, ir, bytecode, registry
10
+ from numba.cuda import compiler
11
+ from numba.cuda.core.untyped_passes import (
12
12
  ExtractByteCode,
13
13
  TranslateByteCode,
14
14
  FixupArgs,
15
15
  IRProcessing,
16
16
  )
17
- from numba.experimental import jitclass
18
- from numba.core.typed_passes import (
17
+ from numba.cuda.core.typed_passes import (
19
18
  NopythonTypeInference,
20
- type_inference_stage,
21
19
  DeadCodeElimination,
22
20
  )
21
+ from numba.cuda.testing import skip_on_cudasim
23
22
 
24
23
 
25
24
  # global constant for testing find_const
26
25
  GLOBAL_B = 11
27
26
 
28
27
 
29
- @jitclass([("val", numba.core.types.List(numba.intp))])
30
- class Dummy(object):
31
- def __init__(self, val):
32
- self.val = val
33
-
34
-
35
28
  class TestIrUtils(CUDATestCase):
36
29
  """
37
30
  Tests ir handling utility functions like find_callname.
38
31
  """
39
32
 
40
- def test_obj_func_match(self):
41
- """Test matching of an object method (other than Array see #3449)"""
42
-
43
- def test_func():
44
- d = Dummy([1])
45
- d.val.append(2)
46
-
47
- test_ir = compiler.run_frontend(test_func)
48
- typingctx = cpu_target.typing_context
49
- targetctx = cpu_target.target_context
50
- typing_res = type_inference_stage(
51
- typingctx, targetctx, test_ir, (), None
52
- )
53
- matched_call = ir_utils.find_callname(
54
- test_ir, test_ir.blocks[0].body[7].value, typing_res.typemap
55
- )
56
- self.assertTrue(
57
- isinstance(matched_call, tuple)
58
- and len(matched_call) == 2
59
- and matched_call[0] == "append"
60
- )
61
-
62
33
  def test_dead_code_elimination(self):
63
34
  class Tester(CompilerBase):
64
35
  @classmethod
@@ -177,6 +148,7 @@ class TestIrUtils(CUDATestCase):
177
148
  len(no_dce.blocks[0].body) - len(removed), len(w_dce.blocks[0].body)
178
149
  )
179
150
 
151
+ @skip_on_cudasim("Skipping ir utils tests on CUDA simulator")
180
152
  def test_find_const_global(self):
181
153
  """
182
154
  Test find_const() for values in globals (ir.Global) and freevars
@@ -203,6 +175,7 @@ class TestIrUtils(CUDATestCase):
203
175
  self.assertEqual(const_b, GLOBAL_B)
204
176
  self.assertEqual(const_c, FREEVAR_C)
205
177
 
178
+ @skip_on_cudasim("Skipping ir utils tests on CUDA simulator")
206
179
  def test_flatten_labels(self):
207
180
  """tests flatten_labels"""
208
181
 
@@ -4,7 +4,7 @@
4
4
  import numpy as np
5
5
  from numba import cuda, float64, void
6
6
  from numba.cuda.testing import unittest, CUDATestCase
7
- from numba.core import config
7
+ from numba.cuda.core import config
8
8
 
9
9
  # NOTE: CUDA kernel does not return any value
10
10
 
@@ -8,7 +8,7 @@ from numba.cuda.testing import (
8
8
  CUDATestCase,
9
9
  skip_on_cudasim,
10
10
  )
11
- from numba.np import numpy_support
11
+ from numba.cuda.np import numpy_support
12
12
  from numba import cuda, float32, float64, int32, vectorize, void, int64
13
13
  import math
14
14
 
@@ -5,7 +5,7 @@ import numpy as np
5
5
 
6
6
  from numba import cuda, float32, void
7
7
  from numba.cuda.testing import unittest, CUDATestCase
8
- from numba.core import config
8
+ from numba.cuda.core import config
9
9
 
10
10
  # Ensure the test takes a reasonable amount of time in the simulator
11
11
  if config.ENABLE_CUDASIM:
@@ -15,7 +15,7 @@ from numba.core.types import f2, b1
15
15
  from numba.cuda.typing import signature
16
16
  import operator
17
17
  import itertools
18
- from numba.np.numpy_support import from_dtype
18
+ from numba.cuda.np.numpy_support import from_dtype
19
19
 
20
20
 
21
21
  def simple_fp16_div_scalar(ary, a, b):
@@ -102,6 +102,21 @@ print_too_many[1, 1](np.arange(33))
102
102
  cuda.synchronize()
103
103
  """
104
104
 
105
+ print_bfloat16_usecase = """\
106
+ from numba import cuda
107
+ from numba.cuda import config
108
+
109
+ @cuda.jit
110
+ def print_bfloat16():
111
+ # 0.9375 is a dyadic rational, its integer significand can expand within 7 digits.
112
+ # printing this should not give any rounding error.
113
+ a = cuda.types.bfloat16(0.9375)
114
+ print(a, a, a)
115
+
116
+ print_bfloat16[1, 1]()
117
+ cuda.synchronize()
118
+ """
119
+
105
120
 
106
121
  class TestPrint(CUDATestCase):
107
122
  # Note that in these tests we generally strip the output to avoid dealing
@@ -148,6 +163,11 @@ class TestPrint(CUDATestCase):
148
163
  expected = [str(i) for i in np.ndindex(2, 2, 2)]
149
164
  self.assertEqual(sorted(lines), expected)
150
165
 
166
+ @skip_on_cudasim("bfloat16 on host is not yet supported.")
167
+ def test_bfloat16(self):
168
+ output, _ = self.run_code(print_bfloat16_usecase)
169
+ self.assertEqual(output.strip(), "0.937500 0.937500 0.937500")
170
+
151
171
  @skip_on_cudasim("cudasim can print unlimited output")
152
172
  def test_too_many_args(self):
153
173
  # Tests that we emit the format string and warn when there are more
@@ -6,7 +6,7 @@ from numba import cuda
6
6
  from numba.core import types
7
7
  from numba.cuda.testing import skip_on_cudasim, CUDATestCase
8
8
  import unittest
9
- from numba.np import numpy_support
9
+ from numba.cuda.np import numpy_support
10
10
 
11
11
 
12
12
  def set_a(ary, i, v):
@@ -3,7 +3,7 @@
3
3
 
4
4
  import numpy as np
5
5
  from numba import cuda
6
- from numba.core.config import ENABLE_CUDASIM
6
+ from numba.cuda.core.config import ENABLE_CUDASIM
7
7
  from numba.cuda.testing import CUDATestCase
8
8
  import unittest
9
9
 
@@ -7,7 +7,7 @@ from numba import cuda, vectorize
7
7
  from numba.core import types
8
8
  from numba.cuda.testing import skip_on_cudasim, CUDATestCase
9
9
  import unittest
10
- from numba.np import numpy_support
10
+ from numba.cuda.np import numpy_support
11
11
 
12
12
 
13
13
  @skip_on_cudasim("pickling not supported in CUDASIM")
@@ -7,7 +7,7 @@ from numba.core import types
7
7
  from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
8
8
 
9
9
  import numpy as np
10
- from numba.np import numpy_support as nps
10
+ from numba.cuda.np import numpy_support as nps
11
11
 
12
12
  from .extensions_usecases import test_struct_model_type, TestStruct
13
13