numba-cuda 0.20.0__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

numba_cuda/VERSION CHANGED
@@ -1 +1 @@
1
- 0.20.0
1
+ 0.20.1
numba_cuda/numba/cuda/testing.py CHANGED
@@ -17,6 +17,7 @@ from pathlib import Path
17
17
  from typing import Iterable, Union
18
18
  from io import StringIO
19
19
  import unittest
20
+ import numpy as np
20
21
 
21
22
  if PYVERSION >= (3, 10):
22
23
  from filecheck.matcher import Matcher
@@ -44,6 +45,8 @@ class CUDATestCase(TestCase):
44
45
  matches FileCheck checks, and is not specific to CUDADispatcher.
45
46
  """
46
47
 
48
+ FLOAT16_RTOL = np.finfo(np.float16).eps
49
+
47
50
  def setUp(self):
48
51
  self._low_occupancy_warnings = config.CUDA_LOW_OCCUPANCY_WARNINGS
49
52
  self._warn_on_implicit_copy = config.CUDA_WARN_ON_IMPLICIT_COPY
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py CHANGED
@@ -629,7 +629,7 @@ class TestCudaIntrinsic(CUDATestCase):
629
629
  arg1 = np.array([3.0], dtype=np.float16)
630
630
  arg2 = np.array([4.0], dtype=np.float16)
631
631
  compiled[1, 1](ary, arg1, arg2)
632
- np.testing.assert_allclose(ary[0], arg1 + arg2)
632
+ np.testing.assert_allclose(ary[0], arg1 + arg2, rtol=self.FLOAT16_RTOL)
633
633
 
634
634
  @skip_unless_cc_53
635
635
  def test_hadd_scalar(self):
@@ -639,7 +639,7 @@ class TestCudaIntrinsic(CUDATestCase):
639
639
  arg2 = np.float16(3.0)
640
640
  compiled[1, 1](ary, arg1, arg2)
641
641
  ref = arg1 + arg2
642
- np.testing.assert_allclose(ary[0], ref)
642
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
643
643
 
644
644
  @skip_on_cudasim("Compilation unsupported in the simulator")
645
645
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -657,7 +657,9 @@ class TestCudaIntrinsic(CUDATestCase):
657
657
  arg2 = np.array([3.0], dtype=np.float16)
658
658
  arg3 = np.array([4.0], dtype=np.float16)
659
659
  compiled[1, 1](ary, arg1, arg2, arg3)
660
- np.testing.assert_allclose(ary[0], arg1 * arg2 + arg3)
660
+ np.testing.assert_allclose(
661
+ ary[0], arg1 * arg2 + arg3, rtol=self.FLOAT16_RTOL
662
+ )
661
663
 
662
664
  @skip_unless_cc_53
663
665
  def test_hfma_scalar(self):
@@ -668,7 +670,7 @@ class TestCudaIntrinsic(CUDATestCase):
668
670
  arg3 = np.float16(4.0)
669
671
  compiled[1, 1](ary, arg1, arg2, arg3)
670
672
  ref = arg1 * arg2 + arg3
671
- np.testing.assert_allclose(ary[0], ref)
673
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
672
674
 
673
675
  @skip_on_cudasim("Compilation unsupported in the simulator")
674
676
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -687,7 +689,7 @@ class TestCudaIntrinsic(CUDATestCase):
687
689
  arg1 = np.array([3.0], dtype=np.float16)
688
690
  arg2 = np.array([4.0], dtype=np.float16)
689
691
  compiled[1, 1](ary, arg1, arg2)
690
- np.testing.assert_allclose(ary[0], arg1 - arg2)
692
+ np.testing.assert_allclose(ary[0], arg1 - arg2, rtol=self.FLOAT16_RTOL)
691
693
 
692
694
  @skip_unless_cc_53
693
695
  def test_hsub_scalar(self):
@@ -697,7 +699,7 @@ class TestCudaIntrinsic(CUDATestCase):
697
699
  arg2 = np.float16(1.57)
698
700
  compiled[1, 1](ary, arg1, arg2)
699
701
  ref = arg1 - arg2
700
- np.testing.assert_allclose(ary[0], ref)
702
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
701
703
 
702
704
  @skip_on_cudasim("Compilation unsupported in the simulator")
703
705
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -714,7 +716,7 @@ class TestCudaIntrinsic(CUDATestCase):
714
716
  arg1 = np.array([3.0], dtype=np.float16)
715
717
  arg2 = np.array([4.0], dtype=np.float16)
716
718
  compiled[1, 1](ary, arg1, arg2)
717
- np.testing.assert_allclose(ary[0], arg1 * arg2)
719
+ np.testing.assert_allclose(ary[0], arg1 * arg2, rtol=self.FLOAT16_RTOL)
718
720
 
719
721
  @skip_unless_cc_53
720
722
  def test_hmul_scalar(self):
@@ -724,7 +726,7 @@ class TestCudaIntrinsic(CUDATestCase):
724
726
  arg2 = np.float16(1.57)
725
727
  compiled[1, 1](ary, arg1, arg2)
726
728
  ref = arg1 * arg2
727
- np.testing.assert_allclose(ary[0], ref)
729
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
728
730
 
729
731
  @skip_on_cudasim("Compilation unsupported in the simulator")
730
732
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -743,7 +745,7 @@ class TestCudaIntrinsic(CUDATestCase):
743
745
 
744
746
  compiled[1, 1](ary, arg1, arg2)
745
747
  ref = arg1 / arg2
746
- np.testing.assert_allclose(ary[0], ref)
748
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
747
749
 
748
750
  @skip_unless_cc_53
749
751
  def test_hdiv(self):
@@ -754,7 +756,7 @@ class TestCudaIntrinsic(CUDATestCase):
754
756
 
755
757
  compiled.forall(ary.size)(ary, arry1, arry2)
756
758
  ref = arry1 / arry2
757
- np.testing.assert_allclose(ary, ref)
759
+ np.testing.assert_allclose(ary, ref, rtol=self.FLOAT16_RTOL)
758
760
 
759
761
  @skip_unless_cc_53
760
762
  def test_hneg(self):
@@ -762,7 +764,7 @@ class TestCudaIntrinsic(CUDATestCase):
762
764
  ary = np.zeros(1, dtype=np.float16)
763
765
  arg1 = np.array([3.0], dtype=np.float16)
764
766
  compiled[1, 1](ary, arg1)
765
- np.testing.assert_allclose(ary[0], -arg1)
767
+ np.testing.assert_allclose(ary[0], -arg1, rtol=self.FLOAT16_RTOL)
766
768
 
767
769
  @skip_unless_cc_53
768
770
  def test_hneg_scalar(self):
@@ -771,7 +773,7 @@ class TestCudaIntrinsic(CUDATestCase):
771
773
  arg1 = np.float16(3.1415926)
772
774
  compiled[1, 1](ary, arg1)
773
775
  ref = -arg1
774
- np.testing.assert_allclose(ary[0], ref)
776
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
775
777
 
776
778
  @skip_on_cudasim("Compilation unsupported in the simulator")
777
779
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -787,7 +789,7 @@ class TestCudaIntrinsic(CUDATestCase):
787
789
  ary = np.zeros(1, dtype=np.float16)
788
790
  arg1 = np.array([-3.0], dtype=np.float16)
789
791
  compiled[1, 1](ary, arg1)
790
- np.testing.assert_allclose(ary[0], abs(arg1))
792
+ np.testing.assert_allclose(ary[0], abs(arg1), rtol=self.FLOAT16_RTOL)
791
793
 
792
794
  @skip_unless_cc_53
793
795
  def test_habs_scalar(self):
@@ -796,7 +798,7 @@ class TestCudaIntrinsic(CUDATestCase):
796
798
  arg1 = np.float16(-3.1415926)
797
799
  compiled[1, 1](ary, arg1)
798
800
  ref = abs(arg1)
799
- np.testing.assert_allclose(ary[0], ref)
801
+ np.testing.assert_allclose(ary[0], ref, rtol=self.FLOAT16_RTOL)
800
802
 
801
803
  @skip_on_cudasim("Compilation unsupported in the simulator")
802
804
  @skip_if_nvjitlink_missing("Numbast generated bindings")
@@ -849,7 +851,7 @@ class TestCudaIntrinsic(CUDATestCase):
849
851
  kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
850
852
  kernel[1, N](r, x)
851
853
  expected = fn(x, dtype=np.float16)
852
- np.testing.assert_allclose(r, expected)
854
+ np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)
853
855
 
854
856
  x2 = np.random.randint(1, 10, size=N).astype(np.float16)
855
857
  for kernel, fn in zip(exp_kernels, expected_exp_functions):
@@ -857,7 +859,7 @@ class TestCudaIntrinsic(CUDATestCase):
857
859
  kernel = cuda.jit("void(f2[:], f2[:])")(kernel)
858
860
  kernel[1, N](r, x2)
859
861
  expected = fn(x2, dtype=np.float16)
860
- np.testing.assert_allclose(r, expected)
862
+ np.testing.assert_allclose(r, expected, rtol=self.FLOAT16_RTOL)
861
863
 
862
864
  @skip_unless_cc_53
863
865
  def test_hexp10(self):
@@ -876,7 +878,7 @@ class TestCudaIntrinsic(CUDATestCase):
876
878
 
877
879
  # Run the kernel
878
880
  hexp10_vectors[1, N](r, x)
879
- np.testing.assert_allclose(r, 10**x)
881
+ np.testing.assert_allclose(r, 10**x, rtol=self.FLOAT16_RTOL)
880
882
 
881
883
  @skip_unless_cc_53
882
884
  def test_fp16_comparison(self):
@@ -948,10 +950,10 @@ class TestCudaIntrinsic(CUDATestCase):
948
950
  arg1 = np.float16(3.0)
949
951
  arg2 = np.float16(4.0)
950
952
  compiled[1, 1](ary, arg1, arg2)
951
- np.testing.assert_allclose(ary[0], arg2)
953
+ np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)
952
954
  arg1 = np.float16(5.0)
953
955
  compiled[1, 1](ary, arg1, arg2)
954
- np.testing.assert_allclose(ary[0], arg1)
956
+ np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)
955
957
 
956
958
  @skip_unless_cc_53
957
959
  def test_hmin(self):
@@ -960,10 +962,10 @@ class TestCudaIntrinsic(CUDATestCase):
960
962
  arg1 = np.float16(3.0)
961
963
  arg2 = np.float16(4.0)
962
964
  compiled[1, 1](ary, arg1, arg2)
963
- np.testing.assert_allclose(ary[0], arg1)
965
+ np.testing.assert_allclose(ary[0], arg1, rtol=self.FLOAT16_RTOL)
964
966
  arg1 = np.float16(5.0)
965
967
  compiled[1, 1](ary, arg1, arg2)
966
- np.testing.assert_allclose(ary[0], arg2)
968
+ np.testing.assert_allclose(ary[0], arg2, rtol=self.FLOAT16_RTOL)
967
969
 
968
970
  def test_cbrt_f32(self):
969
971
  compiled = cuda.jit("void(float32[:], float32)")(simple_cbrt)
{numba_cuda-0.20.0.dist-info → numba_cuda-0.20.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numba-cuda
3
- Version: 0.20.0
3
+ Version: 0.20.1
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License-Expression: BSD-2-Clause
@@ -16,7 +16,7 @@ License-File: LICENSE.numba
16
16
  Requires-Dist: numba>=0.60.0
17
17
  Provides-Extra: cu12
18
18
  Requires-Dist: cuda-bindings<13.0.0,>=12.9.1; extra == "cu12"
19
- Requires-Dist: cuda-core==0.3.*; extra == "cu12"
19
+ Requires-Dist: cuda-core<1.0.0,>=0.3.0; extra == "cu12"
20
20
  Requires-Dist: cuda-python==12.9.*; extra == "cu12"
21
21
  Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
22
22
  Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
@@ -25,7 +25,7 @@ Requires-Dist: nvidia-nvjitlink-cu12; extra == "cu12"
25
25
  Requires-Dist: nvidia-cuda-cccl-cu12; extra == "cu12"
26
26
  Provides-Extra: cu13
27
27
  Requires-Dist: cuda-bindings==13.*; extra == "cu13"
28
- Requires-Dist: cuda-core<0.4.0dev0,==0.3.2; extra == "cu13"
28
+ Requires-Dist: cuda-core<1.0.0,>=0.3.2; extra == "cu13"
29
29
  Requires-Dist: cuda-python==13.*; extra == "cu13"
30
30
  Requires-Dist: nvidia-nvvm==13.*; extra == "cu13"
31
31
  Requires-Dist: nvidia-cuda-runtime==13.*; extra == "cu13"
{numba_cuda-0.20.0.dist-info → numba_cuda-0.20.1.dist-info}/RECORD CHANGED
@@ -1,6 +1,6 @@
1
1
  _numba_cuda_redirector.pth,sha256=Il6zlJxEoQiz1AdLkQhk8m_7WIoS50MmOMLVgXBGcN4,170
2
2
  _numba_cuda_redirector.py,sha256=hJphzrGT9qp3-NtdQ51_Jv94lqlmrwf6S5Sh_UFvaEE,2799
3
- numba_cuda/VERSION,sha256=jaai7lw2uydw3mRcvNaJIJ9ZNbOA1ocp4QfQJQpjO1M,7
3
+ numba_cuda/VERSION,sha256=NWycnwmdp_mzgMb-3FEHfumN4iQCs3i8D0Mf3Feqx1c,7
4
4
  numba_cuda/__init__.py,sha256=4P6_SMBIgPf3E7O2rxPXSz_MbSAHNqx0Rng-nsgyGQY,211
5
5
  numba_cuda/_version.py,sha256=4SQp70c0OEm9jbZUU0iI72YiAs79LOoMQiF0tTMEg2I,285
6
6
  numba_cuda/numba/cuda/__init__.py,sha256=jXQGekoQK5wxXH7z8cUoxML1rOr4idJHU7evV2G_0DU,3531
@@ -46,7 +46,7 @@ numba_cuda/numba/cuda/serialize.py,sha256=Zx2a2_G27zfbxFJKTJkJO2AThc3MBracTAjV-s
46
46
  numba_cuda/numba/cuda/simulator_init.py,sha256=FT9Max5WAS4M0JLGomoZxLKstXeLi0Iuq-1pqxUQyC8,590
47
47
  numba_cuda/numba/cuda/stubs.py,sha256=2UALzKfJYa3IlwpYxR-6nJSxQYaCnkioQLd48JP2KGc,16667
48
48
  numba_cuda/numba/cuda/target.py,sha256=Rgi5MHG-m0k42_x0Q_9cTZgDtdGCq68MO-eYmJHmtoM,15423
49
- numba_cuda/numba/cuda/testing.py,sha256=iYWK_y4OxzUEgLiRkz5oPGKCTYJY_bGDX8mCg2TfWLo,11792
49
+ numba_cuda/numba/cuda/testing.py,sha256=rewVOq7240JyrffueHPudjFNV87wU1mWCQ28HzsUUP8,11856
50
50
  numba_cuda/numba/cuda/types.py,sha256=5QsQHbw4Vj0FCxHlltMcAbSEjPPCgchR6uQgZ1fD5No,2967
51
51
  numba_cuda/numba/cuda/ufuncs.py,sha256=lT2hoM3U4OuNrfUDuIt7lrfOBEA_pUKpdDKU_Oe_XF8,23714
52
52
  numba_cuda/numba/cuda/utils.py,sha256=6dA0uW4eX4ms3TRZWJQk3Nb70ncG0A1S0NwYhGR4d1Q,19259
@@ -253,7 +253,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py,sha256=oLP8RUmgh53J
253
253
  numba_cuda/numba/cuda/tests/cudapy/test_idiv.py,sha256=M89feOfFGRiva1FpOm5bA3OpyNjEFugT5RzjsKFCNdI,1194
254
254
  numba_cuda/numba/cuda/tests/cudapy/test_inline.py,sha256=R-yq6VlRYQRrKaOa-n0OJE7lBq8dMrO-OOBcrjlD-9U,5048
255
255
  numba_cuda/numba/cuda/tests/cudapy/test_inspect.py,sha256=idteoOYUoSidwCkQ395GQq8GcbcLIWTsegAoQCAOSfU,7540
256
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=_tHm47rDfosrg3yXm6Hr1m1I4xgqzwIk7MKRjk6tnmU,37554
256
+ numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=CgRu8tvy_H8HC2KbFPYEvM7Mjiil_6dru01cBrJJSM0,38080
257
257
  numba_cuda/numba/cuda/tests/cudapy/test_ipc.py,sha256=Z6CC24xKU2rhdJRK-UAPeorYoE1x-0ZnS4V0YjV5Yy4,10698
258
258
  numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py,sha256=6qBuuabtsxa5bITbZ-KH6_AR17ZoG8fFyMHG4vnZ1bs,9044
259
259
  numba_cuda/numba/cuda/tests/cudapy/test_iterators.py,sha256=iAF15K202ylm9NdctQFBp7Kr4NIXoA9y4gYJ8L19dFU,2483
@@ -349,9 +349,9 @@ numba_cuda/numba/cuda/typing/context.py,sha256=Smca11cA_d2T6RlIkK6ERu30x72P0k8Te
349
349
  numba_cuda/numba/cuda/typing/enumdecl.py,sha256=ptGYmkVIzvG_hWa-h0Se-8IqK4tE5N9PZqfTsFy8p34,1652
350
350
  numba_cuda/numba/cuda/typing/npydecl.py,sha256=Fe-tl9Tsp3-etcTSEFVy50lm6DbrsJgdlcaGWtCIWxM,20057
351
351
  numba_cuda/numba/cuda/typing/templates.py,sha256=4xQaKFeXJSuZT8YvPYWGecJvjbBV0k_idKwhfx6G74E,51619
352
- numba_cuda-0.20.0.dist-info/licenses/LICENSE,sha256=qeS8IOtEoJl4qHh9S9LIPPJm77GQlJld2Rc3RNMhgk8,1327
353
- numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba,sha256=OnDgohytAxRyRvY2PQe_dvriFscneXB_SuXbBrys8Gc,1286
354
- numba_cuda-0.20.0.dist-info/METADATA,sha256=ukJGGzojlcEaHHPRirauk9IPyUVdGOSh5Ech8YndLUk,3684
355
- numba_cuda-0.20.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
356
- numba_cuda-0.20.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
357
- numba_cuda-0.20.0.dist-info/RECORD,,
352
+ numba_cuda-0.20.1.dist-info/licenses/LICENSE,sha256=qeS8IOtEoJl4qHh9S9LIPPJm77GQlJld2Rc3RNMhgk8,1327
353
+ numba_cuda-0.20.1.dist-info/licenses/LICENSE.numba,sha256=OnDgohytAxRyRvY2PQe_dvriFscneXB_SuXbBrys8Gc,1286
354
+ numba_cuda-0.20.1.dist-info/METADATA,sha256=cJv8o_FMsnYhX3WU9MPU0VS8mVfWEXpD6PnCCEyDqWg,3687
355
+ numba_cuda-0.20.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
356
+ numba_cuda-0.20.1.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
357
+ numba_cuda-0.20.1.dist-info/RECORD,,