numba-cuda 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +0 -8
  3. numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
  4. numba_cuda/numba/cuda/api_util.py +6 -0
  5. numba_cuda/numba/cuda/cgutils.py +1291 -0
  6. numba_cuda/numba/cuda/codegen.py +32 -14
  7. numba_cuda/numba/cuda/compiler.py +113 -10
  8. numba_cuda/numba/cuda/core/caching.py +741 -0
  9. numba_cuda/numba/cuda/core/callconv.py +338 -0
  10. numba_cuda/numba/cuda/core/codegen.py +168 -0
  11. numba_cuda/numba/cuda/core/compiler.py +205 -0
  12. numba_cuda/numba/cuda/core/typed_passes.py +139 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +1 -1
  14. numba_cuda/numba/cuda/cudadecl.py +0 -268
  15. numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
  16. numba_cuda/numba/cuda/cudadrv/devices.py +4 -6
  17. numba_cuda/numba/cuda/cudadrv/driver.py +105 -50
  18. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
  19. numba_cuda/numba/cuda/cudaimpl.py +4 -178
  20. numba_cuda/numba/cuda/debuginfo.py +469 -3
  21. numba_cuda/numba/cuda/device_init.py +0 -1
  22. numba_cuda/numba/cuda/dispatcher.py +311 -14
  23. numba_cuda/numba/cuda/extending.py +2 -1
  24. numba_cuda/numba/cuda/fp16.py +348 -0
  25. numba_cuda/numba/cuda/intrinsics.py +1 -1
  26. numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
  27. numba_cuda/numba/cuda/lowering.py +1833 -8
  28. numba_cuda/numba/cuda/mathimpl.py +2 -90
  29. numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
  30. numba_cuda/numba/cuda/nvvmutils.py +2 -1
  31. numba_cuda/numba/cuda/printimpl.py +2 -1
  32. numba_cuda/numba/cuda/serialize.py +264 -0
  33. numba_cuda/numba/cuda/simulator/__init__.py +2 -0
  34. numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
  35. numba_cuda/numba/cuda/stubs.py +0 -308
  36. numba_cuda/numba/cuda/target.py +13 -5
  37. numba_cuda/numba/cuda/testing.py +156 -5
  38. numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
  39. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
  40. numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
  41. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +16 -5
  42. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +5 -1
  43. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
  44. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  45. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
  46. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
  47. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  48. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
  49. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  50. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -5
  51. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
  52. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
  53. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
  54. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
  55. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
  56. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
  57. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +2 -5
  58. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
  59. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
  60. numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
  61. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
  62. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
  63. numba_cuda/numba/cuda/utils.py +785 -0
  64. numba_cuda/numba/cuda/vector_types.py +1 -1
  65. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
  66. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +69 -56
  67. numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
  68. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
  69. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
  70. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
@@ -3,7 +3,6 @@ import numpy as np
 import operator
 import re
 from numba import cuda, int64
-from numba.cuda import compile_ptx
 from numba.core.errors import TypingError
 from numba.core.types import f2
 from numba.cuda.testing import (
@@ -11,6 +10,7 @@ from numba.cuda.testing import (
     CUDATestCase,
     skip_on_cudasim,
     skip_unless_cc_53,
+    skip_if_nvjitlink_missing,
 )
 
 
@@ -174,27 +174,27 @@ def hlt_func_2(x, y):
     return cuda.fp16.hlt(x, y)
 
 
-def test_multiple_hcmp_1(r, a, b, c):
+def multiple_hcmp_1(r, a, b, c):
     # float16 predicates used in two separate functions
     r[0] = hlt_func_1(a, b) and hlt_func_2(b, c)
 
 
-def test_multiple_hcmp_2(r, a, b, c):
+def multiple_hcmp_2(r, a, b, c):
     # The same float16 predicate used in the caller and callee
     r[0] = hlt_func_1(a, b) and cuda.fp16.hlt(b, c)
 
 
-def test_multiple_hcmp_3(r, a, b, c):
+def multiple_hcmp_3(r, a, b, c):
     # Different float16 predicates used in the caller and callee
     r[0] = hlt_func_1(a, b) and cuda.fp16.hge(c, b)
 
 
-def test_multiple_hcmp_4(r, a, b, c):
+def multiple_hcmp_4(r, a, b, c):
     # The same float16 predicates used twice in a function
     r[0] = cuda.fp16.hlt(a, b) and cuda.fp16.hlt(b, c)
 
 
-def test_multiple_hcmp_5(r, a, b, c):
+def multiple_hcmp_5(r, a, b, c):
     # Different float16 predicates used in a function
     r[0] = cuda.fp16.hlt(a, b) and cuda.fp16.hge(c, b)
 
@@ -561,13 +561,13 @@ class TestCudaIntrinsic(CUDATestCase):
 
     def test_popc_u1(self):
         compiled = cuda.jit("void(int32[:], uint8)")(simple_popc)
-        ary = np.zeros(1, dtype=np.int8)
+        ary = np.zeros(1, dtype=np.int32)
         compiled[1, 1](ary, np.uint8(0xFF))
         self.assertEqual(ary[0], 8)
 
     def test_popc_u2(self):
         compiled = cuda.jit("void(int32[:], uint16)")(simple_popc)
-        ary = np.zeros(1, dtype=np.int16)
+        ary = np.zeros(1, dtype=np.int32)
         compiled[1, 1](ary, np.uint16(0xFFFF))
         self.assertEqual(ary[0], 16)
 
@@ -585,13 +585,13 @@ class TestCudaIntrinsic(CUDATestCase):
 
     def test_bit_count_u1(self):
         compiled = cuda.jit("void(int32[:], uint8)")(simple_bit_count)
-        ary = np.zeros(1, dtype=np.int8)
+        ary = np.zeros(1, dtype=np.int32)
         compiled[1, 1](ary, np.uint8(0xFF))
         self.assertEqual(ary[0], 8)
 
     def test_bit_count_u2(self):
         compiled = cuda.jit("void(int32[:], uint16)")(simple_bit_count)
-        ary = np.zeros(1, dtype=np.int16)
+        ary = np.zeros(1, dtype=np.int32)
         compiled[1, 1](ary, np.uint16(0xFFFF))
         self.assertEqual(ary[0], 16)
 
```
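Note: the dtype fixes above align the result array with the `int32[:]` slot in the explicit `cuda.jit` signature; an `int8`/`int16` host array does not match a kernel compiled for `int32[:]`. A minimal sketch of the corrected pattern (the body of `simple_popc` is an assumption; only its name and signature appear in this diff):

```python
import numpy as np
from numba import cuda

@cuda.jit("void(int32[:], uint8)")
def simple_popc(r, x):
    # Assumed body: count the set bits of x with the popc intrinsic.
    r[0] = cuda.popc(x)

ary = np.zeros(1, dtype=np.int32)  # must match the declared int32[:] parameter
simple_popc[1, 1](ary, np.uint8(0xFF))
assert ary[0] == 8  # 0xFF has eight set bits
```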
test_intrinsics.py (continued):

```diff
@@ -639,9 +639,11 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hadd_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(simple_hadd_scalar)
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hadd_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("add.f16", ptx)
 
     @skip_unless_cc_53
@@ -666,9 +668,13 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hfma_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2, f2, f2)", lto=True)(
+            simple_hfma_scalar
+        )
         args = (f2[:], f2, f2, f2)
-        ptx, _ = compile_ptx(simple_hfma_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("fma.rn.f16", ptx)
 
     @skip_unless_cc_53
@@ -691,14 +697,16 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hsub_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(simple_hsub_scalar)
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hsub_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("sub.f16", ptx)
 
     @skip_unless_cc_53
     def test_hmul(self):
-        compiled = cuda.jit()(simple_hmul)
+        compiled = cuda.jit(simple_hmul)
         ary = np.zeros(1, dtype=np.float16)
         arg1 = np.array([3.0], dtype=np.float16)
         arg2 = np.array([4.0], dtype=np.float16)
@@ -716,9 +724,11 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hmul_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(simple_hmul_scalar)
         args = (f2[:], f2, f2)
-        ptx, _ = compile_ptx(simple_hmul_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("mul.f16", ptx)
 
     @skip_unless_cc_53
@@ -761,14 +771,16 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_hneg_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2)", lto=True)(simple_hneg_scalar)
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_hneg_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("neg.f16", ptx)
 
     @skip_unless_cc_53
     def test_habs(self):
-        compiled = cuda.jit()(simple_habs)
+        compiled = cuda.jit(simple_habs)
         ary = np.zeros(1, dtype=np.float16)
         arg1 = np.array([-3.0], dtype=np.float16)
         compiled[1, 1](ary, arg1)
@@ -784,9 +796,11 @@ class TestCudaIntrinsic(CUDATestCase):
         np.testing.assert_allclose(ary[0], ref)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_habs_ptx(self):
+        compiled = cuda.jit("void(f2[:], f2)", lto=True)(simple_habs_scalar)
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_habs_scalar, args)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("abs.f16", ptx)
 
     @skip_unless_cc_53
@@ -908,11 +922,11 @@ class TestCudaIntrinsic(CUDATestCase):
     @skip_unless_cc_53
     def test_multiple_float16_comparisons(self):
         functions = (
-            test_multiple_hcmp_1,
-            test_multiple_hcmp_2,
-            test_multiple_hcmp_3,
-            test_multiple_hcmp_4,
-            test_multiple_hcmp_5,
+            multiple_hcmp_1,
+            multiple_hcmp_2,
+            multiple_hcmp_3,
+            multiple_hcmp_4,
+            multiple_hcmp_5,
         )
         for fn in functions:
             with self.subTest(fn=fn):
```
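Across the `*_ptx` tests above, the removed `compile_ptx` helper is replaced by compiling an LTO-enabled kernel and asking the dispatcher for the PTX produced for a concrete argument tuple; this requires nvJitLink, hence the new `skip_if_nvjitlink_missing` guards. A hedged sketch of the new inspection pattern (the body of `simple_hadd_scalar` is assumed; the `cuda.jit(..., lto=True)` and `inspect_lto_ptx` calls are taken verbatim from this diff):

```python
from numba import cuda
from numba.core.types import f2

def simple_hadd_scalar(r, x, y):
    # Assumed body: float16 addition, lowered to add.f16 under LTO.
    r[0] = x + y

# Compile with link-time optimization enabled, then inspect the LTO PTX
# generated for a concrete argument tuple.
compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(simple_hadd_scalar)
ptx = compiled.inspect_lto_ptx((f2[:], f2, f2))
assert "add.f16" in ptx
```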
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_operator.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_operator.py
@@ -4,10 +4,12 @@ from numba.cuda.testing import (
     CUDATestCase,
     skip_unless_cc_53,
     skip_on_cudasim,
+    skip_if_nvjitlink_missing,
 )
 from numba import cuda
+from numba.core import types
 from numba.core.types import f2, b1
-from numba.cuda import compile_ptx
+from numba.core.typing import signature
 import operator
 import itertools
 from numba.np.numpy_support import from_dtype
@@ -87,27 +89,27 @@ def hlt_func_2(x, y):
     return x < y
 
 
-def test_multiple_hcmp_1(r, a, b, c):
+def multiple_hcmp_1(r, a, b, c):
     # float16 predicates used in two separate functions
     r[0] = hlt_func_1(a, b) and hlt_func_2(b, c)
 
 
-def test_multiple_hcmp_2(r, a, b, c):
+def multiple_hcmp_2(r, a, b, c):
     # The same float16 predicate used in the caller and callee
     r[0] = hlt_func_1(a, b) and b < c
 
 
-def test_multiple_hcmp_3(r, a, b, c):
+def multiple_hcmp_3(r, a, b, c):
     # Different float16 predicates used in the caller and callee
     r[0] = hlt_func_1(a, b) and c >= b
 
 
-def test_multiple_hcmp_4(r, a, b, c):
+def multiple_hcmp_4(r, a, b, c):
     # The same float16 predicates used twice in a function
     r[0] = a < b and b < c
 
 
-def test_multiple_hcmp_5(r, a, b, c):
+def multiple_hcmp_5(r, a, b, c):
     # Different float16 predicates used in a function
     r[0] = a < b and c >= b
 
@@ -172,16 +174,19 @@ class TestOperatorModule(CUDATestCase):
         np.testing.assert_allclose(got, expected)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_binary_ptx(self):
         functions = (simple_fp16add, simple_fp16sub, simple_fp16mul)
         instrs = ("add.f16", "sub.f16", "mul.f16")
         args = (f2[:], f2, f2)
         for fn, instr in zip(functions, instrs):
             with self.subTest(instr=instr):
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit("void(f2[:], f2, f2)", lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
                 self.assertIn(instr, ptx)
 
     @skip_unless_cc_53
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_mixed_fp16_binary_arithmetic(self):
         functions = (
             simple_fp16add,
@@ -193,7 +198,7 @@ class TestOperatorModule(CUDATestCase):
         types = (np.int8, np.int16, np.int32, np.int64, np.float32, np.float64)
         for (fn, op), ty in itertools.product(zip(functions, ops), types):
             with self.subTest(op=op, ty=ty):
-                kernel = cuda.jit(fn)
+                kernel = cuda.jit(fn, lto=True)
 
                 arg1 = np.random.random(1).astype(np.float16)
                 arg2 = (np.random.random(1) * 100).astype(ty)
@@ -205,6 +210,7 @@ class TestOperatorModule(CUDATestCase):
         np.testing.assert_allclose(got, expected)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_inplace_binary_ptx(self):
         functions = (simple_fp16_iadd, simple_fp16_isub, simple_fp16_imul)
         instrs = ("add.f16", "sub.f16", "mul.f16")
@@ -212,7 +218,8 @@ class TestOperatorModule(CUDATestCase):
 
         for fn, instr in zip(functions, instrs):
             with self.subTest(instr=instr):
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit("void(f2[:], f2)", lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
                 self.assertIn(instr, ptx)
 
     @skip_unless_cc_53
@@ -253,16 +260,19 @@ class TestOperatorModule(CUDATestCase):
         np.testing.assert_allclose(got, expected)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_neg_ptx(self):
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_fp16neg, args)
+        compiled = cuda.jit("void(f2[:], f2)", lto=True)(simple_fp16neg)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("neg.f16", ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_abs_ptx(self):
         args = (f2[:], f2)
-        ptx, _ = compile_ptx(simple_fp16abs, args)
-
+        compiled = cuda.jit("void(f2[:], f2)", lto=True)(simple_fp16abs)
+        ptx = compiled.inspect_lto_ptx(args)
         self.assertIn("abs.f16", ptx)
 
     @skip_unless_cc_53
@@ -331,11 +341,11 @@ class TestOperatorModule(CUDATestCase):
     @skip_unless_cc_53
     def test_multiple_float16_comparisons(self):
         functions = (
-            test_multiple_hcmp_1,
-            test_multiple_hcmp_2,
-            test_multiple_hcmp_3,
-            test_multiple_hcmp_4,
-            test_multiple_hcmp_5,
+            multiple_hcmp_1,
+            multiple_hcmp_2,
+            multiple_hcmp_3,
+            multiple_hcmp_4,
+            multiple_hcmp_5,
        )
         for fn in functions:
             with self.subTest(fn=fn):
@@ -350,11 +360,11 @@ class TestOperatorModule(CUDATestCase):
     @skip_unless_cc_53
     def test_multiple_float16_comparisons_false(self):
         functions = (
-            test_multiple_hcmp_1,
-            test_multiple_hcmp_2,
-            test_multiple_hcmp_3,
-            test_multiple_hcmp_4,
-            test_multiple_hcmp_5,
+            multiple_hcmp_1,
+            multiple_hcmp_2,
+            multiple_hcmp_3,
+            multiple_hcmp_4,
+            multiple_hcmp_5,
         )
         for fn in functions:
             with self.subTest(fn=fn):
@@ -367,6 +377,7 @@ class TestOperatorModule(CUDATestCase):
             self.assertFalse(ary[0])
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_comparison_ptx(self):
         functions = (
             simple_fp16_gt,
@@ -390,16 +401,18 @@ class TestOperatorModule(CUDATestCase):
             "setp.lt.f16",
             "setp.le.f16",
             "setp.eq.f16",
-            "setp.ne.f16",
+            "setp.neu.f16",
         )
         args = (b1[:], f2, f2)
 
         for fn, op, s in zip(functions, ops, opstring):
             with self.subTest(op=op):
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit("void(b1[:], f2, f2)", lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
                 self.assertIn(s, ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_fp16_int8_comparison_ptx(self):
         # Test that int8 can be safely converted to fp16
         # in a comparison
@@ -426,15 +439,17 @@ class TestOperatorModule(CUDATestCase):
             operator.lt: "setp.lt.f16",
             operator.le: "setp.le.f16",
             operator.eq: "setp.eq.f16",
-            operator.ne: "setp.ne.f16",
+            operator.ne: "setp.neu.f16",
         }
         for fn, op in zip(functions, ops):
             with self.subTest(op=op):
                 args = (b1[:], f2, from_dtype(np.int8))
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit(signature(types.void, *args), lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
                 self.assertIn(opstring[op], ptx)
 
     @skip_on_cudasim("Compilation unsupported in the simulator")
+    @skip_if_nvjitlink_missing("Numbast generated bindings")
     def test_mixed_fp16_comparison_promotion_ptx(self):
         functions = (
             simple_fp16_gt,
```
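Note the expected opcode for `operator.ne` changing from `setp.ne.f16` to `setp.neu.f16` in the two hunks above: the `u` suffix is PTX's unordered comparison, which is also true when either operand is NaN. That matches Python and NumPy semantics, where `!=` is the one comparison that holds for NaN operands, as a quick host-side check shows:

```python
import numpy as np

a = np.float16("nan")
# IEEE 754: ordered comparisons with NaN are false, but != is true,
# so unordered not-equal (setp.neu) is the faithful lowering.
assert not (a == a)
assert a != a
```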
test_operator.py (continued):

```diff
@@ -475,7 +490,8 @@ class TestOperatorModule(CUDATestCase):
             with self.subTest(op=op, ty=ty):
                 arg2_ty = np.result_type(np.float16, ty)
                 args = (b1[:], f2, from_dtype(arg2_ty))
-                ptx, _ = compile_ptx(fn, args)
+                compiled = cuda.jit(signature(types.void, *args), lto=True)(fn)
+                ptx = compiled.inspect_lto_ptx(args)
 
                 ops = opstring[op] + opsuffix[arg2_ty]
                 self.assertIn(ops, ptx)
```
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py
@@ -4,7 +4,7 @@ from numba.cuda.testing import (
     unittest,
     CUDATestCase,
 )
-from numba import config, cuda
+from numba import cuda
 
 # Basic tests that stream APIs execute on the hardware and in the simulator.
 #
@@ -38,10 +38,7 @@ class TestStreamAPI(CUDATestCase):
         # We don't test synchronization on the stream because it's not a real
         # stream - we used a dummy pointer for testing the API, so we just
         # ensure that the stream handle matches the external stream pointer.
-        if config.CUDA_USE_NVIDIA_BINDING:
-            value = int(s.handle)
-        else:
-            value = s.handle.value
+        value = s.handle.value
         self.assertEqual(ptr, value)
 
     @skip_unless_cudasim("External streams are usable with hardware")
```
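With the removed branch, stream handles expose `.value` uniformly, so callers no longer switch on `config.CUDA_USE_NVIDIA_BINDING`. A hedged sketch of the simplified check (the dummy pointer value is illustrative; `cuda.external_stream` is the public API this test exercises):

```python
from numba import cuda

ptr = 0x12345678  # dummy pointer standing in for a real cudaStream_t
s = cuda.external_stream(ptr)

# 0.18.0: the handle wrapper is uniform across driver bindings, so
# .value is always available; no config-dependent branch is needed.
assert s.handle.value == ptr
```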
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
@@ -17,16 +17,16 @@ def _make_ufunc_usecase(ufunc):
     return fn
 
 
-# This test would also be a CUDATestCase, but to avoid a confusing and
-# potentially dangerous inheritance diamond with setUp methods that modify
-# global state, we implement the necessary parts of CUDATestCase within this
-# class instead. These are:
+# This class provides common functionality for UFunc tests. The UFunc tests
+# are quite long-running in comparison to other tests, so we break the tests up
+# into multiple test classes for distribution across workers.
 #
-# - Disable parallel testing with _numba_parallel_test_.
-# - Disabling CUDA performance warnings for the duration of tests.
-class TestUFuncs(BasicUFuncTest, TestCase):
-    _numba_parallel_test_ = False
-
+# This class would also be a CUDATestCase, but to avoid a confusing and
+# potentially dangerous inheritance diamond with setUp methods that modify
+# global state, we implement the necessary part of CUDATestCase within this
+# class instead. This disables CUDA performance warnings for the duration of
+# tests.
+class CUDAUFuncTestBase(BasicUFuncTest, TestCase):
     def setUp(self):
         BasicUFuncTest.setUp(self)
 
```
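The rewritten comment keeps the original rationale: if `BasicUFuncTest` and `CUDATestCase` both defined `setUp` without cooperative `super()` calls, combining them would silently drop one side's setup. A minimal sketch of that hazard (class names reused for illustration only; these are not the real test classes):

```python
import unittest

class BasicUFuncTest(unittest.TestCase):
    def setUp(self):
        self.fixtures_ready = True  # stands in for ufunc fixtures

class CUDATestCase(unittest.TestCase):
    def setUp(self):
        self.warnings_muted = True  # stands in for global-state changes

class Diamond(BasicUFuncTest, CUDATestCase):
    def test_setup(self):
        # The MRO selects BasicUFuncTest.setUp; CUDATestCase.setUp never
        # runs, so its global-state changes are silently skipped.
        assert self.fixtures_ready
        assert not hasattr(self, "warnings_muted")
```

Splitting the suite into subclasses of a single base, as the hunks below do, avoids the diamond while still sharing setup logic.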
test_ufuncs.py (continued):

```diff
@@ -146,6 +146,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     ############################################################################
     # Trigonometric Functions
 
+
+class TestBasicTrigUFuncs(CUDAUFuncTestBase):
     def test_sin_ufunc(self):
         self.basic_ufunc_test(np.sin, kinds="cf")
 
@@ -167,6 +169,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     def test_arctan2_ufunc(self):
         self.basic_ufunc_test(np.arctan2, kinds="f")
 
+
+class TestHypTrigUFuncs(CUDAUFuncTestBase):
     def test_hypot_ufunc(self):
         self.basic_ufunc_test(np.hypot, kinds="f")
 
@@ -207,6 +211,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
 
         self.basic_ufunc_test(np.arctanh, skip_inputs=to_skip, kinds="cf")
 
+
+class TestConversionUFuncs(CUDAUFuncTestBase):
     def test_deg2rad_ufunc(self):
         self.basic_ufunc_test(np.deg2rad, kinds="f")
 
@@ -221,6 +227,9 @@ class TestUFuncs(BasicUFuncTest, TestCase):
 
     ############################################################################
     # Comparison functions
+
+
+class TestComparisonUFuncs1(CUDAUFuncTestBase):
     def test_greater_ufunc(self):
         self.signed_unsigned_cmp_test(np.greater)
 
@@ -239,6 +248,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     def test_equal_ufunc(self):
         self.signed_unsigned_cmp_test(np.equal)
 
+
+class TestLogicalUFuncs(CUDAUFuncTestBase):
     def test_logical_and_ufunc(self):
         self.basic_ufunc_test(np.logical_and)
 
@@ -251,6 +262,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     def test_logical_not_ufunc(self):
         self.basic_ufunc_test(np.logical_not)
 
+
+class TestMinmaxUFuncs(CUDAUFuncTestBase):
     def test_maximum_ufunc(self):
         self.basic_ufunc_test(np.maximum)
 
@@ -263,6 +276,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     def test_fmin_ufunc(self):
         self.basic_ufunc_test(np.fmin)
 
+
+class TestBitwiseUFuncs(CUDAUFuncTestBase):
     def test_bitwise_and_ufunc(self):
         self.basic_int_ufunc_test(np.bitwise_and)
 
@@ -286,6 +301,8 @@ class TestUFuncs(BasicUFuncTest, TestCase):
     ############################################################################
     # Mathematical Functions
 
+
+class TestLogUFuncs(CUDAUFuncTestBase):
     def test_log_ufunc(self):
         self.basic_ufunc_test(np.log, kinds="cf")
 
```
```diff
--- a/numba_cuda/numba/cuda/tests/cudapy/test_warning.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_warning.py
@@ -1,7 +1,12 @@
 import numpy as np
 from numba import cuda
-from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
-from numba.tests.support import linux_only, override_config
+from numba.cuda.cudadrv import driver
+from numba.cuda.testing import (
+    unittest,
+    CUDATestCase,
+    skip_on_cudasim,
+)
+from numba.tests.support import linux_only, override_config, run_in_subprocess
 from numba.core.errors import NumbaPerformanceWarning
 from numba.core import config
 import warnings
@@ -9,6 +14,26 @@ import warnings
 
 @skip_on_cudasim("cudasim does not raise performance warnings")
 class TestWarnings(CUDATestCase):
+    def test_float16_warn_if_lto_missing(self):
+        fp16_kernel_invocation = """
+import math
+from numba import cuda, core
+
+@cuda.jit
+def kernel():
+    x = core.types.float16(1.0)
+    y = math.sin(x)
+
+kernel[1,1]()
+kernel[1,1]()
+"""
+        performance_warning = "float16 relies on LTO for performance"
+        expected_warning_count = 0 if driver._have_nvjitlink() else 1
+        _, err = run_in_subprocess(fp16_kernel_invocation)
+        self.assertEqual(
+            err.decode().count(performance_warning), expected_warning_count
+        )
+
     def test_inefficient_launch_configuration(self):
         @cuda.jit
         def kernel():
```
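The new test runs a float16 kernel in a subprocess and counts the "float16 relies on LTO for performance" warning on stderr: zero occurrences when nvJitLink is available, exactly one otherwise (two launches, one warning, i.e. warn-once behavior). For user code, the remedy the warning points toward is the `lto=True` option used throughout the updated tests; a hedged sketch:

```python
import math
import numpy as np
from numba import cuda

# Compiling with LTO enabled (requires the nvJitLink-backed toolchain)
# is the configuration under which the float16 performance warning is
# not expected to fire.
@cuda.jit(lto=True)
def kernel(x):
    x[0] = math.sin(x[0])

x = np.zeros(1, dtype=np.float16)
kernel[1, 1](x)
```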
```diff
--- /dev/null
+++ b/numba_cuda/numba/cuda/tests/enum_usecases.py
@@ -0,0 +1,56 @@
+from enum import Enum, IntEnum
+
+
+class Color(Enum):
+    red = 1
+    green = 2
+    blue = 3
+
+
+class Shake(Enum):
+    vanilla = 7
+    chocolate = 4
+    cookies = 9
+    # Same as Color.blue
+    mint = 3
+
+
+class Planet(Enum):
+    MERCURY = (3.303e23, 2.4397e6)
+    VENUS = (4.869e24, 6.0518e6)
+    EARTH = (5.976e24, 6.37814e6)
+    MARS = (6.421e23, 3.3972e6)
+    JUPITER = (1.9e27, 7.1492e7)
+    SATURN = (5.688e26, 6.0268e7)
+    URANUS = (8.686e25, 2.5559e7)
+    NEPTUNE = (1.024e26, 2.4746e7)
+
+
+class HeterogeneousEnum(Enum):
+    red = 1.0
+    green = 2.0
+    blue = 3j
+
+
+class Shape(IntEnum):
+    # Same as Color.green
+    circle = 2
+    # Same as RequestError.internal_error
+    square = 500
+
+
+class RequestError(IntEnum):
+    dummy = 2
+    not_found = 404
+    internal_error = 500
+
+
+class IntEnumWithNegatives(IntEnum):
+    # Used for testing of hash, need to make sure -1 -> -2 to comply with CPy
+    one = 1
+    two = 2
+    too = 2
+    three = 3
+    negone = -1
+    negtwo = -2
+    negthree = -3
```
```diff
--- a/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py
+++ b/numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py
@@ -9,7 +9,6 @@ from numba.cuda.cudadrv import nvvm
 from numba.cuda.testing import (
     unittest,
     skip_on_cudasim,
-    SerialMixin,
     skip_unless_conda_cudatoolkit,
 )
 from numba.cuda.cuda_paths import (
@@ -24,7 +23,7 @@ has_cuda = nvvm.is_available()
 has_mp_get_context = hasattr(mp, "get_context")
 
 
-class LibraryLookupBase(SerialMixin, unittest.TestCase):
+class LibraryLookupBase(unittest.TestCase):
     def setUp(self):
         ctx = mp.get_context("spawn")
 
```
```diff
--- a/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py
+++ b/numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py
@@ -1,6 +1,6 @@
 from numba.cuda.cudadrv import nvvm
 from numba.cuda.testing import skip_on_cudasim
-from numba.core import utils
+from numba.cuda import utils
 
 from llvmlite import ir
 from llvmlite import binding as llvm
```
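This last hunk reflects a broader theme of the release visible in the file list above: numba-cuda now vendors pieces of numba.core (utils.py, cgutils.py, serialize.py, caching.py, callconv.py) so the CUDA target depends less on numba internals. A hypothetical compatibility shim for code that must run against both versions, assuming the vendored module keeps the helpers it needs:

```python
# Prefer the vendored module on newer numba-cuda; fall back to the
# numba core copy on older releases. Hypothetical shim, not from the diff.
try:
    from numba.cuda import utils
except ImportError:
    from numba.core import utils
```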