numba-cuda 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70):
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +0 -8
  3. numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
  4. numba_cuda/numba/cuda/api_util.py +6 -0
  5. numba_cuda/numba/cuda/cgutils.py +1291 -0
  6. numba_cuda/numba/cuda/codegen.py +32 -14
  7. numba_cuda/numba/cuda/compiler.py +113 -10
  8. numba_cuda/numba/cuda/core/caching.py +741 -0
  9. numba_cuda/numba/cuda/core/callconv.py +338 -0
  10. numba_cuda/numba/cuda/core/codegen.py +168 -0
  11. numba_cuda/numba/cuda/core/compiler.py +205 -0
  12. numba_cuda/numba/cuda/core/typed_passes.py +139 -0
  13. numba_cuda/numba/cuda/cuda_paths.py +1 -1
  14. numba_cuda/numba/cuda/cudadecl.py +0 -268
  15. numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
  16. numba_cuda/numba/cuda/cudadrv/devices.py +4 -6
  17. numba_cuda/numba/cuda/cudadrv/driver.py +105 -50
  18. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
  19. numba_cuda/numba/cuda/cudaimpl.py +4 -178
  20. numba_cuda/numba/cuda/debuginfo.py +469 -3
  21. numba_cuda/numba/cuda/device_init.py +0 -1
  22. numba_cuda/numba/cuda/dispatcher.py +311 -14
  23. numba_cuda/numba/cuda/extending.py +2 -1
  24. numba_cuda/numba/cuda/fp16.py +348 -0
  25. numba_cuda/numba/cuda/intrinsics.py +1 -1
  26. numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
  27. numba_cuda/numba/cuda/lowering.py +1833 -8
  28. numba_cuda/numba/cuda/mathimpl.py +2 -90
  29. numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
  30. numba_cuda/numba/cuda/nvvmutils.py +2 -1
  31. numba_cuda/numba/cuda/printimpl.py +2 -1
  32. numba_cuda/numba/cuda/serialize.py +264 -0
  33. numba_cuda/numba/cuda/simulator/__init__.py +2 -0
  34. numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
  35. numba_cuda/numba/cuda/stubs.py +0 -308
  36. numba_cuda/numba/cuda/target.py +13 -5
  37. numba_cuda/numba/cuda/testing.py +156 -5
  38. numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
  39. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
  40. numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
  41. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +16 -5
  42. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +5 -1
  43. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
  44. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  45. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
  46. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
  47. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  48. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
  49. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  50. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +1 -5
  51. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
  52. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
  53. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
  54. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
  55. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
  56. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
  57. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +2 -5
  58. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
  59. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
  60. numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
  61. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
  62. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
  63. numba_cuda/numba/cuda/utils.py +785 -0
  64. numba_cuda/numba/cuda/vector_types.py +1 -1
  65. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
  66. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +69 -56
  67. numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
  68. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
  69. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
  70. {numba_cuda-0.16.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,6 @@ import numpy as np
2
2
 
3
3
  from numba import vectorize, guvectorize
4
4
  from numba import cuda
5
- from numba.cuda.cudadrv import driver
6
5
  from numba.cuda.testing import unittest, ContextResettingTestCase, ForeignArray
7
6
  from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
8
7
  from numba.tests.support import linux_only, override_config
@@ -32,10 +31,7 @@ class TestCudaArrayInterface(ContextResettingTestCase):
32
31
  self.assertPointersEqual(wrapped, d_arr)
33
32
 
34
33
  def get_stream_value(self, stream):
35
- if driver.USE_NV_BINDING:
36
- return int(stream.handle)
37
- else:
38
- return stream.handle.value
34
+ return stream.handle.value
39
35
 
40
36
  @skip_if_external_memmgr("Ownership not relevant with external memmgr")
41
37
  def test_ownership(self):
@@ -403,6 +403,387 @@ class TestCudaDebugInfo(CUDATestCase):
403
403
  match = re.compile(pat6).search(llvm_ir)
404
404
  self.assertIsNotNone(match, msg=llvm_ir)
405
405
 
406
+ def test_DW_LANG(self):
407
+ @cuda.jit(debug=True)
408
+ def foo():
409
+ """
410
+ CHECK: distinct !DICompileUnit
411
+ CHECK-SAME: emissionKind: FullDebug
412
+ CHECK-SAME: isOptimized: true
413
+ CHECK-SAME: language: DW_LANG_C_plus_plus
414
+ CHECK-SAME: producer: "clang (Numba)"
415
+ """
416
+ pass
417
+
418
+ foo[1, 1]()
419
+
420
+ llvm_ir = foo.inspect_llvm()[tuple()]
421
+ self.assertFileCheckMatches(llvm_ir, foo.__doc__)
422
+
423
+ def test_DILocation(self):
424
+ """Tests that DILocation information is reasonable.
425
+
426
+ The kernel `foo` produces LLVM like:
427
+ define function() {
428
+ entry:
429
+ alloca
430
+ store 0 to alloca
431
+ <arithmetic for doing the operations on b, c, d>
432
+ setup for print
433
+ branch
434
+ other_labels:
435
+ ... <elided>
436
+ }
437
+
438
+ The following checks that:
439
+ * the alloca and store have no !dbg
440
+ * the arithmetic occurs in the order defined and with !dbg
441
+ * that the !dbg entries are monotonically increasing in value with
442
+ source line number
443
+ """
444
+ sig = (types.float64,)
445
+
446
+ @cuda.jit(sig, debug=True)
447
+ def foo(a):
448
+ """
449
+ CHECK-LABEL: define void @{{.+}}foo
450
+ CHECK: entry:
451
+
452
+ CHECK: %[[VAL_0:.*]] = alloca double
453
+ CHECK-NOT: !dbg
454
+ CHECK: store double 0.0, double* %[[VAL_0]]
455
+ CHECK-NOT: !dbg
456
+ CHECK: %[[VAL_1:.*]] = alloca double
457
+ CHECK-NOT: !dbg
458
+ CHECK: store double 0.0, double* %[[VAL_1]]
459
+ CHECK-NOT: !dbg
460
+ CHECK: %[[VAL_2:.*]] = alloca double
461
+ CHECK-NOT: !dbg
462
+ CHECK: store double 0.0, double* %[[VAL_2]]
463
+ CHECK-NOT: !dbg
464
+ CHECK: %[[VAL_3:.*]] = alloca double
465
+ CHECK-NOT: !dbg
466
+ CHECK: store double 0.0, double* %[[VAL_3]]
467
+ CHECK-NOT: !dbg
468
+ CHECK: %[[VAL_4:.*]] = alloca double
469
+ CHECK-NOT: !dbg
470
+ CHECK: store double 0.0, double* %[[VAL_4]]
471
+ CHECK-NOT: !dbg
472
+ CHECK: %[[VAL_5:.*]] = alloca double
473
+ CHECK-NOT: !dbg
474
+ CHECK: store double 0.0, double* %[[VAL_5]]
475
+ CHECK-NOT: !dbg
476
+ CHECK: %[[VAL_6:.*]] = alloca i8*
477
+ CHECK-NOT: !dbg
478
+ CHECK: store i8* null, i8** %[[VAL_6]]
479
+ CHECK-NOT: !dbg
480
+ CHECK: %[[VAL_7:.*]] = alloca i8*
481
+ CHECK-NOT: !dbg
482
+ CHECK: store i8* null, i8** %[[VAL_7]]
483
+ CHECK-NOT: !dbg
484
+
485
+ CHECK: br label %"[[ENTRY:.+]]"
486
+ CHECK-NOT: !dbg
487
+ CHECK: [[ENTRY]]:
488
+
489
+ CHECK: fadd{{.+}} !dbg ![[DBGADD:[0-9]+]]
490
+ CHECK: fmul{{.+}} !dbg ![[DBGMUL:[0-9]+]]
491
+ CHECK: fdiv{{.+}} !dbg ![[DBGDIV:[0-9]+]]
492
+
493
+ CHECK: ![[DBGADD]] = !DILocation
494
+ CHECK: ![[DBGMUL]] = !DILocation
495
+ CHECK: ![[DBGDIV]] = !DILocation
496
+ """
497
+ b = a + 1.23
498
+ c = b * 2.34
499
+ a = b / c
500
+
501
+ ir = foo.inspect_llvm()[sig]
502
+ self.assertFileCheckMatches(ir, foo.__doc__)
503
+
504
+ def test_DITypes(self):
505
+ """Tests that DITypes are emitted for the types used in the kernel."""
506
+ sig = (
507
+ types.float32,
508
+ types.float64,
509
+ types.int8,
510
+ types.int16,
511
+ types.int32,
512
+ types.int64,
513
+ types.uint8,
514
+ types.uint16,
515
+ types.uint32,
516
+ types.uint64,
517
+ types.complex64,
518
+ types.complex128,
519
+ )
520
+
521
+ @cuda.jit(sig, debug=True)
522
+ def foo(a, b, c, d, e, f, g, h, i, j, k, l):
523
+ """
524
+ CHECK: [[DBG1:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
525
+ CHECK: [[DBG2:.+]] = !DIDerivedType(
526
+ CHECK-SAME: baseType: [[DBG1]]
527
+ CHECK-SAME: size: 64
528
+ CHECK-SAME: tag: DW_TAG_pointer_type
529
+ CHECK: [[DBG3:.+]] = !DIDerivedType(
530
+ CHECK-SAME: baseType: [[DBG2]]
531
+ CHECK-SAME: size: 64
532
+ CHECK-SAME: tag: DW_TAG_pointer_type
533
+ CHECK: [[DBG4:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
534
+ CHECK: [[DBG5:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float64", size: 64)
535
+ CHECK: [[DBG6:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int8", size: 8)
536
+ CHECK: [[DBG7:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int16", size: 16)
537
+ CHECK: [[DBG8:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int32", size: 32)
538
+ CHECK: [[DBG9:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
539
+ CHECK: [[DBG10:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint8", size: 8)
540
+ CHECK: [[DBG11:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint16", size: 16)
541
+ CHECK: [[DBG12:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint32", size: 32)
542
+ CHECK: [[DBG13:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint64", size: 64)
543
+ CHECK: [[DBG14:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
544
+ CHECK: [[DBG15:.+]] = !DIDerivedType(
545
+ CHECK-SAME: baseType: [[DBG14]]
546
+ CHECK-SAME: name: "real"
547
+ CHECK-SAME: offset: 0
548
+ CHECK-SAME: size: 32
549
+ CHECK-SAME: tag: DW_TAG_member
550
+ CHECK: [[DBG16:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
551
+ CHECK: [[DBG17:.+]] = !DIDerivedType(
552
+ CHECK-SAME: baseType: [[DBG16]]
553
+ CHECK-SAME: name: "imag"
554
+ CHECK-SAME: offset: 32
555
+ CHECK-SAME: size: 32
556
+ CHECK-SAME: tag: DW_TAG_member
557
+ CHECK: [[DBG18:.+]] = !{ [[DBG15]], [[DBG17]] }
558
+ CHECK: [[DBG19:.+]] = distinct !DICompositeType(
559
+ CHECK-SAME: elements: [[DBG18]]
560
+ CHECK-SAME: name: "complex64 ({float, float})"
561
+ CHECK-SAME: size: 64
562
+ CHECK-SAME: tag: DW_TAG_structure_type
563
+ CHECK: [[DBG20:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
564
+ CHECK: [[DBG21:.+]] = !DIDerivedType(
565
+ CHECK-SAME: baseType: [[DBG20]]
566
+ CHECK-SAME: name: "real"
567
+ CHECK-SAME: offset: 0
568
+ CHECK-SAME: size: 64
569
+ CHECK-SAME: tag: DW_TAG_member
570
+ CHECK: [[DBG22:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
571
+ CHECK: [[DBG23:.+]] = !DIDerivedType(
572
+ CHECK-SAME: baseType: [[DBG22]]
573
+ CHECK-SAME: name: "imag"
574
+ CHECK-SAME: offset: 64
575
+ CHECK-SAME: size: 64
576
+ CHECK-SAME: tag: DW_TAG_member
577
+ CHECK: [[DBG24:.+]] = !{ [[DBG21]], [[DBG23]] }
578
+ CHECK: [[DBG25:.+]] = distinct !DICompositeType(
579
+ CHECK-SAME: elements: [[DBG24]]
580
+ CHECK-SAME: name: "complex128 ({double, double})"
581
+ CHECK-SAME: size: 128
582
+ CHECK-SAME: tag: DW_TAG_structure_type
583
+ CHECK: [[DBG32:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
584
+ CHECK: [[DBG33:.+]] = !DILocalVariable(
585
+ CHECK-SAME: name: "a"
586
+ CHECK-SAME: type: [[DBG32]]
587
+ CHECK: [[DBG34:.+]] = !DIExpression()
588
+ CHECK: [[DBG35:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float64", size: 64)
589
+ CHECK: [[DBG36:.+]] = !DILocalVariable(
590
+ CHECK-SAME: name: "b"
591
+ CHECK-SAME: type: [[DBG35]]
592
+ CHECK: [[DBG37:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int8", size: 8)
593
+ CHECK: [[DBG38:.+]] = !DILocalVariable(
594
+ CHECK-SAME: name: "c"
595
+ CHECK-SAME: type: [[DBG37]]
596
+ CHECK: [[DBG39:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int16", size: 16)
597
+ CHECK: [[DBG40:.+]] = !DILocalVariable(
598
+ CHECK-SAME: name: "d"
599
+ CHECK-SAME: type: [[DBG39]]
600
+ CHECK: [[DBG41:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int32", size: 32)
601
+ CHECK: [[DBG42:.+]] = !DILocalVariable(
602
+ CHECK-SAME: name: "e"
603
+ CHECK-SAME: type: [[DBG41]]
604
+ CHECK: [[DBG43:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
605
+ CHECK: [[DBG44:.+]] = !DILocalVariable(
606
+ CHECK-SAME: name: "f"
607
+ CHECK-SAME: type: [[DBG43]]
608
+ CHECK: [[DBG45:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint8", size: 8)
609
+ CHECK: [[DBG46:.+]] = !DILocalVariable(
610
+ CHECK-SAME: name: "g"
611
+ CHECK-SAME: type: [[DBG45]]
612
+ CHECK: [[DBG47:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint16", size: 16)
613
+ CHECK: [[DBG48:.+]] = !DILocalVariable(
614
+ CHECK-SAME: name: "h"
615
+ CHECK-SAME: type: [[DBG47]]
616
+ CHECK: [[DBG49:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint32", size: 32)
617
+ CHECK: [[DBG50:.+]] = !DILocalVariable(
618
+ CHECK-SAME: name: "i"
619
+ CHECK-SAME: type: [[DBG49]]
620
+ CHECK: [[DBG51:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint64", size: 64)
621
+ CHECK: [[DBG52:.+]] = !DILocalVariable(
622
+ CHECK-SAME: name: "j"
623
+ CHECK-SAME: type: [[DBG51]]
624
+ CHECK: [[DBG53:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
625
+ CHECK: [[DBG54:.+]] = !DIDerivedType(
626
+ CHECK-SAME: baseType: [[DBG53]]
627
+ CHECK-SAME: name: "real"
628
+ CHECK-SAME: offset: 0
629
+ CHECK-SAME: size: 32
630
+ CHECK-SAME: tag: DW_TAG_member
631
+ CHECK: [[DBG55:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
632
+ CHECK: [[DBG56:.+]] = !DIDerivedType(
633
+ CHECK-SAME: baseType: [[DBG55]]
634
+ CHECK-SAME: name: "imag"
635
+ CHECK-SAME: offset: 32
636
+ CHECK-SAME: size: 32
637
+ CHECK-SAME: tag: DW_TAG_member
638
+ CHECK: [[DBG57:.+]] = !{ [[DBG54]], [[DBG56]] }
639
+ CHECK: [[DBG58:.+]] = distinct !DICompositeType(
640
+ CHECK-SAME: elements: [[DBG57]]
641
+ CHECK-SAME: name: "complex64 ({float, float})"
642
+ CHECK-SAME: size: 64
643
+ CHECK-SAME: tag: DW_TAG_structure_type
644
+ CHECK: [[DBG59:.+]] = !DILocalVariable(
645
+ CHECK-SAME: name: "k"
646
+ CHECK-SAME: type: [[DBG58]]
647
+ CHECK: [[DBG60:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
648
+ CHECK: [[DBG61:.+]] = !DIDerivedType(
649
+ CHECK-SAME: baseType: [[DBG60]]
650
+ CHECK-SAME: name: "real"
651
+ CHECK-SAME: offset: 0
652
+ CHECK-SAME: size: 64
653
+ CHECK-SAME: tag: DW_TAG_member
654
+ CHECK: [[DBG62:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
655
+ CHECK: [[DBG63:.+]] = !DIDerivedType(
656
+ CHECK-SAME: baseType: [[DBG62]]
657
+ CHECK-SAME: name: "imag"
658
+ CHECK-SAME: offset: 64
659
+ CHECK-SAME: size: 64
660
+ CHECK-SAME: tag: DW_TAG_member
661
+ CHECK: [[DBG64:.+]] = !{ [[DBG61]], [[DBG63]] }
662
+ CHECK: [[DBG65:.+]] = distinct !DICompositeType(
663
+ CHECK-SAME: elements: [[DBG64]]
664
+ CHECK-SAME: name: "complex128 ({double, double})"
665
+ CHECK-SAME: size: 128
666
+ CHECK-SAME: tag: DW_TAG_structure_type
667
+ CHECK: [[DBG66:.+]] = !DILocalVariable(
668
+ CHECK-SAME: name: "l"
669
+ CHECK-SAME: type: [[DBG65]]
670
+ """
671
+ pass
672
+
673
+ ir = foo.inspect_llvm()[sig]
674
+ self.assertFileCheckMatches(ir, foo.__doc__)
675
+
676
+ def test_arrays(self):
677
+ """Tests that arrays are emitted as DIBasicType."""
678
+ sig = (types.float32[::1],)
679
+
680
+ @cuda.jit(sig, debug=True)
681
+ def foo(a):
682
+ """
683
+ CHECK: distinct !DICompileUnit
684
+ CHECK: distinct !DISubprogram
685
+ CHECK: [[DBG127:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
686
+ CHECK: [[DBG128:.+]] = !DIDerivedType(
687
+ CHECK-SAME: baseType: [[DBG127]]
688
+ CHECK-SAME: size: 64
689
+ CHECK-SAME: tag: DW_TAG_pointer_type
690
+ CHECK-SAME: )
691
+ CHECK: [[DBG129:.+]] = !DIDerivedType(
692
+ CHECK-SAME: baseType: [[DBG128]]
693
+ CHECK-SAME: name: "meminfo"
694
+ CHECK-SAME: offset: 0
695
+ CHECK-SAME: size: 64
696
+ CHECK-SAME: tag: DW_TAG_member
697
+ CHECK-SAME: )
698
+ CHECK: [[DBG130:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
699
+ CHECK: [[DBG131:.+]] = !DIDerivedType(
700
+ CHECK-SAME: baseType: [[DBG130]]
701
+ CHECK-SAME: size: 64
702
+ CHECK-SAME: tag: DW_TAG_pointer_type
703
+ CHECK-SAME: )
704
+ CHECK: [[DBG132:.+]] = !DIDerivedType(
705
+ CHECK-SAME: baseType: [[DBG131]]
706
+ CHECK-SAME: name: "parent"
707
+ CHECK-SAME: offset: 64
708
+ CHECK-SAME: size: 64
709
+ CHECK-SAME: tag: DW_TAG_member
710
+ CHECK-SAME: )
711
+ CHECK: [[DBG133:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
712
+ CHECK: [[DBG134:.+]] = !DIDerivedType(
713
+ CHECK-SAME: baseType: [[DBG133]]
714
+ CHECK-SAME: name: "nitems"
715
+ CHECK-SAME: offset: 128
716
+ CHECK-SAME: size: 64
717
+ CHECK-SAME: tag: DW_TAG_member
718
+ CHECK-SAME: )
719
+ CHECK: [[DBG135:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
720
+ CHECK: [[DBG136:.+]] = !DIDerivedType(
721
+ CHECK-SAME: baseType: [[DBG135]]
722
+ CHECK-SAME: name: "itemsize"
723
+ CHECK-SAME: offset: 192
724
+ CHECK-SAME: size: 64
725
+ CHECK-SAME: tag: DW_TAG_member
726
+ CHECK-SAME: )
727
+ CHECK: [[DBG137:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
728
+ CHECK: [[DBG138:.+]] = !DIDerivedType(
729
+ CHECK-SAME: baseType: [[DBG137]]
730
+ CHECK-SAME: size: 64
731
+ CHECK-SAME: tag: DW_TAG_pointer_type
732
+ CHECK-SAME: )
733
+ CHECK: [[DBG139:.+]] = !DIDerivedType(
734
+ CHECK-SAME: baseType: [[DBG138]]
735
+ CHECK-SAME: name: "data"
736
+ CHECK-SAME: offset: 256
737
+ CHECK-SAME: size: 64
738
+ CHECK-SAME: tag: DW_TAG_member
739
+ CHECK-SAME: )
740
+ CHECK: [[DBG140:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "i64", size: 64)
741
+ CHECK: [[DBG141:.+]] = !DICompositeType(
742
+ CHECK-SAME: baseType: [[DBG140]]
743
+ CHECK-SAME: identifier: "[1 x i64]"
744
+ CHECK-SAME: name: "UniTuple(int64 x 1) ([1 x i64])"
745
+ CHECK-SAME: tag: DW_TAG_array_type
746
+ CHECK-SAME: )
747
+ CHECK: [[DBG142:.+]] = !DIDerivedType(
748
+ CHECK-SAME: baseType: [[DBG141]]
749
+ CHECK-SAME: name: "shape"
750
+ CHECK-SAME: offset: 320
751
+ CHECK-SAME: size: 64
752
+ CHECK-SAME: tag: DW_TAG_member
753
+ CHECK-SAME: )
754
+ CHECK: [[DBG143:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "i64", size: 64)
755
+ CHECK: [[DBG144:.+]] = !DICompositeType(
756
+ CHECK-SAME: baseType: [[DBG143]]
757
+ CHECK-SAME: identifier: "[1 x i64]"
758
+ CHECK-SAME: name: "UniTuple(int64 x 1) ([1 x i64])"
759
+ CHECK-SAME: size: 64
760
+ CHECK-SAME: tag: DW_TAG_array_type
761
+ CHECK-SAME: )
762
+ CHECK: [[DBG145:.+]] = !DIDerivedType(
763
+ CHECK-SAME: baseType: [[DBG144]]
764
+ CHECK-SAME: name: "strides"
765
+ CHECK-SAME: offset: 384
766
+ CHECK-SAME: size: 64
767
+ CHECK-SAME: tag: DW_TAG_member
768
+ CHECK-SAME: )
769
+ CHECK: [[DBG146:.+]] = !{ [[DBG129]], [[DBG132]], [[DBG134]], [[DBG136]], [[DBG139]], [[DBG142]], [[DBG145]] }
770
+ CHECK: [[DBG147:.+]] = distinct !DICompositeType(
771
+ CHECK-SAME: elements: [[DBG146]]
772
+ CHECK-SAME: identifier: "{i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]}"
773
+ CHECK-SAME: name: "array(float32, 1d, C) ({i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]})"
774
+ CHECK-SAME: size: 448
775
+ CHECK-SAME: tag: DW_TAG_structure_type
776
+ CHECK-SAME: )
777
+ CHECK: !DILocalVariable(
778
+ CHECK-SAME: name: "a"
779
+ CHECK-SAME: type: [[DBG147]]
780
+ CHECK-SAME: )
781
+ """
782
+ pass
783
+
784
+ ir = foo.inspect_llvm()[sig]
785
+ self.assertFileCheckMatches(ir, foo.__doc__)
786
+
406
787
 
407
788
  if __name__ == "__main__":
408
789
  unittest.main()
@@ -8,7 +8,7 @@ from numba import int16, int32
8
8
  from numba import cuda, vectorize, njit
9
9
  from numba.core import types
10
10
  from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
11
- from numba.tests.enum_usecases import (
11
+ from numba.cuda.tests.enum_usecases import (
12
12
  Color,
13
13
  Shape,
14
14
  Planet,
@@ -36,7 +36,7 @@ def sum_intervals(i, j):
36
36
 
37
37
 
38
38
  if not config.ENABLE_CUDASIM:
39
- from numba.core import cgutils
39
+ from numba.cuda import cgutils
40
40
  from numba.core.extending import (
41
41
  lower_builtin,
42
42
  models,
@@ -1,12 +1,18 @@
1
+ import re
2
+ import cffi
3
+
1
4
  import numpy as np
2
5
 
3
6
  from io import StringIO
4
7
  from numba import cuda, float32, float64, int32, intp
8
+ from numba.types import float16, CPointer
9
+ from numba.cuda import declare_device
5
10
  from numba.cuda.testing import unittest, CUDATestCase
6
11
  from numba.cuda.testing import (
7
12
  skip_on_cudasim,
8
13
  skip_with_nvdisasm,
9
14
  skip_without_nvdisasm,
15
+ skip_if_nvjitlink_missing,
10
16
  )
11
17
 
12
18
 
@@ -21,6 +27,17 @@ class TestInspect(CUDATestCase):
21
27
 
22
28
  @cuda.jit(sig)
23
29
  def foo(x, y):
30
+ """
31
+ // LLVM: define void
32
+ // LLVM-SAME: foo
33
+ // LLVM-LABEL: entry:
34
+ // LLVM-NEXT: br label %"[[VAL_0:.*]]"
35
+ // LLVM-NEXT: [[VAL_0]]:
36
+ // LLVM-NEXT: ret void
37
+
38
+ // ASM: Generated by NVIDIA NVVM Compiler
39
+ // ASM: foo
40
+ """
24
41
  pass
25
42
 
26
43
  file = StringIO()
@@ -31,28 +48,43 @@ class TestInspect(CUDATestCase):
31
48
  # Signature in annotation
32
49
  self.assertIn("(float32, int32)", typeanno)
33
50
  file.close()
34
- # Function name in LLVM
35
- llvm = foo.inspect_llvm(sig)
36
- self.assertIn("foo", llvm)
37
-
38
- # Kernel in LLVM
39
- self.assertIn("define void @", llvm)
40
-
41
- asm = foo.inspect_asm(sig)
42
51
 
43
- # Function name in PTX
44
- self.assertIn("foo", asm)
45
- # NVVM inserted comments in PTX
46
- self.assertIn("Generated by NVIDIA NVVM Compiler", asm)
52
+ self.assertFileCheckLLVM(foo, sig)
53
+ self.assertFileCheckAsm(foo, sig)
47
54
 
48
55
  def test_polytyped(self):
49
56
  @cuda.jit
50
57
  def foo(x, y):
58
+ """
59
+ // LLVM: define void
60
+ // LLVM-SAME: foo
61
+ // LLVM_INT-SAME: i64
62
+ // LLVM_INT-SAME: i64
63
+ // LLVM_FLOAT-SAME: double
64
+ // LLVM_FLOAT-SAME: double
65
+
66
+ // ASM: Generated by NVIDIA NVVM Compiler
67
+ // ASM: .visible
68
+ // ASM-SAME: .entry
69
+ // ASM-SAME: foo
70
+ """
51
71
  pass
52
72
 
53
73
  foo[1, 1](1, 1)
54
74
  foo[1, 1](1.2, 2.4)
55
75
 
76
+ int_sig = (intp, intp)
77
+ float_sig = (float64, float64)
78
+
79
+ self.assertFileCheckLLVM(
80
+ foo, int_sig, check_prefixes=["LLVM", "LLVM_INT"]
81
+ )
82
+ self.assertFileCheckAsm(foo, int_sig, check_prefixes=["ASM"])
83
+ self.assertFileCheckLLVM(
84
+ foo, float_sig, check_prefixes=["LLVM", "LLVM_FLOAT"]
85
+ )
86
+ self.assertFileCheckAsm(foo, float_sig, check_prefixes=["ASM"])
87
+
56
88
  file = StringIO()
57
89
  foo.inspect_types(file=file)
58
90
  typeanno = file.getvalue()
@@ -70,14 +102,6 @@ class TestInspect(CUDATestCase):
70
102
  self.assertIn((intp, intp), llvmirs)
71
103
  self.assertIn((float64, float64), llvmirs)
72
104
 
73
- # Function name in LLVM
74
- self.assertIn("foo", llvmirs[intp, intp])
75
- self.assertIn("foo", llvmirs[float64, float64])
76
-
77
- # Kernels in LLVM
78
- self.assertIn("define void @", llvmirs[intp, intp])
79
- self.assertIn("define void @", llvmirs[float64, float64])
80
-
81
105
  asmdict = foo.inspect_asm()
82
106
 
83
107
  # Signature in assembly dict
@@ -88,10 +112,6 @@ class TestInspect(CUDATestCase):
88
112
  self.assertIn((intp, intp), asmdict)
89
113
  self.assertIn((float64, float64), asmdict)
90
114
 
91
- # NVVM inserted in PTX
92
- self.assertIn("foo", asmdict[intp, intp])
93
- self.assertIn("foo", asmdict[float64, float64])
94
-
95
115
  def _test_inspect_sass(self, kernel, name, sass):
96
116
  # Ensure function appears in output
97
117
  seen_function = False
@@ -108,6 +128,56 @@ class TestInspect(CUDATestCase):
108
128
  self.assertIn("BRA", sass) # Branch
109
129
  self.assertIn("EXIT", sass) # Exit program
110
130
 
131
+ @skip_on_cudasim("Simulator does not generate code to be inspected")
132
+ @skip_if_nvjitlink_missing("nvJitLink is required for LTO")
133
+ def test_inspect_lto_asm(self):
134
+ ffi = cffi.FFI()
135
+
136
+ ext = cuda.CUSource("""
137
+ #include <cuda_fp16.h>
138
+ extern "C"
139
+ __device__ int add_f2_f2(__half * res, __half * a, __half *b) {
140
+ *res = *a + *b;
141
+ return 0;
142
+ }
143
+ """)
144
+
145
+ add = declare_device(
146
+ "add_f2_f2",
147
+ float16(CPointer(float16), CPointer(float16)),
148
+ link=ext,
149
+ )
150
+
151
+ @cuda.jit
152
+ def k(arr):
153
+ local_arr = cuda.local.array(shape=1, dtype=np.float16)
154
+ local_arr2 = cuda.local.array(shape=1, dtype=np.float16)
155
+ local_arr[0] = 1
156
+ local_arr2[0] = 2
157
+
158
+ ptr = ffi.from_buffer(local_arr)
159
+ ptr2 = ffi.from_buffer(local_arr2)
160
+
161
+ arr[0] = add(ptr, ptr2)
162
+
163
+ arr = np.array([0], dtype=np.float16)
164
+
165
+ k[1, 1](arr)
166
+
167
+ allasms = k.inspect_asm()
168
+ asm = next(iter(allasms.values()))
169
+
170
+ regex = re.compile(r"call(.|\n)*add_f2_f2")
171
+ self.assertRegex(asm, regex)
172
+
173
+ all_ext_asms = k.inspect_lto_ptx()
174
+ lto_asm = next(iter(all_ext_asms.values()))
175
+
176
+ self.assertIn("add.f16", lto_asm)
177
+ self.assertNotIn("call", lto_asm)
178
+
179
+ np.testing.assert_equal(arr[0], np.float16(1) + np.float16(2))
180
+
111
181
  @skip_without_nvdisasm("nvdisasm needed for inspect_sass()")
112
182
  def test_inspect_sass_eager(self):
113
183
  sig = (float32[::1], int32[::1])