numba-cuda 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +0 -8
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
- numba_cuda/numba/cuda/api_util.py +6 -0
- numba_cuda/numba/cuda/cgutils.py +1291 -0
- numba_cuda/numba/cuda/codegen.py +32 -14
- numba_cuda/numba/cuda/compiler.py +113 -10
- numba_cuda/numba/cuda/core/caching.py +741 -0
- numba_cuda/numba/cuda/core/callconv.py +338 -0
- numba_cuda/numba/cuda/core/codegen.py +168 -0
- numba_cuda/numba/cuda/core/compiler.py +205 -0
- numba_cuda/numba/cuda/core/typed_passes.py +139 -0
- numba_cuda/numba/cuda/cudadecl.py +0 -268
- numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +2 -1
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +4 -178
- numba_cuda/numba/cuda/debuginfo.py +469 -3
- numba_cuda/numba/cuda/device_init.py +0 -1
- numba_cuda/numba/cuda/dispatcher.py +309 -11
- numba_cuda/numba/cuda/extending.py +2 -1
- numba_cuda/numba/cuda/fp16.py +348 -0
- numba_cuda/numba/cuda/intrinsics.py +1 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
- numba_cuda/numba/cuda/lowering.py +1833 -8
- numba_cuda/numba/cuda/mathimpl.py +2 -90
- numba_cuda/numba/cuda/nvvmutils.py +2 -1
- numba_cuda/numba/cuda/printimpl.py +2 -1
- numba_cuda/numba/cuda/serialize.py +264 -0
- numba_cuda/numba/cuda/simulator/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
- numba_cuda/numba/cuda/stubs.py +0 -308
- numba_cuda/numba/cuda/target.py +13 -5
- numba_cuda/numba/cuda/testing.py +156 -5
- numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
- numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
- numba_cuda/numba/cuda/utils.py +785 -0
- numba_cuda/numba/cuda/vector_types.py +1 -1
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +61 -48
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0
|
@@ -403,6 +403,387 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
403
403
|
match = re.compile(pat6).search(llvm_ir)
|
|
404
404
|
self.assertIsNotNone(match, msg=llvm_ir)
|
|
405
405
|
|
|
406
|
+
def test_DW_LANG(self):
|
|
407
|
+
@cuda.jit(debug=True)
|
|
408
|
+
def foo():
|
|
409
|
+
"""
|
|
410
|
+
CHECK: distinct !DICompileUnit
|
|
411
|
+
CHECK-SAME: emissionKind: FullDebug
|
|
412
|
+
CHECK-SAME: isOptimized: true
|
|
413
|
+
CHECK-SAME: language: DW_LANG_C_plus_plus
|
|
414
|
+
CHECK-SAME: producer: "clang (Numba)"
|
|
415
|
+
"""
|
|
416
|
+
pass
|
|
417
|
+
|
|
418
|
+
foo[1, 1]()
|
|
419
|
+
|
|
420
|
+
llvm_ir = foo.inspect_llvm()[tuple()]
|
|
421
|
+
self.assertFileCheckMatches(llvm_ir, foo.__doc__)
|
|
422
|
+
|
|
423
|
+
def test_DILocation(self):
|
|
424
|
+
"""Tests that DILocation information is reasonable.
|
|
425
|
+
|
|
426
|
+
The kernel `foo` produces LLVM like:
|
|
427
|
+
define function() {
|
|
428
|
+
entry:
|
|
429
|
+
alloca
|
|
430
|
+
store 0 to alloca
|
|
431
|
+
<arithmetic for doing the operations on b, c, d>
|
|
432
|
+
setup for print
|
|
433
|
+
branch
|
|
434
|
+
other_labels:
|
|
435
|
+
... <elided>
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
The following checks that:
|
|
439
|
+
* the alloca and store have no !dbg
|
|
440
|
+
* the arithmetic occurs in the order defined and with !dbg
|
|
441
|
+
* that the !dbg entries are monotonically increasing in value with
|
|
442
|
+
source line number
|
|
443
|
+
"""
|
|
444
|
+
sig = (types.float64,)
|
|
445
|
+
|
|
446
|
+
@cuda.jit(sig, debug=True)
|
|
447
|
+
def foo(a):
|
|
448
|
+
"""
|
|
449
|
+
CHECK-LABEL: define void @{{.+}}foo
|
|
450
|
+
CHECK: entry:
|
|
451
|
+
|
|
452
|
+
CHECK: %[[VAL_0:.*]] = alloca double
|
|
453
|
+
CHECK-NOT: !dbg
|
|
454
|
+
CHECK: store double 0.0, double* %[[VAL_0]]
|
|
455
|
+
CHECK-NOT: !dbg
|
|
456
|
+
CHECK: %[[VAL_1:.*]] = alloca double
|
|
457
|
+
CHECK-NOT: !dbg
|
|
458
|
+
CHECK: store double 0.0, double* %[[VAL_1]]
|
|
459
|
+
CHECK-NOT: !dbg
|
|
460
|
+
CHECK: %[[VAL_2:.*]] = alloca double
|
|
461
|
+
CHECK-NOT: !dbg
|
|
462
|
+
CHECK: store double 0.0, double* %[[VAL_2]]
|
|
463
|
+
CHECK-NOT: !dbg
|
|
464
|
+
CHECK: %[[VAL_3:.*]] = alloca double
|
|
465
|
+
CHECK-NOT: !dbg
|
|
466
|
+
CHECK: store double 0.0, double* %[[VAL_3]]
|
|
467
|
+
CHECK-NOT: !dbg
|
|
468
|
+
CHECK: %[[VAL_4:.*]] = alloca double
|
|
469
|
+
CHECK-NOT: !dbg
|
|
470
|
+
CHECK: store double 0.0, double* %[[VAL_4]]
|
|
471
|
+
CHECK-NOT: !dbg
|
|
472
|
+
CHECK: %[[VAL_5:.*]] = alloca double
|
|
473
|
+
CHECK-NOT: !dbg
|
|
474
|
+
CHECK: store double 0.0, double* %[[VAL_5]]
|
|
475
|
+
CHECK-NOT: !dbg
|
|
476
|
+
CHECK: %[[VAL_6:.*]] = alloca i8*
|
|
477
|
+
CHECK-NOT: !dbg
|
|
478
|
+
CHECK: store i8* null, i8** %[[VAL_6]]
|
|
479
|
+
CHECK-NOT: !dbg
|
|
480
|
+
CHECK: %[[VAL_7:.*]] = alloca i8*
|
|
481
|
+
CHECK-NOT: !dbg
|
|
482
|
+
CHECK: store i8* null, i8** %[[VAL_7]]
|
|
483
|
+
CHECK-NOT: !dbg
|
|
484
|
+
|
|
485
|
+
CHECK: br label %"[[ENTRY:.+]]"
|
|
486
|
+
CHECK-NOT: !dbg
|
|
487
|
+
CHECK: [[ENTRY]]:
|
|
488
|
+
|
|
489
|
+
CHECK: fadd{{.+}} !dbg ![[DBGADD:[0-9]+]]
|
|
490
|
+
CHECK: fmul{{.+}} !dbg ![[DBGMUL:[0-9]+]]
|
|
491
|
+
CHECK: fdiv{{.+}} !dbg ![[DBGDIV:[0-9]+]]
|
|
492
|
+
|
|
493
|
+
CHECK: ![[DBGADD]] = !DILocation
|
|
494
|
+
CHECK: ![[DBGMUL]] = !DILocation
|
|
495
|
+
CHECK: ![[DBGDIV]] = !DILocation
|
|
496
|
+
"""
|
|
497
|
+
b = a + 1.23
|
|
498
|
+
c = b * 2.34
|
|
499
|
+
a = b / c
|
|
500
|
+
|
|
501
|
+
ir = foo.inspect_llvm()[sig]
|
|
502
|
+
self.assertFileCheckMatches(ir, foo.__doc__)
|
|
503
|
+
|
|
504
|
+
def test_DITypes(self):
|
|
505
|
+
"""Tests that DITypes are emitted for the types used in the kernel."""
|
|
506
|
+
sig = (
|
|
507
|
+
types.float32,
|
|
508
|
+
types.float64,
|
|
509
|
+
types.int8,
|
|
510
|
+
types.int16,
|
|
511
|
+
types.int32,
|
|
512
|
+
types.int64,
|
|
513
|
+
types.uint8,
|
|
514
|
+
types.uint16,
|
|
515
|
+
types.uint32,
|
|
516
|
+
types.uint64,
|
|
517
|
+
types.complex64,
|
|
518
|
+
types.complex128,
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
@cuda.jit(sig, debug=True)
|
|
522
|
+
def foo(a, b, c, d, e, f, g, h, i, j, k, l):
|
|
523
|
+
"""
|
|
524
|
+
CHECK: [[DBG1:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
|
|
525
|
+
CHECK: [[DBG2:.+]] = !DIDerivedType(
|
|
526
|
+
CHECK-SAME: baseType: [[DBG1]]
|
|
527
|
+
CHECK-SAME: size: 64
|
|
528
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
529
|
+
CHECK: [[DBG3:.+]] = !DIDerivedType(
|
|
530
|
+
CHECK-SAME: baseType: [[DBG2]]
|
|
531
|
+
CHECK-SAME: size: 64
|
|
532
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
533
|
+
CHECK: [[DBG4:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
|
|
534
|
+
CHECK: [[DBG5:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float64", size: 64)
|
|
535
|
+
CHECK: [[DBG6:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int8", size: 8)
|
|
536
|
+
CHECK: [[DBG7:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int16", size: 16)
|
|
537
|
+
CHECK: [[DBG8:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int32", size: 32)
|
|
538
|
+
CHECK: [[DBG9:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
|
|
539
|
+
CHECK: [[DBG10:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint8", size: 8)
|
|
540
|
+
CHECK: [[DBG11:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint16", size: 16)
|
|
541
|
+
CHECK: [[DBG12:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint32", size: 32)
|
|
542
|
+
CHECK: [[DBG13:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint64", size: 64)
|
|
543
|
+
CHECK: [[DBG14:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
|
|
544
|
+
CHECK: [[DBG15:.+]] = !DIDerivedType(
|
|
545
|
+
CHECK-SAME: baseType: [[DBG14]]
|
|
546
|
+
CHECK-SAME: name: "real"
|
|
547
|
+
CHECK-SAME: offset: 0
|
|
548
|
+
CHECK-SAME: size: 32
|
|
549
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
550
|
+
CHECK: [[DBG16:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
|
|
551
|
+
CHECK: [[DBG17:.+]] = !DIDerivedType(
|
|
552
|
+
CHECK-SAME: baseType: [[DBG16]]
|
|
553
|
+
CHECK-SAME: name: "imag"
|
|
554
|
+
CHECK-SAME: offset: 32
|
|
555
|
+
CHECK-SAME: size: 32
|
|
556
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
557
|
+
CHECK: [[DBG18:.+]] = !{ [[DBG15]], [[DBG17]] }
|
|
558
|
+
CHECK: [[DBG19:.+]] = distinct !DICompositeType(
|
|
559
|
+
CHECK-SAME: elements: [[DBG18]]
|
|
560
|
+
CHECK-SAME: name: "complex64 ({float, float})"
|
|
561
|
+
CHECK-SAME: size: 64
|
|
562
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
563
|
+
CHECK: [[DBG20:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
|
|
564
|
+
CHECK: [[DBG21:.+]] = !DIDerivedType(
|
|
565
|
+
CHECK-SAME: baseType: [[DBG20]]
|
|
566
|
+
CHECK-SAME: name: "real"
|
|
567
|
+
CHECK-SAME: offset: 0
|
|
568
|
+
CHECK-SAME: size: 64
|
|
569
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
570
|
+
CHECK: [[DBG22:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
|
|
571
|
+
CHECK: [[DBG23:.+]] = !DIDerivedType(
|
|
572
|
+
CHECK-SAME: baseType: [[DBG22]]
|
|
573
|
+
CHECK-SAME: name: "imag"
|
|
574
|
+
CHECK-SAME: offset: 64
|
|
575
|
+
CHECK-SAME: size: 64
|
|
576
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
577
|
+
CHECK: [[DBG24:.+]] = !{ [[DBG21]], [[DBG23]] }
|
|
578
|
+
CHECK: [[DBG25:.+]] = distinct !DICompositeType(
|
|
579
|
+
CHECK-SAME: elements: [[DBG24]]
|
|
580
|
+
CHECK-SAME: name: "complex128 ({double, double})"
|
|
581
|
+
CHECK-SAME: size: 128
|
|
582
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
583
|
+
CHECK: [[DBG32:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
|
|
584
|
+
CHECK: [[DBG33:.+]] = !DILocalVariable(
|
|
585
|
+
CHECK-SAME: name: "a"
|
|
586
|
+
CHECK-SAME: type: [[DBG32]]
|
|
587
|
+
CHECK: [[DBG34:.+]] = !DIExpression()
|
|
588
|
+
CHECK: [[DBG35:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float64", size: 64)
|
|
589
|
+
CHECK: [[DBG36:.+]] = !DILocalVariable(
|
|
590
|
+
CHECK-SAME: name: "b"
|
|
591
|
+
CHECK-SAME: type: [[DBG35]]
|
|
592
|
+
CHECK: [[DBG37:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int8", size: 8)
|
|
593
|
+
CHECK: [[DBG38:.+]] = !DILocalVariable(
|
|
594
|
+
CHECK-SAME: name: "c"
|
|
595
|
+
CHECK-SAME: type: [[DBG37]]
|
|
596
|
+
CHECK: [[DBG39:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int16", size: 16)
|
|
597
|
+
CHECK: [[DBG40:.+]] = !DILocalVariable(
|
|
598
|
+
CHECK-SAME: name: "d"
|
|
599
|
+
CHECK-SAME: type: [[DBG39]]
|
|
600
|
+
CHECK: [[DBG41:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int32", size: 32)
|
|
601
|
+
CHECK: [[DBG42:.+]] = !DILocalVariable(
|
|
602
|
+
CHECK-SAME: name: "e"
|
|
603
|
+
CHECK-SAME: type: [[DBG41]]
|
|
604
|
+
CHECK: [[DBG43:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
|
|
605
|
+
CHECK: [[DBG44:.+]] = !DILocalVariable(
|
|
606
|
+
CHECK-SAME: name: "f"
|
|
607
|
+
CHECK-SAME: type: [[DBG43]]
|
|
608
|
+
CHECK: [[DBG45:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint8", size: 8)
|
|
609
|
+
CHECK: [[DBG46:.+]] = !DILocalVariable(
|
|
610
|
+
CHECK-SAME: name: "g"
|
|
611
|
+
CHECK-SAME: type: [[DBG45]]
|
|
612
|
+
CHECK: [[DBG47:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint16", size: 16)
|
|
613
|
+
CHECK: [[DBG48:.+]] = !DILocalVariable(
|
|
614
|
+
CHECK-SAME: name: "h"
|
|
615
|
+
CHECK-SAME: type: [[DBG47]]
|
|
616
|
+
CHECK: [[DBG49:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint32", size: 32)
|
|
617
|
+
CHECK: [[DBG50:.+]] = !DILocalVariable(
|
|
618
|
+
CHECK-SAME: name: "i"
|
|
619
|
+
CHECK-SAME: type: [[DBG49]]
|
|
620
|
+
CHECK: [[DBG51:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint64", size: 64)
|
|
621
|
+
CHECK: [[DBG52:.+]] = !DILocalVariable(
|
|
622
|
+
CHECK-SAME: name: "j"
|
|
623
|
+
CHECK-SAME: type: [[DBG51]]
|
|
624
|
+
CHECK: [[DBG53:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
|
|
625
|
+
CHECK: [[DBG54:.+]] = !DIDerivedType(
|
|
626
|
+
CHECK-SAME: baseType: [[DBG53]]
|
|
627
|
+
CHECK-SAME: name: "real"
|
|
628
|
+
CHECK-SAME: offset: 0
|
|
629
|
+
CHECK-SAME: size: 32
|
|
630
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
631
|
+
CHECK: [[DBG55:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
|
|
632
|
+
CHECK: [[DBG56:.+]] = !DIDerivedType(
|
|
633
|
+
CHECK-SAME: baseType: [[DBG55]]
|
|
634
|
+
CHECK-SAME: name: "imag"
|
|
635
|
+
CHECK-SAME: offset: 32
|
|
636
|
+
CHECK-SAME: size: 32
|
|
637
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
638
|
+
CHECK: [[DBG57:.+]] = !{ [[DBG54]], [[DBG56]] }
|
|
639
|
+
CHECK: [[DBG58:.+]] = distinct !DICompositeType(
|
|
640
|
+
CHECK-SAME: elements: [[DBG57]]
|
|
641
|
+
CHECK-SAME: name: "complex64 ({float, float})"
|
|
642
|
+
CHECK-SAME: size: 64
|
|
643
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
644
|
+
CHECK: [[DBG59:.+]] = !DILocalVariable(
|
|
645
|
+
CHECK-SAME: name: "k"
|
|
646
|
+
CHECK-SAME: type: [[DBG58]]
|
|
647
|
+
CHECK: [[DBG60:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
|
|
648
|
+
CHECK: [[DBG61:.+]] = !DIDerivedType(
|
|
649
|
+
CHECK-SAME: baseType: [[DBG60]]
|
|
650
|
+
CHECK-SAME: name: "real"
|
|
651
|
+
CHECK-SAME: offset: 0
|
|
652
|
+
CHECK-SAME: size: 64
|
|
653
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
654
|
+
CHECK: [[DBG62:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
|
|
655
|
+
CHECK: [[DBG63:.+]] = !DIDerivedType(
|
|
656
|
+
CHECK-SAME: baseType: [[DBG62]]
|
|
657
|
+
CHECK-SAME: name: "imag"
|
|
658
|
+
CHECK-SAME: offset: 64
|
|
659
|
+
CHECK-SAME: size: 64
|
|
660
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
661
|
+
CHECK: [[DBG64:.+]] = !{ [[DBG61]], [[DBG63]] }
|
|
662
|
+
CHECK: [[DBG65:.+]] = distinct !DICompositeType(
|
|
663
|
+
CHECK-SAME: elements: [[DBG64]]
|
|
664
|
+
CHECK-SAME: name: "complex128 ({double, double})"
|
|
665
|
+
CHECK-SAME: size: 128
|
|
666
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
667
|
+
CHECK: [[DBG66:.+]] = !DILocalVariable(
|
|
668
|
+
CHECK-SAME: name: "l"
|
|
669
|
+
CHECK-SAME: type: [[DBG65]]
|
|
670
|
+
"""
|
|
671
|
+
pass
|
|
672
|
+
|
|
673
|
+
ir = foo.inspect_llvm()[sig]
|
|
674
|
+
self.assertFileCheckMatches(ir, foo.__doc__)
|
|
675
|
+
|
|
676
|
+
def test_arrays(self):
|
|
677
|
+
"""Tests that arrays are emitted as DIBasicType."""
|
|
678
|
+
sig = (types.float32[::1],)
|
|
679
|
+
|
|
680
|
+
@cuda.jit(sig, debug=True)
|
|
681
|
+
def foo(a):
|
|
682
|
+
"""
|
|
683
|
+
CHECK: distinct !DICompileUnit
|
|
684
|
+
CHECK: distinct !DISubprogram
|
|
685
|
+
CHECK: [[DBG127:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
|
|
686
|
+
CHECK: [[DBG128:.+]] = !DIDerivedType(
|
|
687
|
+
CHECK-SAME: baseType: [[DBG127]]
|
|
688
|
+
CHECK-SAME: size: 64
|
|
689
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
690
|
+
CHECK-SAME: )
|
|
691
|
+
CHECK: [[DBG129:.+]] = !DIDerivedType(
|
|
692
|
+
CHECK-SAME: baseType: [[DBG128]]
|
|
693
|
+
CHECK-SAME: name: "meminfo"
|
|
694
|
+
CHECK-SAME: offset: 0
|
|
695
|
+
CHECK-SAME: size: 64
|
|
696
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
697
|
+
CHECK-SAME: )
|
|
698
|
+
CHECK: [[DBG130:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
|
|
699
|
+
CHECK: [[DBG131:.+]] = !DIDerivedType(
|
|
700
|
+
CHECK-SAME: baseType: [[DBG130]]
|
|
701
|
+
CHECK-SAME: size: 64
|
|
702
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
703
|
+
CHECK-SAME: )
|
|
704
|
+
CHECK: [[DBG132:.+]] = !DIDerivedType(
|
|
705
|
+
CHECK-SAME: baseType: [[DBG131]]
|
|
706
|
+
CHECK-SAME: name: "parent"
|
|
707
|
+
CHECK-SAME: offset: 64
|
|
708
|
+
CHECK-SAME: size: 64
|
|
709
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
710
|
+
CHECK-SAME: )
|
|
711
|
+
CHECK: [[DBG133:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
|
|
712
|
+
CHECK: [[DBG134:.+]] = !DIDerivedType(
|
|
713
|
+
CHECK-SAME: baseType: [[DBG133]]
|
|
714
|
+
CHECK-SAME: name: "nitems"
|
|
715
|
+
CHECK-SAME: offset: 128
|
|
716
|
+
CHECK-SAME: size: 64
|
|
717
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
718
|
+
CHECK-SAME: )
|
|
719
|
+
CHECK: [[DBG135:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
|
|
720
|
+
CHECK: [[DBG136:.+]] = !DIDerivedType(
|
|
721
|
+
CHECK-SAME: baseType: [[DBG135]]
|
|
722
|
+
CHECK-SAME: name: "itemsize"
|
|
723
|
+
CHECK-SAME: offset: 192
|
|
724
|
+
CHECK-SAME: size: 64
|
|
725
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
726
|
+
CHECK-SAME: )
|
|
727
|
+
CHECK: [[DBG137:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
|
|
728
|
+
CHECK: [[DBG138:.+]] = !DIDerivedType(
|
|
729
|
+
CHECK-SAME: baseType: [[DBG137]]
|
|
730
|
+
CHECK-SAME: size: 64
|
|
731
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
732
|
+
CHECK-SAME: )
|
|
733
|
+
CHECK: [[DBG139:.+]] = !DIDerivedType(
|
|
734
|
+
CHECK-SAME: baseType: [[DBG138]]
|
|
735
|
+
CHECK-SAME: name: "data"
|
|
736
|
+
CHECK-SAME: offset: 256
|
|
737
|
+
CHECK-SAME: size: 64
|
|
738
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
739
|
+
CHECK-SAME: )
|
|
740
|
+
CHECK: [[DBG140:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "i64", size: 64)
|
|
741
|
+
CHECK: [[DBG141:.+]] = !DICompositeType(
|
|
742
|
+
CHECK-SAME: baseType: [[DBG140]]
|
|
743
|
+
CHECK-SAME: identifier: "[1 x i64]"
|
|
744
|
+
CHECK-SAME: name: "UniTuple(int64 x 1) ([1 x i64])"
|
|
745
|
+
CHECK-SAME: tag: DW_TAG_array_type
|
|
746
|
+
CHECK-SAME: )
|
|
747
|
+
CHECK: [[DBG142:.+]] = !DIDerivedType(
|
|
748
|
+
CHECK-SAME: baseType: [[DBG141]]
|
|
749
|
+
CHECK-SAME: name: "shape"
|
|
750
|
+
CHECK-SAME: offset: 320
|
|
751
|
+
CHECK-SAME: size: 64
|
|
752
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
753
|
+
CHECK-SAME: )
|
|
754
|
+
CHECK: [[DBG143:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "i64", size: 64)
|
|
755
|
+
CHECK: [[DBG144:.+]] = !DICompositeType(
|
|
756
|
+
CHECK-SAME: baseType: [[DBG143]]
|
|
757
|
+
CHECK-SAME: identifier: "[1 x i64]"
|
|
758
|
+
CHECK-SAME: name: "UniTuple(int64 x 1) ([1 x i64])"
|
|
759
|
+
CHECK-SAME: size: 64
|
|
760
|
+
CHECK-SAME: tag: DW_TAG_array_type
|
|
761
|
+
CHECK-SAME: )
|
|
762
|
+
CHECK: [[DBG145:.+]] = !DIDerivedType(
|
|
763
|
+
CHECK-SAME: baseType: [[DBG144]]
|
|
764
|
+
CHECK-SAME: name: "strides"
|
|
765
|
+
CHECK-SAME: offset: 384
|
|
766
|
+
CHECK-SAME: size: 64
|
|
767
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
768
|
+
CHECK-SAME: )
|
|
769
|
+
CHECK: [[DBG146:.+]] = !{ [[DBG129]], [[DBG132]], [[DBG134]], [[DBG136]], [[DBG139]], [[DBG142]], [[DBG145]] }
|
|
770
|
+
CHECK: [[DBG147:.+]] = distinct !DICompositeType(
|
|
771
|
+
CHECK-SAME: elements: [[DBG146]]
|
|
772
|
+
CHECK-SAME: identifier: "{i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]}"
|
|
773
|
+
CHECK-SAME: name: "array(float32, 1d, C) ({i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]})"
|
|
774
|
+
CHECK-SAME: size: 448
|
|
775
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
776
|
+
CHECK-SAME: )
|
|
777
|
+
CHECK: !DILocalVariable(
|
|
778
|
+
CHECK-SAME: name: "a"
|
|
779
|
+
CHECK-SAME: type: [[DBG147]]
|
|
780
|
+
CHECK-SAME: )
|
|
781
|
+
"""
|
|
782
|
+
pass
|
|
783
|
+
|
|
784
|
+
ir = foo.inspect_llvm()[sig]
|
|
785
|
+
self.assertFileCheckMatches(ir, foo.__doc__)
|
|
786
|
+
|
|
406
787
|
|
|
407
788
|
if __name__ == "__main__":
|
|
408
789
|
unittest.main()
|
|
@@ -8,7 +8,7 @@ from numba import int16, int32
|
|
|
8
8
|
from numba import cuda, vectorize, njit
|
|
9
9
|
from numba.core import types
|
|
10
10
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
11
|
-
from numba.tests.enum_usecases import (
|
|
11
|
+
from numba.cuda.tests.enum_usecases import (
|
|
12
12
|
Color,
|
|
13
13
|
Shape,
|
|
14
14
|
Planet,
|
|
@@ -1,12 +1,18 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import cffi
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
|
|
3
6
|
from io import StringIO
|
|
4
7
|
from numba import cuda, float32, float64, int32, intp
|
|
8
|
+
from numba.types import float16, CPointer
|
|
9
|
+
from numba.cuda import declare_device
|
|
5
10
|
from numba.cuda.testing import unittest, CUDATestCase
|
|
6
11
|
from numba.cuda.testing import (
|
|
7
12
|
skip_on_cudasim,
|
|
8
13
|
skip_with_nvdisasm,
|
|
9
14
|
skip_without_nvdisasm,
|
|
15
|
+
skip_if_nvjitlink_missing,
|
|
10
16
|
)
|
|
11
17
|
|
|
12
18
|
|
|
@@ -21,6 +27,17 @@ class TestInspect(CUDATestCase):
|
|
|
21
27
|
|
|
22
28
|
@cuda.jit(sig)
|
|
23
29
|
def foo(x, y):
|
|
30
|
+
"""
|
|
31
|
+
// LLVM: define void
|
|
32
|
+
// LLVM-SAME: foo
|
|
33
|
+
// LLVM-LABEL: entry:
|
|
34
|
+
// LLVM-NEXT: br label %"[[VAL_0:.*]]"
|
|
35
|
+
// LLVM-NEXT: [[VAL_0]]:
|
|
36
|
+
// LLVM-NEXT: ret void
|
|
37
|
+
|
|
38
|
+
// ASM: Generated by NVIDIA NVVM Compiler
|
|
39
|
+
// ASM: foo
|
|
40
|
+
"""
|
|
24
41
|
pass
|
|
25
42
|
|
|
26
43
|
file = StringIO()
|
|
@@ -31,28 +48,43 @@ class TestInspect(CUDATestCase):
|
|
|
31
48
|
# Signature in annotation
|
|
32
49
|
self.assertIn("(float32, int32)", typeanno)
|
|
33
50
|
file.close()
|
|
34
|
-
# Function name in LLVM
|
|
35
|
-
llvm = foo.inspect_llvm(sig)
|
|
36
|
-
self.assertIn("foo", llvm)
|
|
37
|
-
|
|
38
|
-
# Kernel in LLVM
|
|
39
|
-
self.assertIn("define void @", llvm)
|
|
40
|
-
|
|
41
|
-
asm = foo.inspect_asm(sig)
|
|
42
51
|
|
|
43
|
-
|
|
44
|
-
self.
|
|
45
|
-
# NVVM inserted comments in PTX
|
|
46
|
-
self.assertIn("Generated by NVIDIA NVVM Compiler", asm)
|
|
52
|
+
self.assertFileCheckLLVM(foo, sig)
|
|
53
|
+
self.assertFileCheckAsm(foo, sig)
|
|
47
54
|
|
|
48
55
|
def test_polytyped(self):
|
|
49
56
|
@cuda.jit
|
|
50
57
|
def foo(x, y):
|
|
58
|
+
"""
|
|
59
|
+
// LLVM: define void
|
|
60
|
+
// LLVM-SAME: foo
|
|
61
|
+
// LLVM_INT-SAME: i64
|
|
62
|
+
// LLVM_INT-SAME: i64
|
|
63
|
+
// LLVM_FLOAT-SAME: double
|
|
64
|
+
// LLVM_FLOAT-SAME: double
|
|
65
|
+
|
|
66
|
+
// ASM: Generated by NVIDIA NVVM Compiler
|
|
67
|
+
// ASM: .visible
|
|
68
|
+
// ASM-SAME: .entry
|
|
69
|
+
// ASM-SAME: foo
|
|
70
|
+
"""
|
|
51
71
|
pass
|
|
52
72
|
|
|
53
73
|
foo[1, 1](1, 1)
|
|
54
74
|
foo[1, 1](1.2, 2.4)
|
|
55
75
|
|
|
76
|
+
int_sig = (intp, intp)
|
|
77
|
+
float_sig = (float64, float64)
|
|
78
|
+
|
|
79
|
+
self.assertFileCheckLLVM(
|
|
80
|
+
foo, int_sig, check_prefixes=["LLVM", "LLVM_INT"]
|
|
81
|
+
)
|
|
82
|
+
self.assertFileCheckAsm(foo, int_sig, check_prefixes=["ASM"])
|
|
83
|
+
self.assertFileCheckLLVM(
|
|
84
|
+
foo, float_sig, check_prefixes=["LLVM", "LLVM_FLOAT"]
|
|
85
|
+
)
|
|
86
|
+
self.assertFileCheckAsm(foo, float_sig, check_prefixes=["ASM"])
|
|
87
|
+
|
|
56
88
|
file = StringIO()
|
|
57
89
|
foo.inspect_types(file=file)
|
|
58
90
|
typeanno = file.getvalue()
|
|
@@ -70,14 +102,6 @@ class TestInspect(CUDATestCase):
|
|
|
70
102
|
self.assertIn((intp, intp), llvmirs)
|
|
71
103
|
self.assertIn((float64, float64), llvmirs)
|
|
72
104
|
|
|
73
|
-
# Function name in LLVM
|
|
74
|
-
self.assertIn("foo", llvmirs[intp, intp])
|
|
75
|
-
self.assertIn("foo", llvmirs[float64, float64])
|
|
76
|
-
|
|
77
|
-
# Kernels in LLVM
|
|
78
|
-
self.assertIn("define void @", llvmirs[intp, intp])
|
|
79
|
-
self.assertIn("define void @", llvmirs[float64, float64])
|
|
80
|
-
|
|
81
105
|
asmdict = foo.inspect_asm()
|
|
82
106
|
|
|
83
107
|
# Signature in assembly dict
|
|
@@ -88,10 +112,6 @@ class TestInspect(CUDATestCase):
|
|
|
88
112
|
self.assertIn((intp, intp), asmdict)
|
|
89
113
|
self.assertIn((float64, float64), asmdict)
|
|
90
114
|
|
|
91
|
-
# NVVM inserted in PTX
|
|
92
|
-
self.assertIn("foo", asmdict[intp, intp])
|
|
93
|
-
self.assertIn("foo", asmdict[float64, float64])
|
|
94
|
-
|
|
95
115
|
def _test_inspect_sass(self, kernel, name, sass):
|
|
96
116
|
# Ensure function appears in output
|
|
97
117
|
seen_function = False
|
|
@@ -108,6 +128,56 @@ class TestInspect(CUDATestCase):
|
|
|
108
128
|
self.assertIn("BRA", sass) # Branch
|
|
109
129
|
self.assertIn("EXIT", sass) # Exit program
|
|
110
130
|
|
|
131
|
+
@skip_on_cudasim("Simulator does not generate code to be inspected")
|
|
132
|
+
@skip_if_nvjitlink_missing("nvJitLink is required for LTO")
|
|
133
|
+
def test_inspect_lto_asm(self):
|
|
134
|
+
ffi = cffi.FFI()
|
|
135
|
+
|
|
136
|
+
ext = cuda.CUSource("""
|
|
137
|
+
#include <cuda_fp16.h>
|
|
138
|
+
extern "C"
|
|
139
|
+
__device__ int add_f2_f2(__half * res, __half * a, __half *b) {
|
|
140
|
+
*res = *a + *b;
|
|
141
|
+
return 0;
|
|
142
|
+
}
|
|
143
|
+
""")
|
|
144
|
+
|
|
145
|
+
add = declare_device(
|
|
146
|
+
"add_f2_f2",
|
|
147
|
+
float16(CPointer(float16), CPointer(float16)),
|
|
148
|
+
link=ext,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
@cuda.jit
|
|
152
|
+
def k(arr):
|
|
153
|
+
local_arr = cuda.local.array(shape=1, dtype=np.float16)
|
|
154
|
+
local_arr2 = cuda.local.array(shape=1, dtype=np.float16)
|
|
155
|
+
local_arr[0] = 1
|
|
156
|
+
local_arr2[0] = 2
|
|
157
|
+
|
|
158
|
+
ptr = ffi.from_buffer(local_arr)
|
|
159
|
+
ptr2 = ffi.from_buffer(local_arr2)
|
|
160
|
+
|
|
161
|
+
arr[0] = add(ptr, ptr2)
|
|
162
|
+
|
|
163
|
+
arr = np.array([0], dtype=np.float16)
|
|
164
|
+
|
|
165
|
+
k[1, 1](arr)
|
|
166
|
+
|
|
167
|
+
allasms = k.inspect_asm()
|
|
168
|
+
asm = next(iter(allasms.values()))
|
|
169
|
+
|
|
170
|
+
regex = re.compile(r"call(.|\n)*add_f2_f2")
|
|
171
|
+
self.assertRegex(asm, regex)
|
|
172
|
+
|
|
173
|
+
all_ext_asms = k.inspect_lto_ptx()
|
|
174
|
+
lto_asm = next(iter(all_ext_asms.values()))
|
|
175
|
+
|
|
176
|
+
self.assertIn("add.f16", lto_asm)
|
|
177
|
+
self.assertNotIn("call", lto_asm)
|
|
178
|
+
|
|
179
|
+
np.testing.assert_equal(arr[0], np.float16(1) + np.float16(2))
|
|
180
|
+
|
|
111
181
|
@skip_without_nvdisasm("nvdisasm needed for inspect_sass()")
|
|
112
182
|
def test_inspect_sass_eager(self):
|
|
113
183
|
sig = (float32[::1], int32[::1])
|