numba-cuda 0.17.0__py3-none-any.whl → 0.18.1__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +0 -8
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
- numba_cuda/numba/cuda/api_util.py +6 -0
- numba_cuda/numba/cuda/cgutils.py +1291 -0
- numba_cuda/numba/cuda/codegen.py +32 -14
- numba_cuda/numba/cuda/compiler.py +113 -10
- numba_cuda/numba/cuda/core/caching.py +741 -0
- numba_cuda/numba/cuda/core/callconv.py +338 -0
- numba_cuda/numba/cuda/core/codegen.py +168 -0
- numba_cuda/numba/cuda/core/compiler.py +205 -0
- numba_cuda/numba/cuda/core/typed_passes.py +139 -0
- numba_cuda/numba/cuda/cudadecl.py +0 -268
- numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +2 -1
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +4 -178
- numba_cuda/numba/cuda/debuginfo.py +469 -3
- numba_cuda/numba/cuda/device_init.py +0 -1
- numba_cuda/numba/cuda/dispatcher.py +310 -11
- numba_cuda/numba/cuda/extending.py +2 -1
- numba_cuda/numba/cuda/fp16.py +348 -0
- numba_cuda/numba/cuda/intrinsics.py +1 -1
- numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
- numba_cuda/numba/cuda/lowering.py +1833 -8
- numba_cuda/numba/cuda/mathimpl.py +2 -90
- numba_cuda/numba/cuda/nvvmutils.py +2 -1
- numba_cuda/numba/cuda/printimpl.py +2 -1
- numba_cuda/numba/cuda/serialize.py +264 -0
- numba_cuda/numba/cuda/simulator/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
- numba_cuda/numba/cuda/stubs.py +0 -308
- numba_cuda/numba/cuda/target.py +13 -5
- numba_cuda/numba/cuda/testing.py +156 -5
- numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
- numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +10 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +15 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +108 -24
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
- numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
- numba_cuda/numba/cuda/utils.py +785 -0
- numba_cuda/numba/cuda/vector_types.py +1 -1
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/METADATA +18 -4
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/RECORD +63 -50
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/WHEEL +0 -0
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.1.dist-info}/top_level.txt +0 -0
|
@@ -403,6 +403,387 @@ class TestCudaDebugInfo(CUDATestCase):
|
|
|
403
403
|
match = re.compile(pat6).search(llvm_ir)
|
|
404
404
|
self.assertIsNotNone(match, msg=llvm_ir)
|
|
405
405
|
|
|
406
|
+
def test_DW_LANG(self):
|
|
407
|
+
@cuda.jit(debug=True)
|
|
408
|
+
def foo():
|
|
409
|
+
"""
|
|
410
|
+
CHECK: distinct !DICompileUnit
|
|
411
|
+
CHECK-SAME: emissionKind: FullDebug
|
|
412
|
+
CHECK-SAME: isOptimized: true
|
|
413
|
+
CHECK-SAME: language: DW_LANG_C_plus_plus
|
|
414
|
+
CHECK-SAME: producer: "clang (Numba)"
|
|
415
|
+
"""
|
|
416
|
+
pass
|
|
417
|
+
|
|
418
|
+
foo[1, 1]()
|
|
419
|
+
|
|
420
|
+
llvm_ir = foo.inspect_llvm()[tuple()]
|
|
421
|
+
self.assertFileCheckMatches(llvm_ir, foo.__doc__)
|
|
422
|
+
|
|
423
|
+
def test_DILocation(self):
|
|
424
|
+
"""Tests that DILocation information is reasonable.
|
|
425
|
+
|
|
426
|
+
The kernel `foo` produces LLVM like:
|
|
427
|
+
define function() {
|
|
428
|
+
entry:
|
|
429
|
+
alloca
|
|
430
|
+
store 0 to alloca
|
|
431
|
+
<arithmetic for doing the operations on b, c, d>
|
|
432
|
+
setup for print
|
|
433
|
+
branch
|
|
434
|
+
other_labels:
|
|
435
|
+
... <elided>
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
The following checks that:
|
|
439
|
+
* the alloca and store have no !dbg
|
|
440
|
+
* the arithmetic occurs in the order defined and with !dbg
|
|
441
|
+
* that the !dbg entries are monotonically increasing in value with
|
|
442
|
+
source line number
|
|
443
|
+
"""
|
|
444
|
+
sig = (types.float64,)
|
|
445
|
+
|
|
446
|
+
@cuda.jit(sig, debug=True)
|
|
447
|
+
def foo(a):
|
|
448
|
+
"""
|
|
449
|
+
CHECK-LABEL: define void @{{.+}}foo
|
|
450
|
+
CHECK: entry:
|
|
451
|
+
|
|
452
|
+
CHECK: %[[VAL_0:.*]] = alloca double
|
|
453
|
+
CHECK-NOT: !dbg
|
|
454
|
+
CHECK: store double 0.0, double* %[[VAL_0]]
|
|
455
|
+
CHECK-NOT: !dbg
|
|
456
|
+
CHECK: %[[VAL_1:.*]] = alloca double
|
|
457
|
+
CHECK-NOT: !dbg
|
|
458
|
+
CHECK: store double 0.0, double* %[[VAL_1]]
|
|
459
|
+
CHECK-NOT: !dbg
|
|
460
|
+
CHECK: %[[VAL_2:.*]] = alloca double
|
|
461
|
+
CHECK-NOT: !dbg
|
|
462
|
+
CHECK: store double 0.0, double* %[[VAL_2]]
|
|
463
|
+
CHECK-NOT: !dbg
|
|
464
|
+
CHECK: %[[VAL_3:.*]] = alloca double
|
|
465
|
+
CHECK-NOT: !dbg
|
|
466
|
+
CHECK: store double 0.0, double* %[[VAL_3]]
|
|
467
|
+
CHECK-NOT: !dbg
|
|
468
|
+
CHECK: %[[VAL_4:.*]] = alloca double
|
|
469
|
+
CHECK-NOT: !dbg
|
|
470
|
+
CHECK: store double 0.0, double* %[[VAL_4]]
|
|
471
|
+
CHECK-NOT: !dbg
|
|
472
|
+
CHECK: %[[VAL_5:.*]] = alloca double
|
|
473
|
+
CHECK-NOT: !dbg
|
|
474
|
+
CHECK: store double 0.0, double* %[[VAL_5]]
|
|
475
|
+
CHECK-NOT: !dbg
|
|
476
|
+
CHECK: %[[VAL_6:.*]] = alloca i8*
|
|
477
|
+
CHECK-NOT: !dbg
|
|
478
|
+
CHECK: store i8* null, i8** %[[VAL_6]]
|
|
479
|
+
CHECK-NOT: !dbg
|
|
480
|
+
CHECK: %[[VAL_7:.*]] = alloca i8*
|
|
481
|
+
CHECK-NOT: !dbg
|
|
482
|
+
CHECK: store i8* null, i8** %[[VAL_7]]
|
|
483
|
+
CHECK-NOT: !dbg
|
|
484
|
+
|
|
485
|
+
CHECK: br label %"[[ENTRY:.+]]"
|
|
486
|
+
CHECK-NOT: !dbg
|
|
487
|
+
CHECK: [[ENTRY]]:
|
|
488
|
+
|
|
489
|
+
CHECK: fadd{{.+}} !dbg ![[DBGADD:[0-9]+]]
|
|
490
|
+
CHECK: fmul{{.+}} !dbg ![[DBGMUL:[0-9]+]]
|
|
491
|
+
CHECK: fdiv{{.+}} !dbg ![[DBGDIV:[0-9]+]]
|
|
492
|
+
|
|
493
|
+
CHECK: ![[DBGADD]] = !DILocation
|
|
494
|
+
CHECK: ![[DBGMUL]] = !DILocation
|
|
495
|
+
CHECK: ![[DBGDIV]] = !DILocation
|
|
496
|
+
"""
|
|
497
|
+
b = a + 1.23
|
|
498
|
+
c = b * 2.34
|
|
499
|
+
a = b / c
|
|
500
|
+
|
|
501
|
+
ir = foo.inspect_llvm()[sig]
|
|
502
|
+
self.assertFileCheckMatches(ir, foo.__doc__)
|
|
503
|
+
|
|
504
|
+
def test_DITypes(self):
|
|
505
|
+
"""Tests that DITypes are emitted for the types used in the kernel."""
|
|
506
|
+
sig = (
|
|
507
|
+
types.float32,
|
|
508
|
+
types.float64,
|
|
509
|
+
types.int8,
|
|
510
|
+
types.int16,
|
|
511
|
+
types.int32,
|
|
512
|
+
types.int64,
|
|
513
|
+
types.uint8,
|
|
514
|
+
types.uint16,
|
|
515
|
+
types.uint32,
|
|
516
|
+
types.uint64,
|
|
517
|
+
types.complex64,
|
|
518
|
+
types.complex128,
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
@cuda.jit(sig, debug=True)
|
|
522
|
+
def foo(a, b, c, d, e, f, g, h, i, j, k, l):
|
|
523
|
+
"""
|
|
524
|
+
CHECK: [[DBG1:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
|
|
525
|
+
CHECK: [[DBG2:.+]] = !DIDerivedType(
|
|
526
|
+
CHECK-SAME: baseType: [[DBG1]]
|
|
527
|
+
CHECK-SAME: size: 64
|
|
528
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
529
|
+
CHECK: [[DBG3:.+]] = !DIDerivedType(
|
|
530
|
+
CHECK-SAME: baseType: [[DBG2]]
|
|
531
|
+
CHECK-SAME: size: 64
|
|
532
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
533
|
+
CHECK: [[DBG4:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
|
|
534
|
+
CHECK: [[DBG5:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float64", size: 64)
|
|
535
|
+
CHECK: [[DBG6:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int8", size: 8)
|
|
536
|
+
CHECK: [[DBG7:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int16", size: 16)
|
|
537
|
+
CHECK: [[DBG8:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int32", size: 32)
|
|
538
|
+
CHECK: [[DBG9:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
|
|
539
|
+
CHECK: [[DBG10:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint8", size: 8)
|
|
540
|
+
CHECK: [[DBG11:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint16", size: 16)
|
|
541
|
+
CHECK: [[DBG12:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint32", size: 32)
|
|
542
|
+
CHECK: [[DBG13:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint64", size: 64)
|
|
543
|
+
CHECK: [[DBG14:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
|
|
544
|
+
CHECK: [[DBG15:.+]] = !DIDerivedType(
|
|
545
|
+
CHECK-SAME: baseType: [[DBG14]]
|
|
546
|
+
CHECK-SAME: name: "real"
|
|
547
|
+
CHECK-SAME: offset: 0
|
|
548
|
+
CHECK-SAME: size: 32
|
|
549
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
550
|
+
CHECK: [[DBG16:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
|
|
551
|
+
CHECK: [[DBG17:.+]] = !DIDerivedType(
|
|
552
|
+
CHECK-SAME: baseType: [[DBG16]]
|
|
553
|
+
CHECK-SAME: name: "imag"
|
|
554
|
+
CHECK-SAME: offset: 32
|
|
555
|
+
CHECK-SAME: size: 32
|
|
556
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
557
|
+
CHECK: [[DBG18:.+]] = !{ [[DBG15]], [[DBG17]] }
|
|
558
|
+
CHECK: [[DBG19:.+]] = distinct !DICompositeType(
|
|
559
|
+
CHECK-SAME: elements: [[DBG18]]
|
|
560
|
+
CHECK-SAME: name: "complex64 ({float, float})"
|
|
561
|
+
CHECK-SAME: size: 64
|
|
562
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
563
|
+
CHECK: [[DBG20:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
|
|
564
|
+
CHECK: [[DBG21:.+]] = !DIDerivedType(
|
|
565
|
+
CHECK-SAME: baseType: [[DBG20]]
|
|
566
|
+
CHECK-SAME: name: "real"
|
|
567
|
+
CHECK-SAME: offset: 0
|
|
568
|
+
CHECK-SAME: size: 64
|
|
569
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
570
|
+
CHECK: [[DBG22:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
|
|
571
|
+
CHECK: [[DBG23:.+]] = !DIDerivedType(
|
|
572
|
+
CHECK-SAME: baseType: [[DBG22]]
|
|
573
|
+
CHECK-SAME: name: "imag"
|
|
574
|
+
CHECK-SAME: offset: 64
|
|
575
|
+
CHECK-SAME: size: 64
|
|
576
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
577
|
+
CHECK: [[DBG24:.+]] = !{ [[DBG21]], [[DBG23]] }
|
|
578
|
+
CHECK: [[DBG25:.+]] = distinct !DICompositeType(
|
|
579
|
+
CHECK-SAME: elements: [[DBG24]]
|
|
580
|
+
CHECK-SAME: name: "complex128 ({double, double})"
|
|
581
|
+
CHECK-SAME: size: 128
|
|
582
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
583
|
+
CHECK: [[DBG32:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
|
|
584
|
+
CHECK: [[DBG33:.+]] = !DILocalVariable(
|
|
585
|
+
CHECK-SAME: name: "a"
|
|
586
|
+
CHECK-SAME: type: [[DBG32]]
|
|
587
|
+
CHECK: [[DBG34:.+]] = !DIExpression()
|
|
588
|
+
CHECK: [[DBG35:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float64", size: 64)
|
|
589
|
+
CHECK: [[DBG36:.+]] = !DILocalVariable(
|
|
590
|
+
CHECK-SAME: name: "b"
|
|
591
|
+
CHECK-SAME: type: [[DBG35]]
|
|
592
|
+
CHECK: [[DBG37:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int8", size: 8)
|
|
593
|
+
CHECK: [[DBG38:.+]] = !DILocalVariable(
|
|
594
|
+
CHECK-SAME: name: "c"
|
|
595
|
+
CHECK-SAME: type: [[DBG37]]
|
|
596
|
+
CHECK: [[DBG39:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int16", size: 16)
|
|
597
|
+
CHECK: [[DBG40:.+]] = !DILocalVariable(
|
|
598
|
+
CHECK-SAME: name: "d"
|
|
599
|
+
CHECK-SAME: type: [[DBG39]]
|
|
600
|
+
CHECK: [[DBG41:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int32", size: 32)
|
|
601
|
+
CHECK: [[DBG42:.+]] = !DILocalVariable(
|
|
602
|
+
CHECK-SAME: name: "e"
|
|
603
|
+
CHECK-SAME: type: [[DBG41]]
|
|
604
|
+
CHECK: [[DBG43:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
|
|
605
|
+
CHECK: [[DBG44:.+]] = !DILocalVariable(
|
|
606
|
+
CHECK-SAME: name: "f"
|
|
607
|
+
CHECK-SAME: type: [[DBG43]]
|
|
608
|
+
CHECK: [[DBG45:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint8", size: 8)
|
|
609
|
+
CHECK: [[DBG46:.+]] = !DILocalVariable(
|
|
610
|
+
CHECK-SAME: name: "g"
|
|
611
|
+
CHECK-SAME: type: [[DBG45]]
|
|
612
|
+
CHECK: [[DBG47:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint16", size: 16)
|
|
613
|
+
CHECK: [[DBG48:.+]] = !DILocalVariable(
|
|
614
|
+
CHECK-SAME: name: "h"
|
|
615
|
+
CHECK-SAME: type: [[DBG47]]
|
|
616
|
+
CHECK: [[DBG49:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint32", size: 32)
|
|
617
|
+
CHECK: [[DBG50:.+]] = !DILocalVariable(
|
|
618
|
+
CHECK-SAME: name: "i"
|
|
619
|
+
CHECK-SAME: type: [[DBG49]]
|
|
620
|
+
CHECK: [[DBG51:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "uint64", size: 64)
|
|
621
|
+
CHECK: [[DBG52:.+]] = !DILocalVariable(
|
|
622
|
+
CHECK-SAME: name: "j"
|
|
623
|
+
CHECK-SAME: type: [[DBG51]]
|
|
624
|
+
CHECK: [[DBG53:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
|
|
625
|
+
CHECK: [[DBG54:.+]] = !DIDerivedType(
|
|
626
|
+
CHECK-SAME: baseType: [[DBG53]]
|
|
627
|
+
CHECK-SAME: name: "real"
|
|
628
|
+
CHECK-SAME: offset: 0
|
|
629
|
+
CHECK-SAME: size: 32
|
|
630
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
631
|
+
CHECK: [[DBG55:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float", size: 32)
|
|
632
|
+
CHECK: [[DBG56:.+]] = !DIDerivedType(
|
|
633
|
+
CHECK-SAME: baseType: [[DBG55]]
|
|
634
|
+
CHECK-SAME: name: "imag"
|
|
635
|
+
CHECK-SAME: offset: 32
|
|
636
|
+
CHECK-SAME: size: 32
|
|
637
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
638
|
+
CHECK: [[DBG57:.+]] = !{ [[DBG54]], [[DBG56]] }
|
|
639
|
+
CHECK: [[DBG58:.+]] = distinct !DICompositeType(
|
|
640
|
+
CHECK-SAME: elements: [[DBG57]]
|
|
641
|
+
CHECK-SAME: name: "complex64 ({float, float})"
|
|
642
|
+
CHECK-SAME: size: 64
|
|
643
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
644
|
+
CHECK: [[DBG59:.+]] = !DILocalVariable(
|
|
645
|
+
CHECK-SAME: name: "k"
|
|
646
|
+
CHECK-SAME: type: [[DBG58]]
|
|
647
|
+
CHECK: [[DBG60:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
|
|
648
|
+
CHECK: [[DBG61:.+]] = !DIDerivedType(
|
|
649
|
+
CHECK-SAME: baseType: [[DBG60]]
|
|
650
|
+
CHECK-SAME: name: "real"
|
|
651
|
+
CHECK-SAME: offset: 0
|
|
652
|
+
CHECK-SAME: size: 64
|
|
653
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
654
|
+
CHECK: [[DBG62:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "double", size: 64)
|
|
655
|
+
CHECK: [[DBG63:.+]] = !DIDerivedType(
|
|
656
|
+
CHECK-SAME: baseType: [[DBG62]]
|
|
657
|
+
CHECK-SAME: name: "imag"
|
|
658
|
+
CHECK-SAME: offset: 64
|
|
659
|
+
CHECK-SAME: size: 64
|
|
660
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
661
|
+
CHECK: [[DBG64:.+]] = !{ [[DBG61]], [[DBG63]] }
|
|
662
|
+
CHECK: [[DBG65:.+]] = distinct !DICompositeType(
|
|
663
|
+
CHECK-SAME: elements: [[DBG64]]
|
|
664
|
+
CHECK-SAME: name: "complex128 ({double, double})"
|
|
665
|
+
CHECK-SAME: size: 128
|
|
666
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
667
|
+
CHECK: [[DBG66:.+]] = !DILocalVariable(
|
|
668
|
+
CHECK-SAME: name: "l"
|
|
669
|
+
CHECK-SAME: type: [[DBG65]]
|
|
670
|
+
"""
|
|
671
|
+
pass
|
|
672
|
+
|
|
673
|
+
ir = foo.inspect_llvm()[sig]
|
|
674
|
+
self.assertFileCheckMatches(ir, foo.__doc__)
|
|
675
|
+
|
|
676
|
+
def test_arrays(self):
|
|
677
|
+
"""Tests that arrays are emitted as DIBasicType."""
|
|
678
|
+
sig = (types.float32[::1],)
|
|
679
|
+
|
|
680
|
+
@cuda.jit(sig, debug=True)
|
|
681
|
+
def foo(a):
|
|
682
|
+
"""
|
|
683
|
+
CHECK: distinct !DICompileUnit
|
|
684
|
+
CHECK: distinct !DISubprogram
|
|
685
|
+
CHECK: [[DBG127:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
|
|
686
|
+
CHECK: [[DBG128:.+]] = !DIDerivedType(
|
|
687
|
+
CHECK-SAME: baseType: [[DBG127]]
|
|
688
|
+
CHECK-SAME: size: 64
|
|
689
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
690
|
+
CHECK-SAME: )
|
|
691
|
+
CHECK: [[DBG129:.+]] = !DIDerivedType(
|
|
692
|
+
CHECK-SAME: baseType: [[DBG128]]
|
|
693
|
+
CHECK-SAME: name: "meminfo"
|
|
694
|
+
CHECK-SAME: offset: 0
|
|
695
|
+
CHECK-SAME: size: 64
|
|
696
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
697
|
+
CHECK-SAME: )
|
|
698
|
+
CHECK: [[DBG130:.+]] = !DIBasicType(encoding: DW_ATE_boolean, name: "i8", size: 8)
|
|
699
|
+
CHECK: [[DBG131:.+]] = !DIDerivedType(
|
|
700
|
+
CHECK-SAME: baseType: [[DBG130]]
|
|
701
|
+
CHECK-SAME: size: 64
|
|
702
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
703
|
+
CHECK-SAME: )
|
|
704
|
+
CHECK: [[DBG132:.+]] = !DIDerivedType(
|
|
705
|
+
CHECK-SAME: baseType: [[DBG131]]
|
|
706
|
+
CHECK-SAME: name: "parent"
|
|
707
|
+
CHECK-SAME: offset: 64
|
|
708
|
+
CHECK-SAME: size: 64
|
|
709
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
710
|
+
CHECK-SAME: )
|
|
711
|
+
CHECK: [[DBG133:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
|
|
712
|
+
CHECK: [[DBG134:.+]] = !DIDerivedType(
|
|
713
|
+
CHECK-SAME: baseType: [[DBG133]]
|
|
714
|
+
CHECK-SAME: name: "nitems"
|
|
715
|
+
CHECK-SAME: offset: 128
|
|
716
|
+
CHECK-SAME: size: 64
|
|
717
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
718
|
+
CHECK-SAME: )
|
|
719
|
+
CHECK: [[DBG135:.+]] = !DIBasicType(encoding: DW_ATE_signed, name: "int64", size: 64)
|
|
720
|
+
CHECK: [[DBG136:.+]] = !DIDerivedType(
|
|
721
|
+
CHECK-SAME: baseType: [[DBG135]]
|
|
722
|
+
CHECK-SAME: name: "itemsize"
|
|
723
|
+
CHECK-SAME: offset: 192
|
|
724
|
+
CHECK-SAME: size: 64
|
|
725
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
726
|
+
CHECK-SAME: )
|
|
727
|
+
CHECK: [[DBG137:.+]] = !DIBasicType(encoding: DW_ATE_float, name: "float32", size: 32)
|
|
728
|
+
CHECK: [[DBG138:.+]] = !DIDerivedType(
|
|
729
|
+
CHECK-SAME: baseType: [[DBG137]]
|
|
730
|
+
CHECK-SAME: size: 64
|
|
731
|
+
CHECK-SAME: tag: DW_TAG_pointer_type
|
|
732
|
+
CHECK-SAME: )
|
|
733
|
+
CHECK: [[DBG139:.+]] = !DIDerivedType(
|
|
734
|
+
CHECK-SAME: baseType: [[DBG138]]
|
|
735
|
+
CHECK-SAME: name: "data"
|
|
736
|
+
CHECK-SAME: offset: 256
|
|
737
|
+
CHECK-SAME: size: 64
|
|
738
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
739
|
+
CHECK-SAME: )
|
|
740
|
+
CHECK: [[DBG140:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "i64", size: 64)
|
|
741
|
+
CHECK: [[DBG141:.+]] = !DICompositeType(
|
|
742
|
+
CHECK-SAME: baseType: [[DBG140]]
|
|
743
|
+
CHECK-SAME: identifier: "[1 x i64]"
|
|
744
|
+
CHECK-SAME: name: "UniTuple(int64 x 1) ([1 x i64])"
|
|
745
|
+
CHECK-SAME: tag: DW_TAG_array_type
|
|
746
|
+
CHECK-SAME: )
|
|
747
|
+
CHECK: [[DBG142:.+]] = !DIDerivedType(
|
|
748
|
+
CHECK-SAME: baseType: [[DBG141]]
|
|
749
|
+
CHECK-SAME: name: "shape"
|
|
750
|
+
CHECK-SAME: offset: 320
|
|
751
|
+
CHECK-SAME: size: 64
|
|
752
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
753
|
+
CHECK-SAME: )
|
|
754
|
+
CHECK: [[DBG143:.+]] = !DIBasicType(encoding: DW_ATE_unsigned, name: "i64", size: 64)
|
|
755
|
+
CHECK: [[DBG144:.+]] = !DICompositeType(
|
|
756
|
+
CHECK-SAME: baseType: [[DBG143]]
|
|
757
|
+
CHECK-SAME: identifier: "[1 x i64]"
|
|
758
|
+
CHECK-SAME: name: "UniTuple(int64 x 1) ([1 x i64])"
|
|
759
|
+
CHECK-SAME: size: 64
|
|
760
|
+
CHECK-SAME: tag: DW_TAG_array_type
|
|
761
|
+
CHECK-SAME: )
|
|
762
|
+
CHECK: [[DBG145:.+]] = !DIDerivedType(
|
|
763
|
+
CHECK-SAME: baseType: [[DBG144]]
|
|
764
|
+
CHECK-SAME: name: "strides"
|
|
765
|
+
CHECK-SAME: offset: 384
|
|
766
|
+
CHECK-SAME: size: 64
|
|
767
|
+
CHECK-SAME: tag: DW_TAG_member
|
|
768
|
+
CHECK-SAME: )
|
|
769
|
+
CHECK: [[DBG146:.+]] = !{ [[DBG129]], [[DBG132]], [[DBG134]], [[DBG136]], [[DBG139]], [[DBG142]], [[DBG145]] }
|
|
770
|
+
CHECK: [[DBG147:.+]] = distinct !DICompositeType(
|
|
771
|
+
CHECK-SAME: elements: [[DBG146]]
|
|
772
|
+
CHECK-SAME: identifier: "{i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]}"
|
|
773
|
+
CHECK-SAME: name: "array(float32, 1d, C) ({i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]})"
|
|
774
|
+
CHECK-SAME: size: 448
|
|
775
|
+
CHECK-SAME: tag: DW_TAG_structure_type
|
|
776
|
+
CHECK-SAME: )
|
|
777
|
+
CHECK: !DILocalVariable(
|
|
778
|
+
CHECK-SAME: name: "a"
|
|
779
|
+
CHECK-SAME: type: [[DBG147]]
|
|
780
|
+
CHECK-SAME: )
|
|
781
|
+
"""
|
|
782
|
+
pass
|
|
783
|
+
|
|
784
|
+
ir = foo.inspect_llvm()[sig]
|
|
785
|
+
self.assertFileCheckMatches(ir, foo.__doc__)
|
|
786
|
+
|
|
406
787
|
|
|
407
788
|
if __name__ == "__main__":
|
|
408
789
|
unittest.main()
|
|
@@ -8,7 +8,7 @@ from numba import int16, int32
|
|
|
8
8
|
from numba import cuda, vectorize, njit
|
|
9
9
|
from numba.core import types
|
|
10
10
|
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
|
|
11
|
-
from numba.tests.enum_usecases import (
|
|
11
|
+
from numba.cuda.tests.enum_usecases import (
|
|
12
12
|
Color,
|
|
13
13
|
Shape,
|
|
14
14
|
Planet,
|
|
@@ -1,12 +1,18 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import cffi
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
|
|
3
6
|
from io import StringIO
|
|
4
7
|
from numba import cuda, float32, float64, int32, intp
|
|
8
|
+
from numba.types import float16, CPointer
|
|
9
|
+
from numba.cuda import declare_device
|
|
5
10
|
from numba.cuda.testing import unittest, CUDATestCase
|
|
6
11
|
from numba.cuda.testing import (
|
|
7
12
|
skip_on_cudasim,
|
|
8
13
|
skip_with_nvdisasm,
|
|
9
14
|
skip_without_nvdisasm,
|
|
15
|
+
skip_if_nvjitlink_missing,
|
|
10
16
|
)
|
|
11
17
|
|
|
12
18
|
|
|
@@ -21,6 +27,17 @@ class TestInspect(CUDATestCase):
|
|
|
21
27
|
|
|
22
28
|
@cuda.jit(sig)
|
|
23
29
|
def foo(x, y):
|
|
30
|
+
"""
|
|
31
|
+
// LLVM: define void
|
|
32
|
+
// LLVM-SAME: foo
|
|
33
|
+
// LLVM-LABEL: entry:
|
|
34
|
+
// LLVM-NEXT: br label %"[[VAL_0:.*]]"
|
|
35
|
+
// LLVM-NEXT: [[VAL_0]]:
|
|
36
|
+
// LLVM-NEXT: ret void
|
|
37
|
+
|
|
38
|
+
// ASM: Generated by NVIDIA NVVM Compiler
|
|
39
|
+
// ASM: foo
|
|
40
|
+
"""
|
|
24
41
|
pass
|
|
25
42
|
|
|
26
43
|
file = StringIO()
|
|
@@ -31,28 +48,43 @@ class TestInspect(CUDATestCase):
|
|
|
31
48
|
# Signature in annotation
|
|
32
49
|
self.assertIn("(float32, int32)", typeanno)
|
|
33
50
|
file.close()
|
|
34
|
-
# Function name in LLVM
|
|
35
|
-
llvm = foo.inspect_llvm(sig)
|
|
36
|
-
self.assertIn("foo", llvm)
|
|
37
|
-
|
|
38
|
-
# Kernel in LLVM
|
|
39
|
-
self.assertIn("define void @", llvm)
|
|
40
51
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
# Function name in PTX
|
|
44
|
-
self.assertIn("foo", asm)
|
|
45
|
-
# NVVM inserted comments in PTX
|
|
46
|
-
self.assertIn("Generated by NVIDIA NVVM Compiler", asm)
|
|
52
|
+
self.assertFileCheckLLVM(foo, sig)
|
|
53
|
+
self.assertFileCheckAsm(foo, sig)
|
|
47
54
|
|
|
48
55
|
def test_polytyped(self):
|
|
49
56
|
@cuda.jit
|
|
50
57
|
def foo(x, y):
|
|
58
|
+
"""
|
|
59
|
+
// LLVM: define void
|
|
60
|
+
// LLVM-SAME: foo
|
|
61
|
+
// LLVM_INT-SAME: i64
|
|
62
|
+
// LLVM_INT-SAME: i64
|
|
63
|
+
// LLVM_FLOAT-SAME: double
|
|
64
|
+
// LLVM_FLOAT-SAME: double
|
|
65
|
+
|
|
66
|
+
// ASM: Generated by NVIDIA NVVM Compiler
|
|
67
|
+
// ASM: .visible
|
|
68
|
+
// ASM-SAME: .entry
|
|
69
|
+
// ASM-SAME: foo
|
|
70
|
+
"""
|
|
51
71
|
pass
|
|
52
72
|
|
|
53
73
|
foo[1, 1](1, 1)
|
|
54
74
|
foo[1, 1](1.2, 2.4)
|
|
55
75
|
|
|
76
|
+
int_sig = (intp, intp)
|
|
77
|
+
float_sig = (float64, float64)
|
|
78
|
+
|
|
79
|
+
self.assertFileCheckLLVM(
|
|
80
|
+
foo, int_sig, check_prefixes=["LLVM", "LLVM_INT"]
|
|
81
|
+
)
|
|
82
|
+
self.assertFileCheckAsm(foo, int_sig, check_prefixes=["ASM"])
|
|
83
|
+
self.assertFileCheckLLVM(
|
|
84
|
+
foo, float_sig, check_prefixes=["LLVM", "LLVM_FLOAT"]
|
|
85
|
+
)
|
|
86
|
+
self.assertFileCheckAsm(foo, float_sig, check_prefixes=["ASM"])
|
|
87
|
+
|
|
56
88
|
file = StringIO()
|
|
57
89
|
foo.inspect_types(file=file)
|
|
58
90
|
typeanno = file.getvalue()
|
|
@@ -70,14 +102,6 @@ class TestInspect(CUDATestCase):
|
|
|
70
102
|
self.assertIn((intp, intp), llvmirs)
|
|
71
103
|
self.assertIn((float64, float64), llvmirs)
|
|
72
104
|
|
|
73
|
-
# Function name in LLVM
|
|
74
|
-
self.assertIn("foo", llvmirs[intp, intp])
|
|
75
|
-
self.assertIn("foo", llvmirs[float64, float64])
|
|
76
|
-
|
|
77
|
-
# Kernels in LLVM
|
|
78
|
-
self.assertIn("define void @", llvmirs[intp, intp])
|
|
79
|
-
self.assertIn("define void @", llvmirs[float64, float64])
|
|
80
|
-
|
|
81
105
|
asmdict = foo.inspect_asm()
|
|
82
106
|
|
|
83
107
|
# Signature in assembly dict
|
|
@@ -88,10 +112,6 @@ class TestInspect(CUDATestCase):
|
|
|
88
112
|
self.assertIn((intp, intp), asmdict)
|
|
89
113
|
self.assertIn((float64, float64), asmdict)
|
|
90
114
|
|
|
91
|
-
# NVVM inserted in PTX
|
|
92
|
-
self.assertIn("foo", asmdict[intp, intp])
|
|
93
|
-
self.assertIn("foo", asmdict[float64, float64])
|
|
94
|
-
|
|
95
115
|
def _test_inspect_sass(self, kernel, name, sass):
|
|
96
116
|
# Ensure function appears in output
|
|
97
117
|
seen_function = False
|
|
@@ -108,8 +128,68 @@ class TestInspect(CUDATestCase):
|
|
|
108
128
|
self.assertIn("BRA", sass) # Branch
|
|
109
129
|
self.assertIn("EXIT", sass) # Exit program
|
|
110
130
|
|
|
131
|
+
@skip_on_cudasim("Simulator does not generate code to be inspected")
|
|
132
|
+
@skip_if_nvjitlink_missing("nvJitLink is required for LTO")
|
|
133
|
+
def test_inspect_lto_asm(self):
|
|
134
|
+
ffi = cffi.FFI()
|
|
135
|
+
|
|
136
|
+
ext = cuda.CUSource("""
|
|
137
|
+
#include <cuda_fp16.h>
|
|
138
|
+
extern "C"
|
|
139
|
+
__device__ int add_f2_f2(__half * res, __half * a, __half *b) {
|
|
140
|
+
*res = *a + *b;
|
|
141
|
+
return 0;
|
|
142
|
+
}
|
|
143
|
+
""")
|
|
144
|
+
|
|
145
|
+
add = declare_device(
|
|
146
|
+
"add_f2_f2",
|
|
147
|
+
float16(CPointer(float16), CPointer(float16)),
|
|
148
|
+
link=ext,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
@cuda.jit
|
|
152
|
+
def k(arr):
|
|
153
|
+
local_arr = cuda.local.array(shape=1, dtype=np.float16)
|
|
154
|
+
local_arr2 = cuda.local.array(shape=1, dtype=np.float16)
|
|
155
|
+
local_arr[0] = 1
|
|
156
|
+
local_arr2[0] = 2
|
|
157
|
+
|
|
158
|
+
ptr = ffi.from_buffer(local_arr)
|
|
159
|
+
ptr2 = ffi.from_buffer(local_arr2)
|
|
160
|
+
|
|
161
|
+
arr[0] = add(ptr, ptr2)
|
|
162
|
+
|
|
163
|
+
arr = np.array([0], dtype=np.float16)
|
|
164
|
+
|
|
165
|
+
k[1, 1](arr)
|
|
166
|
+
|
|
167
|
+
allasms = k.inspect_asm()
|
|
168
|
+
asm = next(iter(allasms.values()))
|
|
169
|
+
|
|
170
|
+
regex = re.compile(r"call(.|\n)*add_f2_f2")
|
|
171
|
+
self.assertRegex(asm, regex)
|
|
172
|
+
|
|
173
|
+
all_ext_asms = k.inspect_lto_ptx()
|
|
174
|
+
lto_asm = next(iter(all_ext_asms.values()))
|
|
175
|
+
|
|
176
|
+
self.assertIn("add.f16", lto_asm)
|
|
177
|
+
self.assertNotIn("call", lto_asm)
|
|
178
|
+
|
|
179
|
+
np.testing.assert_equal(arr[0], np.float16(1) + np.float16(2))
|
|
180
|
+
|
|
181
|
+
def skip_on_cuda_version_issues(self):
|
|
182
|
+
# FIXME: This should be unskipped once the cause of certain nvdisasm
|
|
183
|
+
# versions failing to dump SASS with certain driver / nvJitLink
|
|
184
|
+
# versions is understood
|
|
185
|
+
self.skipTest(
|
|
186
|
+
"Relocation information required for analysis not preserved"
|
|
187
|
+
)
|
|
188
|
+
|
|
111
189
|
@skip_without_nvdisasm("nvdisasm needed for inspect_sass()")
|
|
112
190
|
def test_inspect_sass_eager(self):
|
|
191
|
+
self.skip_on_cuda_version_issues()
|
|
192
|
+
|
|
113
193
|
sig = (float32[::1], int32[::1])
|
|
114
194
|
|
|
115
195
|
@cuda.jit(sig, lineinfo=True)
|
|
@@ -122,6 +202,8 @@ class TestInspect(CUDATestCase):
|
|
|
122
202
|
|
|
123
203
|
@skip_without_nvdisasm("nvdisasm needed for inspect_sass()")
|
|
124
204
|
def test_inspect_sass_lazy(self):
|
|
205
|
+
self.skip_on_cuda_version_issues()
|
|
206
|
+
|
|
125
207
|
@cuda.jit(lineinfo=True)
|
|
126
208
|
def add(x, y):
|
|
127
209
|
i = cuda.grid(1)
|
|
@@ -150,6 +232,8 @@ class TestInspect(CUDATestCase):
|
|
|
150
232
|
|
|
151
233
|
@skip_without_nvdisasm("nvdisasm needed for inspect_sass_cfg()")
|
|
152
234
|
def test_inspect_sass_cfg(self):
|
|
235
|
+
self.skip_on_cuda_version_issues()
|
|
236
|
+
|
|
153
237
|
sig = (float32[::1], int32[::1])
|
|
154
238
|
|
|
155
239
|
@cuda.jit(sig)
|