torch-rb 0.17.0 → 0.18.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -338,7 +338,7 @@
338
338
  dispatch:
339
339
  CompositeExplicitAutograd: abs
340
340
  SparseCPU, SparseCUDA: abs_sparse
341
- SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
341
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
342
342
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
343
343
  tags: [core, pointwise]
344
344
 
@@ -348,7 +348,7 @@
348
348
  dispatch:
349
349
  CompositeExplicitAutograd: abs_
350
350
  SparseCPU, SparseCUDA: abs_sparse_
351
- SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
351
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
352
352
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
353
353
 
354
354
  - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -357,7 +357,7 @@
357
357
  CPU, CUDA: abs_out
358
358
  MPS: abs_out_mps
359
359
  SparseCPU, SparseCUDA: abs_sparse_out
360
- SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
360
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
361
361
  tags: pointwise
362
362
 
363
363
  # Note [Adding an alias]
@@ -400,14 +400,14 @@
400
400
  variants: function, method
401
401
  dispatch:
402
402
  CPU, CUDA: angle
403
- SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr
403
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
404
404
  tags: pointwise
405
405
 
406
406
  - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
407
407
  device_check: NoCheck # TensorIterator
408
408
  dispatch:
409
409
  CPU, CUDA: angle_out
410
- SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out
410
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
411
411
  tags: pointwise
412
412
 
413
413
  - func: view_as_real(Tensor(a) self) -> Tensor(a)
@@ -425,7 +425,7 @@
425
425
  structured_delegate: sgn.out
426
426
  dispatch:
427
427
  SparseCPU, SparseCUDA: sgn_sparse
428
- SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
428
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
429
429
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
430
430
  tags: pointwise
431
431
 
@@ -434,7 +434,7 @@
434
434
  structured_delegate: sgn.out
435
435
  dispatch:
436
436
  SparseCPU, SparseCUDA: sgn_sparse_
437
- SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
437
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
438
438
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
439
439
  tags: pointwise
440
440
 
@@ -445,7 +445,7 @@
445
445
  CPU, CUDA: sgn_out
446
446
  MPS: sgn_out_mps
447
447
  SparseCPU, SparseCUDA: sgn_sparse_out
448
- SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
448
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_out
449
449
  tags: pointwise
450
450
 
451
451
  - func: chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
@@ -472,7 +472,7 @@
472
472
  variants: function, method
473
473
  dispatch:
474
474
  CompositeExplicitAutograd: _conj_physical
475
- SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr
475
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr
476
476
  autogen: _conj_physical.out
477
477
 
478
478
  - func: conj_physical(Tensor self) -> Tensor
@@ -484,14 +484,14 @@
484
484
  CPU, CUDA: conj_physical_out
485
485
  MPS: conj_physical_out_mps
486
486
  SparseCPU, SparseCUDA: conj_physical_out_sparse
487
- SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
487
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_out
488
488
  tags: pointwise
489
489
 
490
490
  - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
491
491
  variants: function, method
492
492
  dispatch:
493
493
  CompositeExplicitAutograd: conj_physical_
494
- SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_
494
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_
495
495
  tags: pointwise
496
496
 
497
497
  - func: resolve_conj(Tensor(a) self) -> Tensor(a)
@@ -537,9 +537,11 @@
537
537
 
538
538
  - func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
539
539
  tags: core
540
+ autogen: avg_pool1d.out
540
541
 
541
542
  - func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
542
543
  tags: core
544
+ autogen: adaptive_avg_pool1d.out
543
545
 
544
546
  # Return: (Tensor output, Tensor indices)
545
547
  - func: adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)
@@ -694,6 +696,9 @@
694
696
  device_check: NoCheck # TensorIterator
695
697
  structured_delegate: all.out
696
698
  variants: function, method
699
+ dispatch:
700
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
701
+
697
702
 
698
703
  - func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
699
704
  device_check: NoCheck # TensorIterator
@@ -863,7 +868,7 @@
863
868
  structured_delegate: asinh.out
864
869
  dispatch:
865
870
  SparseCPU, SparseCUDA: asinh_sparse
866
- SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr
871
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr
867
872
  tags: [core, pointwise]
868
873
 
869
874
  - func: asinh_(Tensor(a!) self) -> Tensor(a!)
@@ -871,7 +876,7 @@
871
876
  structured_delegate: asinh.out
872
877
  dispatch:
873
878
  SparseCPU, SparseCUDA: asinh_sparse_
874
- SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_
879
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_
875
880
  tags: pointwise
876
881
 
877
882
  - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -881,7 +886,7 @@
881
886
  CPU, CUDA: asinh_out
882
887
  MPS: asinh_out_mps
883
888
  SparseCPU, SparseCUDA: asinh_sparse_out
884
- SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
889
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_out
885
890
  tags: pointwise
886
891
 
887
892
  # arcsinh, alias for asinh
@@ -898,7 +903,7 @@
898
903
  variants: function, method
899
904
  dispatch:
900
905
  SparseCPU, SparseCUDA: atanh_sparse
901
- SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr
906
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr
902
907
  tags: [core, pointwise]
903
908
 
904
909
  - func: atanh_(Tensor(a!) self) -> Tensor(a!)
@@ -906,7 +911,7 @@
906
911
  variants: function, method
907
912
  dispatch:
908
913
  SparseCPU, SparseCUDA: atanh_sparse_
909
- SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_
914
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_
910
915
  tags: pointwise
911
916
 
912
917
  - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -916,7 +921,7 @@
916
921
  CPU, CUDA: atanh_out
917
922
  MPS: atanh_out_mps
918
923
  SparseCPU, SparseCUDA: atanh_sparse_out
919
- SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
924
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_out
920
925
  tags: pointwise
921
926
  # arctanh, alias for atanh
922
927
 
@@ -954,7 +959,7 @@
954
959
  structured_delegate: asin.out
955
960
  dispatch:
956
961
  SparseCPU, SparseCUDA: asin_sparse
957
- SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr
962
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr
958
963
  tags: [core, pointwise]
959
964
 
960
965
  - func: asin_(Tensor(a!) self) -> Tensor(a!)
@@ -963,7 +968,7 @@
963
968
  structured_delegate: asin.out
964
969
  dispatch:
965
970
  SparseCPU, SparseCUDA: asin_sparse_
966
- SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_
971
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_
967
972
  tags: pointwise
968
973
 
969
974
  - func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -974,7 +979,7 @@
974
979
  CPU, CUDA: asin_out
975
980
  MPS: asin_out_mps
976
981
  SparseCPU, SparseCUDA: asin_sparse_out
977
- SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
982
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
978
983
  tags: pointwise
979
984
 
980
985
  # arcsin, alias of asin
@@ -992,7 +997,7 @@
992
997
  variants: function, method
993
998
  dispatch:
994
999
  SparseCPU, SparseCUDA: atan_sparse
995
- SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr
1000
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr
996
1001
  tags: [core, pointwise]
997
1002
 
998
1003
  - func: atan_(Tensor(a!) self) -> Tensor(a!)
@@ -1001,7 +1006,7 @@
1001
1006
  variants: function, method
1002
1007
  dispatch:
1003
1008
  SparseCPU, SparseCUDA: atan_sparse_
1004
- SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_
1009
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_
1005
1010
  tags: pointwise
1006
1011
 
1007
1012
  - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1012,7 +1017,7 @@
1012
1017
  CPU, CUDA: atan_out
1013
1018
  MPS: atan_out_mps
1014
1019
  SparseCPU, SparseCUDA: atan_sparse_out
1015
- SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
1020
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
1016
1021
  tags: pointwise
1017
1022
 
1018
1023
  # arctan, alias of atan
@@ -1423,7 +1428,7 @@
1423
1428
  variants: function, method
1424
1429
  dispatch:
1425
1430
  SparseCPU, SparseCUDA: ceil_sparse
1426
- SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
1431
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr
1427
1432
  tags: [core, pointwise]
1428
1433
 
1429
1434
  - func: ceil_(Tensor(a!) self) -> Tensor(a!)
@@ -1432,7 +1437,7 @@
1432
1437
  variants: function, method
1433
1438
  dispatch:
1434
1439
  SparseCPU, SparseCUDA: ceil_sparse_
1435
- SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_
1440
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_
1436
1441
  tags: pointwise
1437
1442
 
1438
1443
  - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1443,7 +1448,7 @@
1443
1448
  CPU, CUDA: ceil_out
1444
1449
  MPS: ceil_out_mps
1445
1450
  SparseCPU, SparseCUDA: ceil_sparse_out
1446
- SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
1451
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
1447
1452
  tags: pointwise
1448
1453
 
1449
1454
  # alias for torch.linalg.multi_dot
@@ -1762,7 +1767,7 @@
1762
1767
  MkldnnCPU: copy_mkldnn_
1763
1768
  SparseCPU, SparseCUDA: copy_sparse_wrapper_
1764
1769
  CompositeExplicitAutograd: copy_
1765
- SparseCsrCPU, SparseCsrCUDA: copy_sparse_compressed_
1770
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
1766
1771
  NestedTensorCPU, NestedTensorCUDA: copy_nested_
1767
1772
  autogen: copy.out
1768
1773
 
@@ -2338,7 +2343,7 @@
2338
2343
 
2339
2344
  - func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
2340
2345
  dispatch:
2341
- CompositeImplicitAutograd: _embedding_bag_backward_symint
2346
+ CPU, CUDA: _embedding_bag_backward_symint
2342
2347
 
2343
2348
  - func: _embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
2344
2349
  dispatch:
@@ -2370,8 +2375,10 @@
2370
2375
  MPS: empty_mps
2371
2376
  Meta: empty_meta_symint
2372
2377
  MkldnnCPU: empty_mkldnn
2373
- SparseCPU, SparseCUDA, SparseMeta: empty_sparse
2374
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
2378
+ SparseCPU, SparseCUDA: empty_sparse
2379
+ SparseMeta: empty_sparse_symint
2380
+ SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
2381
+ SparseCsrMeta: empty_sparse_compressed_symint
2375
2382
  QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
2376
2383
  tags: core
2377
2384
 
@@ -2446,7 +2453,7 @@
2446
2453
  CUDA: resize_cuda_
2447
2454
  MPS: resize_mps_
2448
2455
  QuantizedCPU: quantized_resize_cpu_
2449
- SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
2456
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_sparse_csr_
2450
2457
  autogen: resize, resize.out
2451
2458
 
2452
2459
  # This is a utility function to enable users to resize out tensor while registering kernels for out variants.
@@ -2497,7 +2504,7 @@
2497
2504
  variants: function, method
2498
2505
  dispatch:
2499
2506
  SparseCPU, SparseCUDA: erf_sparse
2500
- SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr
2507
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr
2501
2508
  tags: [core, pointwise]
2502
2509
 
2503
2510
  - func: erf_(Tensor(a!) self) -> Tensor(a!)
@@ -2506,7 +2513,7 @@
2506
2513
  variants: function, method
2507
2514
  dispatch:
2508
2515
  SparseCPU, SparseCUDA: erf_sparse_
2509
- SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_
2516
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_
2510
2517
  tags: pointwise
2511
2518
 
2512
2519
  - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2517,7 +2524,7 @@
2517
2524
  CPU, CUDA: erf_out
2518
2525
  MPS: erf_out_mps
2519
2526
  SparseCPU, SparseCUDA: erf_sparse_out
2520
- SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
2527
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
2521
2528
  tags: pointwise
2522
2529
 
2523
2530
  - func: erfc(Tensor self) -> Tensor
@@ -2585,7 +2592,7 @@
2585
2592
  variants: function, method
2586
2593
  dispatch:
2587
2594
  SparseCPU, SparseCUDA: expm1_sparse
2588
- SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
2595
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr
2589
2596
  tags: [core, pointwise]
2590
2597
 
2591
2598
  - func: expm1_(Tensor(a!) self) -> Tensor(a!)
@@ -2594,7 +2601,7 @@
2594
2601
  variants: function, method
2595
2602
  dispatch:
2596
2603
  SparseCPU, SparseCUDA: expm1_sparse_
2597
- SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_
2604
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_
2598
2605
  tags: pointwise
2599
2606
 
2600
2607
  - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2605,7 +2612,7 @@
2605
2612
  CPU, CUDA: expm1_out
2606
2613
  MPS: expm1_out_mps
2607
2614
  SparseCPU, SparseCUDA: expm1_sparse_out
2608
- SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
2615
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
2609
2616
  tags: pointwise
2610
2617
 
2611
2618
  - func: expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
@@ -2683,7 +2690,7 @@
2683
2690
  MPS: fill_scalar_mps
2684
2691
  QuantizedCPU, QuantizedCUDA: fill_quantized_
2685
2692
  Meta: fill_meta_
2686
- SparseCsrCPU, SparseCsrCUDA: fill_sparse_csr_
2693
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
2687
2694
  NestedTensorCPU, NestedTensorCUDA: fill_nested_
2688
2695
  autogen: fill.Scalar_out
2689
2696
 
@@ -2704,7 +2711,7 @@
2704
2711
  variants: function, method
2705
2712
  dispatch:
2706
2713
  SparseCPU, SparseCUDA: floor_sparse
2707
- SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr
2714
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr
2708
2715
  tags: [core, pointwise]
2709
2716
 
2710
2717
  - func: floor_(Tensor(a!) self) -> Tensor(a!)
@@ -2713,7 +2720,7 @@
2713
2720
  variants: function, method
2714
2721
  dispatch:
2715
2722
  SparseCPU, SparseCUDA: floor_sparse_
2716
- SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_
2723
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_
2717
2724
  tags: pointwise
2718
2725
 
2719
2726
  - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2724,7 +2731,7 @@
2724
2731
  CPU, CUDA: floor_out
2725
2732
  MPS: floor_out_mps
2726
2733
  SparseCPU, SparseCUDA: floor_sparse_out
2727
- SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
2734
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
2728
2735
  tags: pointwise
2729
2736
 
2730
2737
  - func: floor_divide(Tensor self, Tensor other) -> Tensor
@@ -2769,7 +2776,7 @@
2769
2776
  variants: function, method
2770
2777
  dispatch:
2771
2778
  SparseCPU, SparseCUDA: frac_sparse
2772
- SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr
2779
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr
2773
2780
  tags: pointwise
2774
2781
 
2775
2782
  - func: frac_(Tensor(a!) self) -> Tensor(a!)
@@ -2778,7 +2785,7 @@
2778
2785
  variants: function, method
2779
2786
  dispatch:
2780
2787
  SparseCPU, SparseCUDA: frac_sparse_
2781
- SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_
2788
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_
2782
2789
  tags: pointwise
2783
2790
 
2784
2791
  - func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2789,7 +2796,7 @@
2789
2796
  CPU, CUDA: frac_out
2790
2797
  MPS: frac_out_mps
2791
2798
  SparseCPU, SparseCUDA: frac_sparse_out
2792
- SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_out
2799
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_out
2793
2800
  tags: pointwise
2794
2801
 
2795
2802
  - func: full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -3061,6 +3068,18 @@
3061
3068
  dispatch:
3062
3069
  CompositeExplicitAutograd: _unsafe_index
3063
3070
 
3071
+ # Used by inductor to generate masked loads
3072
+ # Note that we don't support boolean indexing, to avoid dynamic output shapes
3073
+ - func: _unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor
3074
+ variants: function
3075
+ dispatch:
3076
+ CompositeExplicitAutograd: _unsafe_masked_index
3077
+
3078
+ - func: _unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor
3079
+ variants: function
3080
+ dispatch:
3081
+ CompositeExplicitAutograd: _unsafe_masked_index_put_accumulate
3082
+
3064
3083
  - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
3065
3084
  structured: True
3066
3085
  variants: function
@@ -3161,7 +3180,7 @@
3161
3180
  dispatch:
3162
3181
  CPU, CUDA, MPS: isnan
3163
3182
  SparseCPU, SparseCUDA: isnan_sparse
3164
- SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
3183
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
3165
3184
  autogen: isnan.out
3166
3185
  tags: [core, pointwise]
3167
3186
 
@@ -3381,6 +3400,10 @@
3381
3400
 
3382
3401
  - func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor
3383
3402
 
3403
+ - func: _wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
3404
+
3405
+ - func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
3406
+
3384
3407
  - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
3385
3408
 
3386
3409
  - func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
@@ -3487,7 +3510,7 @@
3487
3510
  variants: function, method
3488
3511
  dispatch:
3489
3512
  SparseCPU, SparseCUDA: log1p_sparse
3490
- SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
3513
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr
3491
3514
  tags: [core, pointwise]
3492
3515
 
3493
3516
  - func: log1p_(Tensor(a!) self) -> Tensor(a!)
@@ -3496,7 +3519,7 @@
3496
3519
  variants: function, method
3497
3520
  dispatch:
3498
3521
  SparseCPU, SparseCUDA: log1p_sparse_
3499
- SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_
3522
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_
3500
3523
  tags: pointwise
3501
3524
 
3502
3525
  - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -3507,7 +3530,7 @@
3507
3530
  CPU, CUDA: log1p_out
3508
3531
  MPS: log1p_out_mps
3509
3532
  SparseCPU, SparseCUDA: log1p_sparse_out
3510
- SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
3533
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
3511
3534
  tags: pointwise
3512
3535
 
3513
3536
  - func: log2(Tensor self) -> Tensor
@@ -3899,11 +3922,10 @@
3899
3922
  tags: core
3900
3923
 
3901
3924
  # For normal naming convention this should be `mean.out`. However since we already have `mean.out` we have to rename this.
3902
- # FIXME: fix CI jobs and re-enable this
3903
- #- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
3904
- # device_check: NoCheck # TensorIterator
3905
- # dispatch:
3906
- # CompositeExplicitAutograd: mean_dtype_out
3925
+ - func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
3926
+ device_check: NoCheck # TensorIterator
3927
+ dispatch:
3928
+ CompositeExplicitAutograd: mean_dtype_out
3907
3929
 
3908
3930
  - func: mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
3909
3931
  structured_delegate: mean.out
@@ -4095,7 +4117,7 @@
4095
4117
  variants: function, method
4096
4118
  dispatch:
4097
4119
  SparseCPU, SparseCUDA: _sparse_mm
4098
- SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm
4120
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm
4099
4121
  tags: core
4100
4122
 
4101
4123
  - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
@@ -4105,7 +4127,7 @@
4105
4127
  CUDA: mm_out_cuda
4106
4128
  MPS: mm_out_mps
4107
4129
  SparseCPU, SparseCUDA: _sparse_mm_out
4108
- SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
4130
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out
4109
4131
 
4110
4132
  - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
4111
4133
  dispatch:
@@ -4121,6 +4143,7 @@
4121
4143
  dispatch:
4122
4144
  CPU: _convert_weight_to_int4pack_cpu
4123
4145
  CUDA: _convert_weight_to_int4pack_cuda
4146
+ MPS: _convert_weight_to_int4pack_mps
4124
4147
 
4125
4148
  - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
4126
4149
  dispatch:
@@ -4165,7 +4188,7 @@
4165
4188
  variants: function, method
4166
4189
  dispatch:
4167
4190
  SparseCPU, SparseCUDA: mul_sparse
4168
- SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr
4191
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
4169
4192
  MkldnnCPU: mkldnn_mul
4170
4193
  ZeroTensor: mul_zerotensor
4171
4194
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
@@ -4177,7 +4200,7 @@
4177
4200
  variants: method
4178
4201
  dispatch:
4179
4202
  SparseCPU, SparseCUDA: mul_sparse_
4180
- SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr_
4203
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
4181
4204
  MkldnnCPU: mkldnn_mul_
4182
4205
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
4183
4206
  tags: pointwise
@@ -4191,7 +4214,7 @@
4191
4214
  MPS: mul_out_mps
4192
4215
  SparseCPU: mul_out_sparse_cpu
4193
4216
  SparseCUDA: mul_out_sparse_cuda
4194
- SparseCsrCPU, SparseCsrCUDA: mul_out_sparse_csr
4217
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
4195
4218
  MkldnnCPU: mkldnn_mul_out
4196
4219
  tags: pointwise
4197
4220
  # For C++ only, until we have conversion from C++ numbers to Tensor
@@ -4201,7 +4224,7 @@
4201
4224
  variants: function, method
4202
4225
  dispatch:
4203
4226
  CompositeExplicitAutograd: mul
4204
- SparseCsrCPU, SparseCsrCUDA: mul_scalar_sparse_csr
4227
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
4205
4228
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
4206
4229
  tags: [core, pointwise]
4207
4230
 
@@ -4210,7 +4233,7 @@
4210
4233
  variants: method
4211
4234
  dispatch:
4212
4235
  CompositeExplicitAutograd: mul_
4213
- SparseCsrCPU, SparseCsrCUDA: mul__scalar_sparse_csr
4236
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
4214
4237
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
4215
4238
  autogen: mul.Scalar_out
4216
4239
  tags: pointwise
@@ -4530,9 +4553,11 @@
4530
4553
  - func: is_pinned(Tensor self, Device? device=None) -> bool
4531
4554
  variants: method
4532
4555
  dispatch:
4533
- NestedTensorCUDA, CUDA: is_pinned_cuda
4534
- MPS: is_pinned_mps
4535
- CompositeExplicitAutograd: is_pinned_default
4556
+ # the NestedTensor keys are necessary because NestedTensor has been removed
4557
+ # from the CompositeExplicitAutograd keyset see Note [NestedTensor Not Included in Backend Keys]
4558
+ CompositeExplicitAutograd, NestedTensorCPU: is_pinned
4559
+ SparseCsrCPU: is_pinned_sparse_compressed
4560
+ SparseCPU: is_pinned_sparse_coo
4536
4561
 
4537
4562
  # TODO: add a copy kwarg that guarantees that the tensor is put into fresh
4538
4563
  # pinned memory
@@ -4542,9 +4567,10 @@
4542
4567
  # Unlike pin_memory, this is guaranteed to give a new non-aliasing tensor
4543
4568
  - func: _pin_memory(Tensor self, Device? device=None) -> Tensor
4544
4569
  dispatch:
4545
- CUDA: _pin_memory_cuda
4546
- MPS: _pin_memory_mps
4547
- NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
4570
+ CompositeExplicitAutograd: _pin_memory
4571
+ NestedTensorCPU: _pin_memory_nested
4572
+ SparseCPU: _pin_memory_sparse_coo
4573
+ SparseCsrCPU: _pin_memory_sparse_compressed
4548
4574
  autogen: _pin_memory.out
4549
4575
 
4550
4576
  - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
@@ -4558,27 +4584,27 @@
4558
4584
  dispatch:
4559
4585
  CompositeExplicitAutograd: rad2deg
4560
4586
  SparseCPU, SparseCUDA: rad2deg_sparse
4561
- SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr
4587
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr
4562
4588
 
4563
4589
  - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
4564
4590
  variants: function, method
4565
4591
  dispatch:
4566
4592
  CompositeExplicitAutograd: rad2deg_
4567
4593
  SparseCPU, SparseCUDA: rad2deg_sparse_
4568
- SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_
4594
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_
4569
4595
 
4570
4596
  - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4571
4597
  dispatch:
4572
4598
  CompositeExplicitAutograd: rad2deg_out
4573
4599
  SparseCPU, SparseCUDA: rad2deg_sparse_out
4574
- SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out
4600
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out
4575
4601
 
4576
4602
  - func: deg2rad(Tensor self) -> Tensor
4577
4603
  variants: function, method
4578
4604
  dispatch:
4579
4605
  CompositeExplicitAutograd: deg2rad
4580
4606
  SparseCPU, SparseCUDA: deg2rad_sparse
4581
- SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr
4607
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr
4582
4608
  tags: pointwise
4583
4609
 
4584
4610
  - func: deg2rad_(Tensor(a!) self) -> Tensor(a!)
@@ -4586,14 +4612,14 @@
4586
4612
  dispatch:
4587
4613
  CompositeExplicitAutograd: deg2rad_
4588
4614
  SparseCPU, SparseCUDA: deg2rad_sparse_
4589
- SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_
4615
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_
4590
4616
  tags: pointwise
4591
4617
 
4592
4618
  - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4593
4619
  dispatch:
4594
4620
  CompositeExplicitAutograd: deg2rad_out
4595
4621
  SparseCPU, SparseCUDA: deg2rad_sparse_out
4596
- SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_out
4622
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_out
4597
4623
  tags: pointwise
4598
4624
 
4599
4625
  - func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -4811,7 +4837,7 @@
4811
4837
  variants: function, method
4812
4838
  dispatch:
4813
4839
  SparseCPU, SparseCUDA: neg_sparse
4814
- SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr
4840
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
4815
4841
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
4816
4842
  tags: [core, pointwise]
4817
4843
 
@@ -4821,7 +4847,7 @@
4821
4847
  variants: function, method
4822
4848
  dispatch:
4823
4849
  SparseCPU, SparseCUDA: neg_sparse_
4824
- SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_
4850
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
4825
4851
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
4826
4852
  tags: pointwise
4827
4853
 
@@ -4833,7 +4859,7 @@
4833
4859
  CPU, CUDA: neg_out
4834
4860
  MPS: neg_out_mps
4835
4861
  SparseCPU, SparseCUDA: neg_out_sparse
4836
- SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
4862
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
4837
4863
  tags: pointwise
4838
4864
  # Alias for neg
4839
4865
 
@@ -4917,7 +4943,7 @@
4917
4943
  variants: function, method
4918
4944
  dispatch:
4919
4945
  SparseCPU, SparseCUDA: round_sparse
4920
- SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
4946
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr
4921
4947
  tags: [core, pointwise]
4922
4948
 
4923
4949
  - func: round_(Tensor(a!) self) -> Tensor(a!)
@@ -4926,7 +4952,7 @@
4926
4952
  variants: function, method
4927
4953
  dispatch:
4928
4954
  SparseCPU, SparseCUDA: round_sparse_
4929
- SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_
4955
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_
4930
4956
  tags: pointwise
4931
4957
 
4932
4958
  - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -4938,7 +4964,7 @@
4938
4964
  CUDA: round_out
4939
4965
  MPS: round_out_mps
4940
4966
  SparseCPU, SparseCUDA: round_sparse_out
4941
- SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
4967
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
4942
4968
  tags: pointwise
4943
4969
 
4944
4970
  - func: round.decimals(Tensor self, *, int decimals) -> Tensor
@@ -4981,7 +5007,7 @@
4981
5007
  QuantizedCUDA: relu_quantized_cuda
4982
5008
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
4983
5009
  SparseCPU, SparseCUDA: relu_sparse
4984
- SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr
5010
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
4985
5011
  tags: [core, pointwise]
4986
5012
 
4987
5013
  - func: relu_(Tensor(a!) self) -> Tensor(a!)
@@ -4995,7 +5021,7 @@
4995
5021
  QuantizedCUDA: relu_quantized_cuda_
4996
5022
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
4997
5023
  SparseCPU, SparseCUDA: relu_sparse_
4998
- SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr_
5024
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
4999
5025
  autogen: relu.out
5000
5026
  tags: pointwise
5001
5027
 
@@ -5128,7 +5154,7 @@
5128
5154
  device_guard: False
5129
5155
  dispatch:
5130
5156
  CompositeExplicitAutograd: select_symint
5131
- SparseCsrCPU, SparseCsrCUDA: select_sparse_csr
5157
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
5132
5158
  NestedTensorCPU, NestedTensorCUDA: select_nested
5133
5159
  tags: core
5134
5160
 
@@ -5277,7 +5303,7 @@
5277
5303
  structured_delegate: sin.out
5278
5304
  variants: function, method
5279
5305
  dispatch:
5280
- SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
5306
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
5281
5307
  SparseCPU, SparseCUDA: sin_sparse
5282
5308
  NestedTensorCPU, NestedTensorCUDA: sin_nested
5283
5309
  tags: [core, pointwise]
@@ -5287,7 +5313,7 @@
5287
5313
  structured_delegate: sin.out
5288
5314
  variants: function, method
5289
5315
  dispatch:
5290
- SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
5316
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_
5291
5317
  SparseCPU, SparseCUDA: sin_sparse_
5292
5318
  tags: pointwise
5293
5319
 
@@ -5298,7 +5324,7 @@
5298
5324
  dispatch:
5299
5325
  CPU, CUDA: sin_out
5300
5326
  MPS: sin_out_mps
5301
- SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
5327
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
5302
5328
  SparseCPU, SparseCUDA: sin_sparse_out
5303
5329
  tags: pointwise
5304
5330
 
@@ -5325,7 +5351,7 @@
5325
5351
  variants: function, method
5326
5352
  dispatch:
5327
5353
  SparseCPU, SparseCUDA: sinh_sparse
5328
- SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr
5354
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr
5329
5355
  tags: [core, pointwise]
5330
5356
 
5331
5357
  - func: sinh_(Tensor(a!) self) -> Tensor(a!)
@@ -5334,7 +5360,7 @@
5334
5360
  variants: function, method
5335
5361
  dispatch:
5336
5362
  SparseCPU, SparseCUDA: sinh_sparse_
5337
- SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_
5363
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_
5338
5364
  tags: pointwise
5339
5365
 
5340
5366
  - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5345,7 +5371,7 @@
5345
5371
  CPU, CUDA: sinh_out
5346
5372
  MPS: sinh_out_mps
5347
5373
  SparseCPU, SparseCUDA: sinh_sparse_out
5348
- SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out
5374
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out
5349
5375
 
5350
5376
  # Returns a copy of this `Variable` that is detached from its autograd graph.
5351
5377
  # This method is OK to call if the `Variable` is a view.
@@ -5732,7 +5758,7 @@
5732
5758
  dispatch:
5733
5759
  NestedTensorCPU: NestedTensor_sum_dim_CPU
5734
5760
  SparseCPU, SparseCUDA: sum_sparse_coo
5735
- SparseCsrCPU, SparseCsrCUDA: sum_sparse_compressed
5761
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_sparse_compressed
5736
5762
  tags: core
5737
5763
 
5738
5764
  - func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -5778,7 +5804,7 @@
5778
5804
  variants: function, method
5779
5805
  dispatch:
5780
5806
  SparseCPU, SparseCUDA: sqrt_sparse
5781
- SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr
5807
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
5782
5808
  tags: [core, pointwise]
5783
5809
 
5784
5810
  - func: sqrt_(Tensor(a!) self) -> Tensor(a!)
@@ -5787,7 +5813,7 @@
5787
5813
  variants: function, method
5788
5814
  dispatch:
5789
5815
  SparseCPU, SparseCUDA: sqrt_sparse_
5790
- SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_
5816
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_
5791
5817
  tags: pointwise
5792
5818
 
5793
5819
  - func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5798,7 +5824,7 @@
5798
5824
  CPU, CUDA: sqrt_out
5799
5825
  MPS: sqrt_out_mps
5800
5826
  SparseCPU, SparseCUDA: sqrt_sparse_out
5801
- SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out
5827
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
5802
5828
  tags: pointwise
5803
5829
 
5804
5830
  - func: square(Tensor self) -> Tensor
@@ -5936,7 +5962,7 @@
5936
5962
  variants: function, method
5937
5963
  dispatch:
5938
5964
  SparseCPU, SparseCUDA: tan_sparse
5939
- SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
5965
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr
5940
5966
  tags: [core, pointwise]
5941
5967
 
5942
5968
  - func: tan_(Tensor(a!) self) -> Tensor(a!)
@@ -5945,7 +5971,7 @@
5945
5971
  variants: function, method
5946
5972
  dispatch:
5947
5973
  SparseCPU, SparseCUDA: tan_sparse_
5948
- SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_
5974
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_
5949
5975
  tags: pointwise
5950
5976
 
5951
5977
  - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5956,7 +5982,7 @@
5956
5982
  CPU, CUDA: tan_out
5957
5983
  MPS: tan_out_mps
5958
5984
  SparseCPU, SparseCUDA: tan_sparse_out
5959
- SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out
5985
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
5960
5986
  tags: pointwise
5961
5987
 
5962
5988
  - func: tanh(Tensor self) -> Tensor
@@ -5967,7 +5993,7 @@
5967
5993
  QuantizedCPU: tanh_quantized_cpu
5968
5994
  MkldnnCPU: mkldnn_tanh
5969
5995
  SparseCPU, SparseCUDA: tanh_sparse
5970
- SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr
5996
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
5971
5997
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
5972
5998
  tags: [core, pointwise]
5973
5999
 
@@ -5978,7 +6004,7 @@
5978
6004
  dispatch:
5979
6005
  MkldnnCPU: mkldnn_tanh_
5980
6006
  SparseCPU, SparseCUDA: tanh_sparse_
5981
- SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_
6007
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
5982
6008
  NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
5983
6009
  tags: pointwise
5984
6010
 
@@ -5990,7 +6016,7 @@
5990
6016
  CPU, CUDA: tanh_out
5991
6017
  MPS: tanh_out_mps
5992
6018
  SparseCPU, SparseCUDA: tanh_sparse_out
5993
- SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out
6019
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
5994
6020
  tags: pointwise
5995
6021
 
5996
6022
  - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
@@ -6027,7 +6053,7 @@
6027
6053
  CPU, CUDA: threshold_backward_out
6028
6054
  MPS: threshold_backward_out_mps
6029
6055
  SparseCPU, SparseCUDA: threshold_backward_sparse_out
6030
- SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed_out
6056
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed_out
6031
6057
 
6032
6058
  - func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
6033
6059
  variants: function
@@ -6035,7 +6061,7 @@
6035
6061
  dispatch:
6036
6062
  MkldnnCPU: mkldnn_relu_backward
6037
6063
  SparseCPU, SparseCUDA: threshold_backward_sparse
6038
- SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed
6064
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
6039
6065
  NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
6040
6066
  tags: pointwise
6041
6067
 
@@ -6185,12 +6211,12 @@
6185
6211
  CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
6186
6212
  autogen: _nested_view_from_buffer_copy.out
6187
6213
 
6188
- - func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
6214
+ - func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor(a)
6189
6215
  variants: function
6190
6216
  device_check: NoCheck
6191
6217
  dispatch: {}
6192
6218
 
6193
- - func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
6219
+ - func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor
6194
6220
  variants: function
6195
6221
  device_check: NoCheck
6196
6222
  tags: view_copy
@@ -6227,6 +6253,16 @@
6227
6253
  device_check: NoCheck
6228
6254
  dispatch: {}
6229
6255
 
6256
+ - func: _nested_get_min_seqlen(Tensor self) -> Tensor
6257
+ variants: function
6258
+ device_check: NoCheck
6259
+ dispatch: {}
6260
+
6261
+ - func: _nested_get_max_seqlen(Tensor self) -> Tensor
6262
+ variants: function
6263
+ device_check: NoCheck
6264
+ dispatch: {}
6265
+
6230
6266
  - func: _nested_get_jagged_dummy(Tensor any) -> Tensor
6231
6267
  category_override: dummy
6232
6268
  dispatch: {}
@@ -6251,7 +6287,7 @@
6251
6287
  variants: function, method
6252
6288
  dispatch:
6253
6289
  SparseCPU, SparseCUDA: trunc_sparse
6254
- SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
6290
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr
6255
6291
  tags: [core, pointwise]
6256
6292
 
6257
6293
  - func: trunc_(Tensor(a!) self) -> Tensor(a!)
@@ -6260,7 +6296,7 @@
6260
6296
  variants: function, method
6261
6297
  dispatch:
6262
6298
  SparseCPU, SparseCUDA: trunc_sparse_
6263
- SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_
6299
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_
6264
6300
  tags: pointwise
6265
6301
 
6266
6302
  - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -6271,7 +6307,7 @@
6271
6307
  CPU, CUDA: trunc_out
6272
6308
  MPS: trunc_out_mps
6273
6309
  SparseCPU, SparseCUDA: trunc_sparse_out
6274
- SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out
6310
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
6275
6311
  tags: pointwise
6276
6312
  # Alias for trunc
6277
6313
 
@@ -6443,6 +6479,7 @@
6443
6479
  variants: function, method
6444
6480
  dispatch:
6445
6481
  CPU, CUDA, MPS: where
6482
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
6446
6483
  tags: [core, pointwise]
6447
6484
 
6448
6485
  - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -6780,7 +6817,7 @@
6780
6817
  dispatch:
6781
6818
  CompositeExplicitAutograd: clone
6782
6819
  SparseCPU, SparseCUDA: clone_sparse
6783
- SparseCsrCPU, SparseCsrCUDA: clone_sparse_compressed
6820
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
6784
6821
  MkldnnCPU: mkldnn_clone
6785
6822
  QuantizedCPU, QuantizedCUDA: quantized_clone
6786
6823
  NestedTensorCPU, NestedTensorCUDA: clone_nested
@@ -6804,7 +6841,7 @@
6804
6841
  variants: function, method
6805
6842
  dispatch:
6806
6843
  SparseCPU, SparseCUDA: resize_as_sparse_
6807
- SparseCsrCPU, SparseCsrCUDA: resize_as_sparse_compressed_
6844
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_as_sparse_compressed_
6808
6845
  autogen: resize_as_sparse, resize_as_sparse.out
6809
6846
 
6810
6847
  - func: zero_(Tensor(a!) self) -> Tensor(a!)
@@ -6962,7 +6999,7 @@
6962
6999
  dispatch:
6963
7000
  SparseCPU: addmm_sparse_dense_cpu
6964
7001
  SparseCUDA: addmm_sparse_dense_cuda
6965
- SparseCsrCPU, SparseCsrCUDA: addmm_sparse_compressed_dense
7002
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
6966
7003
  tags: core
6967
7004
 
6968
7005
  - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
@@ -6984,12 +7021,12 @@
6984
7021
  structured_delegate: _addmm_activation.out
6985
7022
  variants: function, method
6986
7023
 
6987
- - func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
7024
+ - func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
6988
7025
  variants: function
6989
7026
  dispatch:
6990
7027
  CUDA: _scaled_mm_cuda
6991
7028
 
6992
- - func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
7029
+ - func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
6993
7030
  variants: function
6994
7031
  dispatch:
6995
7032
  CUDA: _scaled_mm_out_cuda
@@ -7184,7 +7221,7 @@
7184
7221
  variants: method
7185
7222
  dispatch:
7186
7223
  SparseCPU, SparseCUDA: sparse_mask
7187
- SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_compressed
7224
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_mask_sparse_compressed
7188
7225
  autogen: sparse_mask.out
7189
7226
 
7190
7227
  - func: _sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor
@@ -7204,7 +7241,7 @@
7204
7241
  variants: method
7205
7242
  dispatch:
7206
7243
  SparseCPU, SparseCUDA: sparse_to_dense
7207
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_dense
7244
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_dense
7208
7245
  MkldnnCPU: mkldnn_to_dense
7209
7246
  autogen: _to_dense.out
7210
7247
 
@@ -7385,7 +7422,7 @@
7385
7422
  dispatch:
7386
7423
  CPU, CUDA: dense_to_sparse
7387
7424
  SparseCPU, SparseCUDA: sparse_coo_to_sparse
7388
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
7425
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
7389
7426
  autogen: _to_sparse.sparse_dim_out
7390
7427
 
7391
7428
  - func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
@@ -7397,7 +7434,7 @@
7397
7434
  dispatch:
7398
7435
  CPU, CUDA: dense_to_sparse
7399
7436
  SparseCPU, SparseCUDA: sparse_coo_to_sparse
7400
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
7437
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
7401
7438
  autogen: _to_sparse.out
7402
7439
 
7403
7440
  - func: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
@@ -7409,7 +7446,7 @@
7409
7446
  dispatch:
7410
7447
  CPU, CUDA: dense_to_sparse_csr
7411
7448
  SparseCPU, SparseCUDA: coo_to_sparse_csr
7412
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
7449
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csr
7413
7450
  autogen: _to_sparse_csr.out
7414
7451
 
7415
7452
  - func: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
@@ -7421,7 +7458,7 @@
7421
7458
  dispatch:
7422
7459
  CPU, CUDA: dense_to_sparse_csc
7423
7460
  SparseCPU, SparseCUDA: coo_to_sparse_csc
7424
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
7461
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csc
7425
7462
  autogen: _to_sparse_csc.out
7426
7463
 
7427
7464
  - func: to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
@@ -7433,7 +7470,7 @@
7433
7470
  dispatch:
7434
7471
  CPU, CUDA: dense_to_sparse_bsr
7435
7472
  SparseCPU, SparseCUDA: coo_to_sparse_bsr
7436
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
7473
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsr
7437
7474
  autogen: _to_sparse_bsr.out
7438
7475
 
7439
7476
  - func: to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
@@ -7445,7 +7482,7 @@
7445
7482
  dispatch:
7446
7483
  CPU, CUDA: dense_to_sparse_bsc
7447
7484
  SparseCPU, SparseCUDA: coo_to_sparse_bsc
7448
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc
7485
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsc
7449
7486
  autogen: _to_sparse_bsc.out
7450
7487
 
7451
7488
  - func: _to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)
@@ -8431,21 +8468,21 @@
8431
8468
  device_check: NoCheck # TensorIterator
8432
8469
  variants: method, function
8433
8470
  dispatch:
8434
- CPU, CUDA: __lshift__
8471
+ CPU, CUDA, MPS: __lshift__
8435
8472
  tags: pointwise
8436
8473
 
8437
8474
  - func: __lshift__.Tensor(Tensor self, Tensor other) -> Tensor
8438
8475
  device_check: NoCheck # TensorIterator
8439
8476
  variants: method, function
8440
8477
  dispatch:
8441
- CPU, CUDA: __lshift__
8478
+ CPU, CUDA, MPS: __lshift__
8442
8479
  tags: pointwise
8443
8480
 
8444
8481
  - func: __ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
8445
8482
  device_check: NoCheck # TensorIterator
8446
8483
  variants: method
8447
8484
  dispatch:
8448
- CPU, CUDA: __ilshift__
8485
+ CPU, CUDA, MPS: __ilshift__
8449
8486
  autogen: __lshift__.Scalar_out
8450
8487
  tags: pointwise
8451
8488
 
@@ -8453,7 +8490,7 @@
8453
8490
  device_check: NoCheck # TensorIterator
8454
8491
  variants: method
8455
8492
  dispatch:
8456
- CPU, CUDA: __ilshift__
8493
+ CPU, CUDA, MPS: __ilshift__
8457
8494
  autogen: __lshift__.Tensor_out
8458
8495
  tags: pointwise
8459
8496
 
@@ -8474,7 +8511,7 @@
8474
8511
  structured: True
8475
8512
  structured_inherits: TensorIteratorBase
8476
8513
  dispatch:
8477
- CPU, CUDA: bitwise_left_shift_out
8514
+ CPU, CUDA, MPS: bitwise_left_shift_out
8478
8515
  tags: pointwise
8479
8516
 
8480
8517
  - func: bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
@@ -8510,28 +8547,28 @@
8510
8547
  device_check: NoCheck # TensorIterator
8511
8548
  variants: method, function
8512
8549
  dispatch:
8513
- CPU, CUDA: __rshift__
8550
+ CPU, CUDA, MPS: __rshift__
8514
8551
  tags: pointwise
8515
8552
 
8516
8553
  - func: __rshift__.Tensor(Tensor self, Tensor other) -> Tensor
8517
8554
  device_check: NoCheck # TensorIterator
8518
8555
  variants: method, function
8519
8556
  dispatch:
8520
- CPU, CUDA: __rshift__
8557
+ CPU, CUDA, MPS: __rshift__
8521
8558
  tags: pointwise
8522
8559
 
8523
8560
  - func: __irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
8524
8561
  device_check: NoCheck # TensorIterator
8525
8562
  variants: method
8526
8563
  dispatch:
8527
- CPU, CUDA: __irshift__
8564
+ CPU, CUDA, MPS: __irshift__
8528
8565
  autogen: __rshift__.Scalar_out
8529
8566
 
8530
8567
  - func: __irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
8531
8568
  device_check: NoCheck # TensorIterator
8532
8569
  variants: method
8533
8570
  dispatch:
8534
- CPU, CUDA: __irshift__
8571
+ CPU, CUDA, MPS: __irshift__
8535
8572
  autogen: __rshift__.Tensor_out
8536
8573
 
8537
8574
  - func: bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor
@@ -8551,7 +8588,7 @@
8551
8588
  structured: True
8552
8589
  structured_inherits: TensorIteratorBase
8553
8590
  dispatch:
8554
- CPU, CUDA: bitwise_right_shift_out
8591
+ CPU, CUDA, MPS: bitwise_right_shift_out
8555
8592
  tags: pointwise
8556
8593
 
8557
8594
  - func: bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
@@ -8858,6 +8895,7 @@
8858
8895
  variants: method, function
8859
8896
  dispatch:
8860
8897
  QuantizedCPU: eq_quantized_cpu
8898
+ NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
8861
8899
  tags: [core, pointwise]
8862
8900
 
8863
8901
  - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -9502,7 +9540,7 @@
9502
9540
  variants: method, function
9503
9541
  dispatch:
9504
9542
  SparseCPU, SparseCUDA: erfinv_sparse
9505
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr
9543
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr
9506
9544
  tags: pointwise
9507
9545
 
9508
9546
  - func: erfinv_(Tensor(a!) self) -> Tensor(a!)
@@ -9511,7 +9549,7 @@
9511
9549
  variants: method
9512
9550
  dispatch:
9513
9551
  SparseCPU, SparseCUDA: erfinv_sparse_
9514
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_
9552
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_
9515
9553
  tags: pointwise
9516
9554
 
9517
9555
  - func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9522,7 +9560,7 @@
9522
9560
  CPU, CUDA: erfinv_out
9523
9561
  MPS: erfinv_out_mps
9524
9562
  SparseCPU, SparseCUDA: erfinv_sparse_out
9525
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
9563
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
9526
9564
  tags: pointwise
9527
9565
 
9528
9566
  - func: i0(Tensor self) -> Tensor
@@ -9548,7 +9586,7 @@
9548
9586
  variants: function, method
9549
9587
  dispatch:
9550
9588
  SparseCPU, SparseCUDA: sign_sparse
9551
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr
9589
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr
9552
9590
  tags: [core, pointwise]
9553
9591
 
9554
9592
  - func: sign_(Tensor(a!) self) -> Tensor(a!)
@@ -9557,7 +9595,7 @@
9557
9595
  variants: method
9558
9596
  dispatch:
9559
9597
  SparseCPU, SparseCUDA: sign_sparse_
9560
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_
9598
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_
9561
9599
  tags: pointwise
9562
9600
 
9563
9601
  - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9568,7 +9606,7 @@
9568
9606
  CPU, CUDA: sign_out
9569
9607
  MPS: sign_out_mps
9570
9608
  SparseCPU, SparseCUDA: sign_sparse_out
9571
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
9609
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_out
9572
9610
  tags: pointwise
9573
9611
 
9574
9612
  - func: signbit(Tensor self) -> Tensor
@@ -9576,7 +9614,7 @@
9576
9614
  structured_delegate: signbit.out
9577
9615
  dispatch:
9578
9616
  SparseCPU, SparseCUDA: signbit_sparse
9579
- SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr
9617
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr
9580
9618
  tags: pointwise
9581
9619
 
9582
9620
  - func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9587,7 +9625,7 @@
9587
9625
  CUDA: signbit_out
9588
9626
  MPS: signbit_out_mps
9589
9627
  SparseCPU, SparseCUDA: signbit_sparse_out
9590
- SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out
9628
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr_out
9591
9629
  tags: pointwise
9592
9630
 
9593
9631
  - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
@@ -10038,9 +10076,10 @@
10038
10076
  - func: argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor
10039
10077
  device_check: NoCheck # TensorIterator
10040
10078
  variants: method, function
10041
- dispatch:
10042
- CPU, CUDA, MPS: argsort_stable
10043
- autogen: argsort.stable_out
10079
+
10080
+ - func: argsort.stable_out(Tensor self, *, bool stable, int dim=-1, bool descending=False, Tensor(a!) out) -> Tensor(a!)
10081
+ device_check: NoCheck # TensorIterator
10082
+ variants: function
10044
10083
 
10045
10084
  - func: argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor
10046
10085
  variants: method, function
@@ -10220,7 +10259,7 @@
10220
10259
  CPU, CUDA: normal_
10221
10260
  MPS: normal_mps_
10222
10261
  Meta: normal_meta_
10223
- SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
10262
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
10224
10263
  NestedTensorCPU, NestedTensorCUDA: normal_nested_
10225
10264
  autogen: normal.out
10226
10265
 
@@ -13024,7 +13063,7 @@
13024
13063
  CompositeExplicitAutograd: isinf
13025
13064
  SparseCPU, SparseCUDA: isinf_sparse
13026
13065
  SparseMeta: isinf_sparse_meta
13027
- SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
13066
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
13028
13067
  autogen: isinf.out
13029
13068
  tags: [core, pointwise]
13030
13069
 
@@ -13038,7 +13077,7 @@
13038
13077
  structured_delegate: isposinf.out
13039
13078
  dispatch:
13040
13079
  SparseCPU, SparseCUDA: isposinf_sparse
13041
- SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr
13080
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
13042
13081
  tags: pointwise
13043
13082
 
13044
13083
  - func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -13047,7 +13086,7 @@
13047
13086
  dispatch:
13048
13087
  CPU, CUDA: isposinf_out
13049
13088
  SparseCPU, SparseCUDA: isposinf_sparse_out
13050
- SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out
13089
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
13051
13090
  tags: pointwise
13052
13091
 
13053
13092
  - func: isneginf(Tensor self) -> Tensor
@@ -13055,7 +13094,7 @@
13055
13094
  structured_delegate: isneginf.out
13056
13095
  dispatch:
13057
13096
  SparseCPU, SparseCUDA: isneginf_sparse
13058
- SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr
13097
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
13059
13098
  tags: pointwise
13060
13099
 
13061
13100
  - func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -13064,7 +13103,7 @@
13064
13103
  dispatch:
13065
13104
  CPU, CUDA: isneginf_out
13066
13105
  SparseCPU, SparseCUDA: isneginf_sparse_out
13067
- SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out
13106
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
13068
13107
  tags: pointwise
13069
13108
 
13070
13109
  # NOTE [_add_batch_dim and _remove_batch_dim]
@@ -13787,10 +13826,16 @@
13787
13826
  - func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
13788
13827
  python_module: linalg
13789
13828
  variants: function
13829
+ dispatch:
13830
+ CompositeImplicitAutograd: linalg_lu_factor
13831
+ MPS: linalg_lu_factor_mps
13790
13832
 
13791
13833
  - func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
13792
13834
  python_module: linalg
13793
13835
  variants: function
13836
+ dispatch:
13837
+ CompositeImplicitAutograd: linalg_lu_factor_out
13838
+ MPS: linalg_lu_factor_out_mps
13794
13839
 
13795
13840
  - func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
13796
13841
  python_module: linalg
@@ -14176,6 +14221,11 @@
14176
14221
  - func: linalg_solve(Tensor A, Tensor B, *, bool left=True) -> Tensor
14177
14222
  python_module: linalg
14178
14223
 
14224
+ - func: _spsolve(Tensor A, Tensor B, *, bool left=True) -> Tensor
14225
+ python_module: sparse
14226
+ dispatch:
14227
+ SparseCsrCUDA: _sparse_csr_linear_solve
14228
+
14179
14229
  - func: linalg_solve.out(Tensor A, Tensor B, *, bool left=True, Tensor(a!) out) -> Tensor(a!)
14180
14230
  python_module: linalg
14181
14231
 
@@ -14352,7 +14402,7 @@
14352
14402
  CPU, CUDA: _segment_reduce_backward_kernel
14353
14403
  autogen: _segment_reduce_backward.out
14354
14404
 
14355
- - func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0) -> Tensor
14405
+ - func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0, str padding_side="right") -> Tensor
14356
14406
  python_module: nn
14357
14407
  variants: function
14358
14408
 
@@ -14458,7 +14508,7 @@
14458
14508
  variants: function
14459
14509
  dispatch:
14460
14510
  CompositeExplicitAutogradNonFunctional: select_copy_symint
14461
- SparseCsrCPU, SparseCsrCUDA: select_copy_sparse_csr
14511
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_copy_sparse_csr
14462
14512
  tags: view_copy
14463
14513
  autogen: select_copy.int_out
14464
14514
 
@@ -14648,11 +14698,13 @@
14648
14698
  variants: function
14649
14699
  dispatch:
14650
14700
  CUDA: _fbgemm_jagged_to_padded_dense_forward
14701
+ CPU: _jagged_to_padded_dense_forward_cpu
14651
14702
 
14652
14703
  - func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
14653
14704
  variants: function
14654
14705
  dispatch:
14655
14706
  CUDA: _fbgemm_dense_to_jagged_forward_symint
14707
+ CPU: _padded_dense_to_jagged_forward_cpu
14656
14708
 
14657
14709
  - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
14658
14710
  dispatch:
@@ -14660,6 +14712,11 @@
14660
14712
  NestedTensorCUDA: NestedTensor_softmax_dropout_cuda
14661
14713
  tags: nondeterministic_seeded
14662
14714
 
14715
+ - func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
14716
+ dispatch:
14717
+ CompositeExplicitAutograd: _safe_softmax
14718
+ NestedTensorCPU, NestedTensorCUDA: _safe_softmax
14719
+
14663
14720
  # Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
14664
14721
  - func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
14665
14722
  variants: function
@@ -14674,24 +14731,29 @@
14674
14731
  CUDA, NestedTensorCUDA: native_multi_head_attention_cuda
14675
14732
  autogen: _native_multi_head_attention.out
14676
14733
 
14677
- - func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> Tensor
14734
+ - func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> Tensor
14678
14735
  python_module: nn
14679
14736
  variants: function
14680
14737
  autogen: scaled_dot_product_attention.out
14681
14738
  tags: nondeterministic_seeded
14682
14739
 
14683
14740
  # This aten function is kept so that we can test the choice function from Python
14684
- - func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int
14741
+ - func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> int
14685
14742
  dispatch:
14686
14743
  Meta: _fused_sdp_choice_meta
14687
14744
  CPU, NestedTensorCPU: _fused_sdp_choice_cpp
14688
14745
  CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
14689
14746
  tags: nondeterministic_seeded
14690
14747
 
14691
- - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
14748
+ - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
14692
14749
  variants: function
14693
14750
  tags: nondeterministic_seeded
14694
14751
 
14752
+ - func: _scaled_dot_product_attention_math_for_mps(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
14753
+ dispatch:
14754
+ MPS: _scaled_dot_product_attention_math_mps
14755
+ tags: nondeterministic_seeded
14756
+
14695
14757
  - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14696
14758
  dispatch:
14697
14759
  CUDA: _scaled_dot_product_flash_attention_cuda
@@ -14703,6 +14765,11 @@
14703
14765
  CPU: _scaled_dot_product_flash_attention_cpu
14704
14766
  tags: nondeterministic_seeded
14705
14767
 
14768
+ - func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14769
+ dispatch:
14770
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
14771
+ tags: nondeterministic_seeded
14772
+
14706
14773
  - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
14707
14774
  device_check: NoCheck
14708
14775
  variants: function
@@ -14716,6 +14783,12 @@
14716
14783
  dispatch:
14717
14784
  CPU: _scaled_dot_product_flash_attention_cpu_backward
14718
14785
 
14786
+ - func: _scaled_dot_product_fused_attention_overrideable_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor attn_bias, bool[4] grad_input_mask, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value, Tensor grad_attn_bias)
14787
+ device_check: NoCheck
14788
+ variants: function
14789
+ dispatch:
14790
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable_backward
14791
+
14719
14792
  - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
14720
14793
  dispatch:
14721
14794
  CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14728,12 +14801,12 @@
14728
14801
  CUDA: _scaled_dot_product_efficient_attention_backward_cuda
14729
14802
  tags: nondeterministic_seeded
14730
14803
 
14731
- - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14804
+ - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14732
14805
  dispatch:
14733
14806
  CUDA: _scaled_dot_product_cudnn_attention_cuda
14734
14807
  tags: nondeterministic_seeded
14735
14808
 
14736
- - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
14809
+ - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
14737
14810
  dispatch:
14738
14811
  CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
14739
14812
  tags: nondeterministic_seeded
@@ -15563,6 +15636,7 @@
15563
15636
  dispatch:
15564
15637
  CPU: _fused_adam_kernel_cpu_
15565
15638
  CUDA: _fused_adam_kernel_cuda_
15639
+ MPS: _fused_adam_kernel_mps_
15566
15640
  autogen: _fused_adam, _fused_adam.out
15567
15641
 
15568
15642
  - func: _fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15573,6 +15647,7 @@
15573
15647
  dispatch:
15574
15648
  CPU: _fused_adam_kernel_cpu_
15575
15649
  CUDA: _fused_adam_kernel_cuda_
15650
+ MPS: _fused_adam_kernel_mps_
15576
15651
  autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out
15577
15652
 
15578
15653
  - func: _fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15581,6 +15656,7 @@
15581
15656
  dispatch:
15582
15657
  CPU: _fused_adamw_kernel_cpu_
15583
15658
  CUDA: _fused_adamw_kernel_cuda_
15659
+ MPS: _fused_adamw_kernel_mps_
15584
15660
  autogen: _fused_adamw, _fused_adamw.out
15585
15661
 
15586
15662
  - func: _fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15591,6 +15667,7 @@
15591
15667
  dispatch:
15592
15668
  CPU: _fused_adamw_kernel_cpu_
15593
15669
  CUDA: _fused_adamw_kernel_cuda_
15670
+ MPS: _fused_adamw_kernel_mps_
15594
15671
  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
15595
15672
 
15596
15673
  - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15599,6 +15676,7 @@
15599
15676
  dispatch:
15600
15677
  CPU: _fused_sgd_kernel_cpu_
15601
15678
  CUDA: _fused_sgd_kernel_cuda_
15679
+ MPS: _fused_sgd_kernel_mps_
15602
15680
  autogen: _fused_sgd, _fused_sgd.out
15603
15681
 
15604
15682
  - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15609,6 +15687,7 @@
15609
15687
  dispatch:
15610
15688
  CPU: _fused_sgd_kernel_cpu_
15611
15689
  CUDA: _fused_sgd_kernel_cuda_
15690
+ MPS: _fused_sgd_kernel_mps_
15612
15691
  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
15613
15692
 
15614
15693
  - func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()