torch-rb 0.17.0 → 0.18.0

@@ -338,7 +338,7 @@
   dispatch:
     CompositeExplicitAutograd: abs
     SparseCPU, SparseCUDA: abs_sparse
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
   tags: [core, pointwise]

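Note: the dominant change in the vendored native_functions.yaml this release is the addition of the SparseCsrMeta dispatch key to most compressed-sparse (CSR) kernels, here and in many hunks below. This lets CSR ops run under the "meta" device, which tracks only shapes and dtypes without allocating data. A minimal sketch of what this enables, assuming a recent LibTorch build:

    import torch

    # Hedged sketch: empty() with a sparse_csr layout on the meta device has a
    # dedicated kernel in this release (see the empty hunk further below).
    a = torch.empty((2, 3), layout=torch.sparse_csr, device="meta")
    print(a.shape, a.layout)  # torch.Size([2, 3]) torch.sparse_csr
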
@@ -348,7 +348,7 @@
   dispatch:
     CompositeExplicitAutograd: abs_
     SparseCPU, SparseCUDA: abs_sparse_
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_

 - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -357,7 +357,7 @@
     CPU, CUDA: abs_out
     MPS: abs_out_mps
     SparseCPU, SparseCUDA: abs_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
   tags: pointwise

 # Note [Adding an alias]
@@ -400,14 +400,14 @@
   variants: function, method
   dispatch:
     CPU, CUDA: angle
-    SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
   tags: pointwise

 - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: angle_out
-    SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
   tags: pointwise

 - func: view_as_real(Tensor(a) self) -> Tensor(a)
@@ -425,7 +425,7 @@
   structured_delegate: sgn.out
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
   tags: pointwise

@@ -434,7 +434,7 @@
   structured_delegate: sgn.out
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
   tags: pointwise

@@ -445,7 +445,7 @@
     CPU, CUDA: sgn_out
     MPS: sgn_out_mps
     SparseCPU, SparseCUDA: sgn_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_out
   tags: pointwise

 - func: chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
@@ -472,7 +472,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: _conj_physical
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr
   autogen: _conj_physical.out

 - func: conj_physical(Tensor self) -> Tensor
@@ -484,14 +484,14 @@
     CPU, CUDA: conj_physical_out
     MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_out
   tags: pointwise

 - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: conj_physical_
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_
   tags: pointwise

 - func: resolve_conj(Tensor(a) self) -> Tensor(a)
@@ -537,9 +537,11 @@

 - func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
   tags: core
+  autogen: avg_pool1d.out

 - func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
   tags: core
+  autogen: adaptive_avg_pool1d.out

 # Return: (Tensor output, Tensor indices)
 - func: adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)
@@ -694,6 +696,9 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: all.out
   variants: function, method
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
+

 - func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -863,7 +868,7 @@
   structured_delegate: asinh.out
   dispatch:
     SparseCPU, SparseCUDA: asinh_sparse
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr
   tags: [core, pointwise]

 - func: asinh_(Tensor(a!) self) -> Tensor(a!)
@@ -871,7 +876,7 @@
   structured_delegate: asinh.out
   dispatch:
     SparseCPU, SparseCUDA: asinh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_
   tags: pointwise

 - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -881,7 +886,7 @@
     CPU, CUDA: asinh_out
     MPS: asinh_out_mps
     SparseCPU, SparseCUDA: asinh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_out
   tags: pointwise

 # arcsinh, alias for asinh
@@ -898,7 +903,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atanh_sparse
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr
   tags: [core, pointwise]

 - func: atanh_(Tensor(a!) self) -> Tensor(a!)
@@ -906,7 +911,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atanh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_
   tags: pointwise

 - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -916,7 +921,7 @@
     CPU, CUDA: atanh_out
     MPS: atanh_out_mps
     SparseCPU, SparseCUDA: atanh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_out
   tags: pointwise
 # arctanh, alias for atanh

@@ -954,7 +959,7 @@
   structured_delegate: asin.out
   dispatch:
     SparseCPU, SparseCUDA: asin_sparse
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr
   tags: [core, pointwise]

 - func: asin_(Tensor(a!) self) -> Tensor(a!)
@@ -963,7 +968,7 @@
   structured_delegate: asin.out
   dispatch:
     SparseCPU, SparseCUDA: asin_sparse_
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_
   tags: pointwise

 - func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -974,7 +979,7 @@
     CPU, CUDA: asin_out
     MPS: asin_out_mps
     SparseCPU, SparseCUDA: asin_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
   tags: pointwise

 # arcsin, alias of asin
@@ -992,7 +997,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atan_sparse
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr
   tags: [core, pointwise]

 - func: atan_(Tensor(a!) self) -> Tensor(a!)
@@ -1001,7 +1006,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atan_sparse_
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_
   tags: pointwise

 - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1012,7 +1017,7 @@
     CPU, CUDA: atan_out
     MPS: atan_out_mps
     SparseCPU, SparseCUDA: atan_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
   tags: pointwise

 # arctan, alias of atan
@@ -1423,7 +1428,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: ceil_sparse
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr
   tags: [core, pointwise]

 - func: ceil_(Tensor(a!) self) -> Tensor(a!)
@@ -1432,7 +1437,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: ceil_sparse_
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_
   tags: pointwise

 - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1443,7 +1448,7 @@
     CPU, CUDA: ceil_out
     MPS: ceil_out_mps
     SparseCPU, SparseCUDA: ceil_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
   tags: pointwise

 # alias for torch.linalg.multi_dot
@@ -1762,7 +1767,7 @@
     MkldnnCPU: copy_mkldnn_
     SparseCPU, SparseCUDA: copy_sparse_wrapper_
     CompositeExplicitAutograd: copy_
-    SparseCsrCPU, SparseCsrCUDA: copy_sparse_compressed_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
     NestedTensorCPU, NestedTensorCUDA: copy_nested_
   autogen: copy.out

@@ -2338,7 +2343,7 @@

 - func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
   dispatch:
-    CompositeImplicitAutograd: _embedding_bag_backward_symint
+    CPU, CUDA: _embedding_bag_backward_symint

 - func: _embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
   dispatch:
@@ -2370,8 +2375,10 @@
     MPS: empty_mps
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
-    SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
+    SparseCPU, SparseCUDA: empty_sparse
+    SparseMeta: empty_sparse_symint
+    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrMeta: empty_sparse_compressed_symint
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core

@@ -2446,7 +2453,7 @@
     CUDA: resize_cuda_
     MPS: resize_mps_
     QuantizedCPU: quantized_resize_cpu_
-    SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_sparse_csr_
   autogen: resize, resize.out

 # This is a utility function to enable users to resize out tensor while registering kernels for out variants.
@@ -2497,7 +2504,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: erf_sparse
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr
   tags: [core, pointwise]

 - func: erf_(Tensor(a!) self) -> Tensor(a!)
@@ -2506,7 +2513,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: erf_sparse_
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_
   tags: pointwise

 - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2517,7 +2524,7 @@
     CPU, CUDA: erf_out
     MPS: erf_out_mps
     SparseCPU, SparseCUDA: erf_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
   tags: pointwise

 - func: erfc(Tensor self) -> Tensor
@@ -2585,7 +2592,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr
   tags: [core, pointwise]

 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
@@ -2594,7 +2601,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse_
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_
   tags: pointwise

 - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2605,7 +2612,7 @@
     CPU, CUDA: expm1_out
     MPS: expm1_out_mps
     SparseCPU, SparseCUDA: expm1_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
   tags: pointwise

 - func: expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
@@ -2683,7 +2690,7 @@
     MPS: fill_scalar_mps
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
-    SparseCsrCPU, SparseCsrCUDA: fill_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: fill_nested_
   autogen: fill.Scalar_out

@@ -2704,7 +2711,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: floor_sparse
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr
   tags: [core, pointwise]

 - func: floor_(Tensor(a!) self) -> Tensor(a!)
@@ -2713,7 +2720,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: floor_sparse_
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_
   tags: pointwise

 - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2724,7 +2731,7 @@
     CPU, CUDA: floor_out
     MPS: floor_out_mps
     SparseCPU, SparseCUDA: floor_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
   tags: pointwise

 - func: floor_divide(Tensor self, Tensor other) -> Tensor
@@ -2769,7 +2776,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: frac_sparse
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr
   tags: pointwise

 - func: frac_(Tensor(a!) self) -> Tensor(a!)
@@ -2778,7 +2785,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: frac_sparse_
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_
   tags: pointwise

 - func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2789,7 +2796,7 @@
     CPU, CUDA: frac_out
     MPS: frac_out_mps
     SparseCPU, SparseCUDA: frac_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_out
   tags: pointwise

 - func: full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -3061,6 +3068,18 @@
   dispatch:
     CompositeExplicitAutograd: _unsafe_index

+# Used by inductor to generate masked loads
+# Note that we don't support boolean indexing, to avoid dynamic output shapes
+- func: _unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _unsafe_masked_index
+
+- func: _unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _unsafe_masked_index_put_accumulate
+
 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   variants: function
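Note: a hedged reference for the semantics of the new _unsafe_masked_index op (not the registered implementation): where mask holds, gather self at indices clamped in-bounds; elsewhere use fill. The helper name below is illustrative only:

    import torch

    def unsafe_masked_index_reference(self, mask, indices, fill):
        # Clamp each index tensor into range, then substitute fill where mask is False.
        clamped = tuple(i.clamp(0, d - 1) for i, d in zip(indices, self.shape))
        gathered = self[clamped]
        return torch.where(mask, gathered, torch.full_like(gathered, fill))
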
@@ -3161,7 +3180,7 @@
   dispatch:
     CPU, CUDA, MPS: isnan
     SparseCPU, SparseCUDA: isnan_sparse
-    SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
   autogen: isnan.out
   tags: [core, pointwise]

@@ -3381,6 +3400,10 @@

 - func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor

+- func: _wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
+
+- func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
+
 - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor

 - func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
@@ -3487,7 +3510,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr
   tags: [core, pointwise]

 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
@@ -3496,7 +3519,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse_
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_
   tags: pointwise

 - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -3507,7 +3530,7 @@
     CPU, CUDA: log1p_out
     MPS: log1p_out_mps
     SparseCPU, SparseCUDA: log1p_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
   tags: pointwise

 - func: log2(Tensor self) -> Tensor
@@ -3899,11 +3922,10 @@
   tags: core

 # For normal naming convention this should be `mean.out`. However since we already have `mean.out` we have to rename this.
-# FIXME: fix CI jobs and re-enable this
-#- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
-#  device_check: NoCheck # TensorIterator
-#  dispatch:
-#    CompositeExplicitAutograd: mean_dtype_out
+- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  dispatch:
+    CompositeExplicitAutograd: mean_dtype_out

 - func: mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   structured_delegate: mean.out
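Note: the full-reduction mean.dtype_out overload, previously commented out behind a FIXME, is now registered. A minimal sketch of what it allows, writing the reduction into a preallocated tensor of a different dtype:

    import torch

    x = torch.arange(6, dtype=torch.float32).reshape(2, 3)
    out = torch.empty((), dtype=torch.float64)
    torch.mean(x, dtype=torch.float64, out=out)  # out -> tensor(2.5, dtype=float64)
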
@@ -4095,7 +4117,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: _sparse_mm
-    SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm
   tags: core

 - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
@@ -4105,7 +4127,7 @@
     CUDA: mm_out_cuda
     MPS: mm_out_mps
     SparseCPU, SparseCUDA: _sparse_mm_out
-    SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out

 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
@@ -4121,6 +4143,7 @@
   dispatch:
     CPU: _convert_weight_to_int4pack_cpu
     CUDA: _convert_weight_to_int4pack_cuda
+    MPS: _convert_weight_to_int4pack_mps

 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
@@ -4165,7 +4188,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse
-    SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
     MkldnnCPU: mkldnn_mul
     ZeroTensor: mul_zerotensor
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
@@ -4177,7 +4200,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse_
-    SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
     MkldnnCPU: mkldnn_mul_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
   tags: pointwise
@@ -4191,7 +4214,7 @@
     MPS: mul_out_mps
     SparseCPU: mul_out_sparse_cpu
     SparseCUDA: mul_out_sparse_cuda
-    SparseCsrCPU, SparseCsrCUDA: mul_out_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
     MkldnnCPU: mkldnn_mul_out
   tags: pointwise
 # For C++ only, until we have conversion from C++ numbers to Tensor
@@ -4201,7 +4224,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: mul
-    SparseCsrCPU, SparseCsrCUDA: mul_scalar_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
   tags: [core, pointwise]

@@ -4210,7 +4233,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: mul_
-    SparseCsrCPU, SparseCsrCUDA: mul__scalar_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
   autogen: mul.Scalar_out
   tags: pointwise
@@ -4530,9 +4553,11 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    NestedTensorCUDA, CUDA: is_pinned_cuda
-    MPS: is_pinned_mps
-    CompositeExplicitAutograd: is_pinned_default
+    # the NestedTensor keys are necessary because NestedTensor has been removed
+    # from the CompositeExplicitAutograd keyset; see Note [NestedTensor Not Included in Backend Keys]
+    CompositeExplicitAutograd, NestedTensorCPU: is_pinned
+    SparseCsrCPU: is_pinned_sparse_compressed
+    SparseCPU: is_pinned_sparse_coo

 # TODO: add a copy kwarg that guarantees that the tensor is put into fresh
 # pinned memory
@@ -4542,9 +4567,10 @@
 # Unlike pin_memory, this is guaranteed to give a new non-aliasing tensor
 - func: _pin_memory(Tensor self, Device? device=None) -> Tensor
   dispatch:
-    CUDA: _pin_memory_cuda
-    MPS: _pin_memory_mps
-    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
+    CompositeExplicitAutograd: _pin_memory
+    NestedTensorCPU: _pin_memory_nested
+    SparseCPU: _pin_memory_sparse_coo
+    SparseCsrCPU: _pin_memory_sparse_compressed
   autogen: _pin_memory.out

 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
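Note: with the SparseCPU/SparseCsrCPU entries above, page-locked ("pinned") host memory now extends to sparse tensors. A minimal sketch, assuming a CUDA-enabled build (pinning needs an accelerator backend):

    import torch

    dense = torch.randn(4, 4)
    csr = dense.to_sparse_csr().pin_memory()  # pins values and index buffers
    print(csr.is_pinned())                    # True
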
@@ -4558,27 +4584,27 @@
   dispatch:
     CompositeExplicitAutograd: rad2deg
     SparseCPU, SparseCUDA: rad2deg_sparse
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr

 - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: rad2deg_
     SparseCPU, SparseCUDA: rad2deg_sparse_
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_

 - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: rad2deg_out
     SparseCPU, SparseCUDA: rad2deg_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out

 - func: deg2rad(Tensor self) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: deg2rad
     SparseCPU, SparseCUDA: deg2rad_sparse
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr
   tags: pointwise

 - func: deg2rad_(Tensor(a!) self) -> Tensor(a!)
@@ -4586,14 +4612,14 @@
   dispatch:
     CompositeExplicitAutograd: deg2rad_
     SparseCPU, SparseCUDA: deg2rad_sparse_
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_
   tags: pointwise

 - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: deg2rad_out
     SparseCPU, SparseCUDA: deg2rad_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_out
   tags: pointwise

 - func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -4811,7 +4837,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: neg_sparse
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
   tags: [core, pointwise]

@@ -4821,7 +4847,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: neg_sparse_
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
   tags: pointwise

@@ -4833,7 +4859,7 @@
     CPU, CUDA: neg_out
     MPS: neg_out_mps
     SparseCPU, SparseCUDA: neg_out_sparse
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
   tags: pointwise
 # Alias for neg

@@ -4917,7 +4943,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: round_sparse
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr
   tags: [core, pointwise]

 - func: round_(Tensor(a!) self) -> Tensor(a!)
@@ -4926,7 +4952,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: round_sparse_
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_
   tags: pointwise

 - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -4938,7 +4964,7 @@
     CUDA: round_out
     MPS: round_out_mps
     SparseCPU, SparseCUDA: round_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
   tags: pointwise

 - func: round.decimals(Tensor self, *, int decimals) -> Tensor
@@ -4981,7 +5007,7 @@
     QuantizedCUDA: relu_quantized_cuda
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
     SparseCPU, SparseCUDA: relu_sparse
-    SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
   tags: [core, pointwise]

 - func: relu_(Tensor(a!) self) -> Tensor(a!)
@@ -4995,7 +5021,7 @@
     QuantizedCUDA: relu_quantized_cuda_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
     SparseCPU, SparseCUDA: relu_sparse_
-    SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
   autogen: relu.out
   tags: pointwise

@@ -5128,7 +5154,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: select_symint
-    SparseCsrCPU, SparseCsrCUDA: select_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: select_nested
   tags: core

@@ -5277,7 +5303,7 @@
   structured_delegate: sin.out
   variants: function, method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
     NestedTensorCPU, NestedTensorCUDA: sin_nested
   tags: [core, pointwise]
@@ -5287,7 +5313,7 @@
   structured_delegate: sin.out
   variants: function, method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_
     SparseCPU, SparseCUDA: sin_sparse_
   tags: pointwise

@@ -5298,7 +5324,7 @@
   dispatch:
     CPU, CUDA: sin_out
     MPS: sin_out_mps
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
     SparseCPU, SparseCUDA: sin_sparse_out
   tags: pointwise

@@ -5325,7 +5351,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sinh_sparse
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr
   tags: [core, pointwise]

 - func: sinh_(Tensor(a!) self) -> Tensor(a!)
@@ -5334,7 +5360,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sinh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_
   tags: pointwise

 - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5345,7 +5371,7 @@
     CPU, CUDA: sinh_out
     MPS: sinh_out_mps
     SparseCPU, SparseCUDA: sinh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out

 # Returns a copy of this `Variable` that is detached from its autograd graph.
 # This method is OK to call if the `Variable` is a view.
@@ -5732,7 +5758,7 @@
   dispatch:
     NestedTensorCPU: NestedTensor_sum_dim_CPU
     SparseCPU, SparseCUDA: sum_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: sum_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_sparse_compressed
   tags: core

 - func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -5778,7 +5804,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sqrt_sparse
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
   tags: [core, pointwise]

 - func: sqrt_(Tensor(a!) self) -> Tensor(a!)
@@ -5787,7 +5813,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sqrt_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_
   tags: pointwise

 - func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5798,7 +5824,7 @@
     CPU, CUDA: sqrt_out
     MPS: sqrt_out_mps
     SparseCPU, SparseCUDA: sqrt_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
   tags: pointwise

 - func: square(Tensor self) -> Tensor
@@ -5936,7 +5962,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: tan_sparse
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr
   tags: [core, pointwise]

 - func: tan_(Tensor(a!) self) -> Tensor(a!)
@@ -5945,7 +5971,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: tan_sparse_
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_
   tags: pointwise

 - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5956,7 +5982,7 @@
     CPU, CUDA: tan_out
     MPS: tan_out_mps
     SparseCPU, SparseCUDA: tan_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
   tags: pointwise

 - func: tanh(Tensor self) -> Tensor
@@ -5967,7 +5993,7 @@
     QuantizedCPU: tanh_quantized_cpu
     MkldnnCPU: mkldnn_tanh
     SparseCPU, SparseCUDA: tanh_sparse
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
   tags: [core, pointwise]

@@ -5978,7 +6004,7 @@
   dispatch:
     MkldnnCPU: mkldnn_tanh_
     SparseCPU, SparseCUDA: tanh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
   tags: pointwise

@@ -5990,7 +6016,7 @@
     CPU, CUDA: tanh_out
     MPS: tanh_out_mps
     SparseCPU, SparseCUDA: tanh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
   tags: pointwise

 - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
@@ -6027,7 +6053,7 @@
     CPU, CUDA: threshold_backward_out
     MPS: threshold_backward_out_mps
     SparseCPU, SparseCUDA: threshold_backward_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed_out

 - func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
   variants: function
@@ -6035,7 +6061,7 @@
   dispatch:
     MkldnnCPU: mkldnn_relu_backward
     SparseCPU, SparseCUDA: threshold_backward_sparse
-    SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
     NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
   tags: pointwise

@@ -6185,12 +6211,12 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out

-- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor(a)
   variants: function
   device_check: NoCheck
   dispatch: {}

-- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor
   variants: function
   device_check: NoCheck
   tags: view_copy
@@ -6227,6 +6253,16 @@
   device_check: NoCheck
   dispatch: {}

+- func: _nested_get_min_seqlen(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_max_seqlen(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
 - func: _nested_get_jagged_dummy(Tensor any) -> Tensor
   category_override: dummy
   dispatch: {}
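Note: jagged-layout nested tensors can now carry cached min/max sequence lengths through the view ops above, and the two new private getters expose that cache. A hedged sketch (private API, shown only for orientation; exact caching behavior may vary across versions):

    import torch

    nt = torch.nested.nested_tensor(
        [torch.randn(3, 8), torch.randn(5, 8)], layout=torch.jagged
    )
    # Computes (and may cache) the maximum sequence length across components.
    max_len = torch.ops.aten._nested_get_max_seqlen(nt)
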
@@ -6251,7 +6287,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr
   tags: [core, pointwise]

 - func: trunc_(Tensor(a!) self) -> Tensor(a!)
@@ -6260,7 +6296,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse_
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_
   tags: pointwise

 - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -6271,7 +6307,7 @@
     CPU, CUDA: trunc_out
     MPS: trunc_out_mps
     SparseCPU, SparseCUDA: trunc_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
   tags: pointwise
 # Alias for trunc

@@ -6443,6 +6479,7 @@
   variants: function, method
   dispatch:
     CPU, CUDA, MPS: where
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -6780,7 +6817,7 @@
   dispatch:
     CompositeExplicitAutograd: clone
     SparseCPU, SparseCUDA: clone_sparse
-    SparseCsrCPU, SparseCsrCUDA: clone_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
     MkldnnCPU: mkldnn_clone
     QuantizedCPU, QuantizedCUDA: quantized_clone
     NestedTensorCPU, NestedTensorCUDA: clone_nested
@@ -6804,7 +6841,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: resize_as_sparse_
-    SparseCsrCPU, SparseCsrCUDA: resize_as_sparse_compressed_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_as_sparse_compressed_
   autogen: resize_as_sparse, resize_as_sparse.out

 - func: zero_(Tensor(a!) self) -> Tensor(a!)
@@ -6962,7 +6999,7 @@
   dispatch:
     SparseCPU: addmm_sparse_dense_cpu
     SparseCUDA: addmm_sparse_dense_cuda
-    SparseCsrCPU, SparseCsrCUDA: addmm_sparse_compressed_dense
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
   tags: core

 - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
@@ -6984,12 +7021,12 @@
   structured_delegate: _addmm_activation.out
   variants: function, method

-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
   variants: function
   dispatch:
     CUDA: _scaled_mm_cuda

-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _scaled_mm_out_cuda
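Note: _scaled_mm's signature changed. The per-tensor scales are now required positional arguments and the op returns a single tensor (the separate amax output is gone). A hedged float8 sketch, assuming CUDA hardware with fp8 support:

    import torch

    a = torch.randn(16, 32, device="cuda").to(torch.float8_e4m3fn)
    # mat2 is expected in column-major layout for this op.
    b = torch.randn(32, 16, device="cuda").to(torch.float8_e4m3fn).t().contiguous().t()
    scale_a = torch.tensor(1.0, device="cuda")
    scale_b = torch.tensor(1.0, device="cuda")
    out = torch._scaled_mm(a, b, scale_a, scale_b, out_dtype=torch.bfloat16)
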
@@ -7184,7 +7221,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_mask
-    SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_mask_sparse_compressed
   autogen: sparse_mask.out

 - func: _sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor
@@ -7204,7 +7241,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_to_dense
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_dense
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_dense
     MkldnnCPU: mkldnn_to_dense
   autogen: _to_dense.out

@@ -7385,7 +7422,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse
     SparseCPU, SparseCUDA: sparse_coo_to_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
   autogen: _to_sparse.sparse_dim_out

 - func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
@@ -7397,7 +7434,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse
     SparseCPU, SparseCUDA: sparse_coo_to_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
   autogen: _to_sparse.out

 - func: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
@@ -7409,7 +7446,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse_csr
     SparseCPU, SparseCUDA: coo_to_sparse_csr
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csr
   autogen: _to_sparse_csr.out

 - func: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
@@ -7421,7 +7458,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse_csc
     SparseCPU, SparseCUDA: coo_to_sparse_csc
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csc
   autogen: _to_sparse_csc.out

 - func: to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
@@ -7433,7 +7470,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse_bsr
     SparseCPU, SparseCUDA: coo_to_sparse_bsr
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsr
   autogen: _to_sparse_bsr.out

 - func: to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
@@ -7445,7 +7482,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse_bsc
     SparseCPU, SparseCUDA: coo_to_sparse_bsc
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsc
   autogen: _to_sparse_bsc.out

 - func: _to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)
@@ -8431,21 +8468,21 @@
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: __lshift__
+    CPU, CUDA, MPS: __lshift__
   tags: pointwise

 - func: __lshift__.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: __lshift__
+    CPU, CUDA, MPS: __lshift__
   tags: pointwise

 - func: __ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: __ilshift__
+    CPU, CUDA, MPS: __ilshift__
   autogen: __lshift__.Scalar_out
   tags: pointwise

@@ -8453,7 +8490,7 @@
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: __ilshift__
+    CPU, CUDA, MPS: __ilshift__
   autogen: __lshift__.Tensor_out
   tags: pointwise

@@ -8474,7 +8511,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: bitwise_left_shift_out
+    CPU, CUDA, MPS: bitwise_left_shift_out
   tags: pointwise

 - func: bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
@@ -8510,28 +8547,28 @@
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: __rshift__
+    CPU, CUDA, MPS: __rshift__
   tags: pointwise

 - func: __rshift__.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: __rshift__
+    CPU, CUDA, MPS: __rshift__
   tags: pointwise

 - func: __irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: __irshift__
+    CPU, CUDA, MPS: __irshift__
   autogen: __rshift__.Scalar_out

 - func: __irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: __irshift__
+    CPU, CUDA, MPS: __irshift__
   autogen: __rshift__.Tensor_out

 - func: bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor
@@ -8551,7 +8588,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: bitwise_right_shift_out
+    CPU, CUDA, MPS: bitwise_right_shift_out
   tags: pointwise

 - func: bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
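Note: the shift operators gain MPS kernels in this release. A minimal smoke test on an Apple-silicon build:

    import torch

    if torch.backends.mps.is_available():
        x = torch.tensor([1, 2, 4], device="mps")
        print(x << 2)                           # tensor([ 4,  8, 16], device='mps:0')
        print(torch.bitwise_right_shift(x, 1))  # tensor([0, 1, 2], device='mps:0')
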
@@ -8858,6 +8895,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: eq_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
   tags: [core, pointwise]

 - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -9502,7 +9540,7 @@
   variants: method, function
   dispatch:
     SparseCPU, SparseCUDA: erfinv_sparse
-    SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr
   tags: pointwise

 - func: erfinv_(Tensor(a!) self) -> Tensor(a!)
@@ -9511,7 +9549,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: erfinv_sparse_
-    SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_
   tags: pointwise

 - func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9522,7 +9560,7 @@
     CPU, CUDA: erfinv_out
     MPS: erfinv_out_mps
     SparseCPU, SparseCUDA: erfinv_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
   tags: pointwise

 - func: i0(Tensor self) -> Tensor
@@ -9548,7 +9586,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sign_sparse
-    SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr
   tags: [core, pointwise]

 - func: sign_(Tensor(a!) self) -> Tensor(a!)
@@ -9557,7 +9595,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sign_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_
   tags: pointwise

 - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9568,7 +9606,7 @@
     CPU, CUDA: sign_out
     MPS: sign_out_mps
     SparseCPU, SparseCUDA: sign_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_out
   tags: pointwise

 - func: signbit(Tensor self) -> Tensor
@@ -9576,7 +9614,7 @@
   structured_delegate: signbit.out
   dispatch:
     SparseCPU, SparseCUDA: signbit_sparse
-    SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr
   tags: pointwise

 - func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9587,7 +9625,7 @@
     CUDA: signbit_out
     MPS: signbit_out_mps
     SparseCPU, SparseCUDA: signbit_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr_out
   tags: pointwise

 - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
@@ -10038,9 +10076,10 @@
 - func: argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
-  dispatch:
-    CPU, CUDA, MPS: argsort_stable
-  autogen: argsort.stable_out
+
+- func: argsort.stable_out(Tensor self, *, bool stable, int dim=-1, bool descending=False, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  variants: function

 - func: argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor
   variants: method, function
@@ -10220,7 +10259,7 @@
     CPU, CUDA: normal_
     MPS: normal_mps_
     Meta: normal_meta_
-    SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: normal_nested_
   autogen: normal.out

@@ -13024,7 +13063,7 @@
     CompositeExplicitAutograd: isinf
     SparseCPU, SparseCUDA: isinf_sparse
     SparseMeta: isinf_sparse_meta
-    SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
   autogen: isinf.out
   tags: [core, pointwise]

@@ -13038,7 +13077,7 @@
   structured_delegate: isposinf.out
   dispatch:
     SparseCPU, SparseCUDA: isposinf_sparse
-    SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
   tags: pointwise

 - func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -13047,7 +13086,7 @@
   dispatch:
     CPU, CUDA: isposinf_out
     SparseCPU, SparseCUDA: isposinf_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
   tags: pointwise

 - func: isneginf(Tensor self) -> Tensor
@@ -13055,7 +13094,7 @@
   structured_delegate: isneginf.out
   dispatch:
     SparseCPU, SparseCUDA: isneginf_sparse
-    SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
   tags: pointwise

 - func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -13064,7 +13103,7 @@
   dispatch:
     CPU, CUDA: isneginf_out
     SparseCPU, SparseCUDA: isneginf_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
   tags: pointwise

 # NOTE [_add_batch_dim and _remove_batch_dim]
@@ -13787,10 +13826,16 @@
 - func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
   python_module: linalg
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: linalg_lu_factor
+    MPS: linalg_lu_factor_mps

 - func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
   python_module: linalg
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: linalg_lu_factor_out
+    MPS: linalg_lu_factor_out_mps

 - func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
   python_module: linalg
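Note: torch.linalg.lu_factor gains a dedicated MPS kernel instead of falling through the composite path. A minimal smoke test:

    import torch

    if torch.backends.mps.is_available():
        A = torch.randn(4, 4, device="mps")
        LU, pivots = torch.linalg.lu_factor(A)
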
@@ -14176,6 +14221,11 @@
  - func: linalg_solve(Tensor A, Tensor B, *, bool left=True) -> Tensor
  python_module: linalg

+ - func: _spsolve(Tensor A, Tensor B, *, bool left=True) -> Tensor
+ python_module: sparse
+ dispatch:
+ SparseCsrCUDA: _sparse_csr_linear_solve
+
  - func: linalg_solve.out(Tensor A, Tensor B, *, bool left=True, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg

@@ -14352,7 +14402,7 @@
  CPU, CUDA: _segment_reduce_backward_kernel
  autogen: _segment_reduce_backward.out

- - func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0) -> Tensor
+ - func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0, str padding_side="right") -> Tensor
  python_module: nn
  variants: function

@@ -14458,7 +14508,7 @@
  variants: function
  dispatch:
  CompositeExplicitAutogradNonFunctional: select_copy_symint
- SparseCsrCPU, SparseCsrCUDA: select_copy_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_copy_sparse_csr
  tags: view_copy
  autogen: select_copy.int_out

@@ -14648,11 +14698,13 @@
  variants: function
  dispatch:
  CUDA: _fbgemm_jagged_to_padded_dense_forward
+ CPU: _jagged_to_padded_dense_forward_cpu

  - func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
  variants: function
  dispatch:
  CUDA: _fbgemm_dense_to_jagged_forward_symint
+ CPU: _padded_dense_to_jagged_forward_cpu

  - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
  dispatch:
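The FBGEMM-derived jagged/padded conversions used by jagged nested tensors gain CPU kernels, so the round-trip no longer requires CUDA. A hedged sketch through the public nested-tensor API, which is assumed to route through these private ops:

```python
import torch

# Jagged nested tensor -> padded dense, now also dispatchable on CPU.
nt = torch.nested.nested_tensor(
    [torch.arange(3.0), torch.arange(5.0)], layout=torch.jagged
)
padded = torch.nested.to_padded_tensor(nt, 0.0)
print(padded.shape)  # torch.Size([2, 5]); the short row is zero-padded
```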
@@ -14660,6 +14712,11 @@
  NestedTensorCUDA: NestedTensor_softmax_dropout_cuda
  tags: nondeterministic_seeded

+ - func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _safe_softmax
+ NestedTensorCPU, NestedTensorCUDA: _safe_softmax
+
  # Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
  - func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
  variants: function
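The new private _safe_softmax differs from a plain softmax on fully-masked rows: where every entry in a row is -inf, it returns zeros instead of NaN, which is what scaled dot-product attention needs when a mask excludes an entire row. A sketch calling the op directly through torch.ops.aten (a private binding, so treat the call site as an assumption):

```python
import torch

x = torch.tensor([[0.0, 1.0],
                  [float("-inf"), float("-inf")]])  # second row fully masked
print(torch.softmax(x, dim=-1))             # second row: [nan, nan]
print(torch.ops.aten._safe_softmax(x, -1))  # second row: [0., 0.]
```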
@@ -14674,24 +14731,29 @@
  CUDA, NestedTensorCUDA: native_multi_head_attention_cuda
  autogen: _native_multi_head_attention.out

- - func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> Tensor
+ - func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> Tensor
  python_module: nn
  variants: function
  autogen: scaled_dot_product_attention.out
  tags: nondeterministic_seeded

  # This aten function is kept so that we can test the choice function from Python
- - func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int
+ - func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> int
  dispatch:
  Meta: _fused_sdp_choice_meta
  CPU, NestedTensorCPU: _fused_sdp_choice_cpp
  CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
+ - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
  variants: function
  tags: nondeterministic_seeded

+ - func: _scaled_dot_product_attention_math_for_mps(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
+ dispatch:
+ MPS: _scaled_dot_product_attention_math_mps
+ tags: nondeterministic_seeded
+
  - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_flash_attention_cuda
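scaled_dot_product_attention, _fused_sdp_choice, and the math fallback all gain an enable_gqa flag for grouped-query attention, where several query heads share one key/value head; the same hunk also adds a dedicated math path for MPS. Example with 8 query heads and 2 KV heads:

```python
import torch
import torch.nn.functional as F

# Grouped-query attention: the number of query heads must be a multiple
# of the number of KV heads (here 8 and 2).
q = torch.randn(1, 8, 16, 64)  # (batch, 8 query heads, seq, head_dim)
k = torch.randn(1, 2, 16, 64)  # (batch, 2 KV heads,   seq, head_dim)
v = torch.randn(1, 2, 16, 64)
out = F.scaled_dot_product_attention(q, k, v, enable_gqa=True)
print(out.shape)  # torch.Size([1, 8, 16, 64])
```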
@@ -14703,6 +14765,11 @@
  CPU: _scaled_dot_product_flash_attention_cpu
  tags: nondeterministic_seeded

+ - func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ dispatch:
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
+ tags: nondeterministic_seeded
+
  - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
  device_check: NoCheck
  variants: function
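_scaled_dot_product_fused_attention_overrideable (with its backward in the next hunk) reads as an extension point: the CompositeExplicitAutograd entry is a default that out-of-tree backends can override with their own fused attention kernel. A speculative sketch of how a backend might register one; the Library-based registration pattern here is an assumption about intended usage, not documented API for this op:

```python
import torch

# Speculative: register a fused-SDPA kernel for an out-of-tree backend
# (PrivateUse1). The parameter list follows the schema in the diff.
lib = torch.library.Library("aten", "IMPL")

def my_fused_sdpa(query, key, value, attn_bias=None, dropout_p=0.0,
                  is_causal=False, return_debug_mask=False, *, scale=None):
    ...  # call into the custom device's attention kernel here

lib.impl("_scaled_dot_product_fused_attention_overrideable",
         my_fused_sdpa, "PrivateUse1")
```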
@@ -14716,6 +14783,12 @@
  dispatch:
  CPU: _scaled_dot_product_flash_attention_cpu_backward

+ - func: _scaled_dot_product_fused_attention_overrideable_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor attn_bias, bool[4] grad_input_mask, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value, Tensor grad_attn_bias)
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable_backward
+
  - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
  dispatch:
  CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14728,12 +14801,12 @@
  CUDA: _scaled_dot_product_efficient_attention_backward_cuda
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_cudnn_attention_cuda
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+ - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
  dispatch:
  CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
  tags: nondeterministic_seeded
@@ -15563,6 +15636,7 @@
  dispatch:
  CPU: _fused_adam_kernel_cpu_
  CUDA: _fused_adam_kernel_cuda_
+ MPS: _fused_adam_kernel_mps_
  autogen: _fused_adam, _fused_adam.out

  - func: _fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15573,6 +15647,7 @@
  dispatch:
  CPU: _fused_adam_kernel_cpu_
  CUDA: _fused_adam_kernel_cuda_
+ MPS: _fused_adam_kernel_mps_
  autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out

  - func: _fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15581,6 +15656,7 @@
  dispatch:
  CPU: _fused_adamw_kernel_cpu_
  CUDA: _fused_adamw_kernel_cuda_
+ MPS: _fused_adamw_kernel_mps_
  autogen: _fused_adamw, _fused_adamw.out

  - func: _fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15591,6 +15667,7 @@
  dispatch:
  CPU: _fused_adamw_kernel_cpu_
  CUDA: _fused_adamw_kernel_cuda_
+ MPS: _fused_adamw_kernel_mps_
  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

  - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15599,6 +15676,7 @@
  dispatch:
  CPU: _fused_sgd_kernel_cpu_
  CUDA: _fused_sgd_kernel_cuda_
+ MPS: _fused_sgd_kernel_mps_
  autogen: _fused_sgd, _fused_sgd.out

  - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15609,6 +15687,7 @@
  dispatch:
  CPU: _fused_sgd_kernel_cpu_
  CUDA: _fused_sgd_kernel_cuda_
+ MPS: _fused_sgd_kernel_mps_
  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out

  - func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
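The fused Adam, AdamW, and SGD kernels (both the float-lr and tensor-lr variants in the hunks above) gain MPS registrations, so fused=True in the corresponding optimizers can dispatch on Apple-silicon GPUs. A hedged sketch (falls back to CPU, which also has fused kernels, when MPS is unavailable):

```python
import torch

device = "mps" if torch.backends.mps.is_available() else "cpu"
model = torch.nn.Linear(8, 8).to(device)
# fused=True selects the single-kernel _fused_adamw_ path registered above.
opt = torch.optim.AdamW(model.parameters(), lr=1e-3, fused=True)

loss = model(torch.randn(4, 8, device=device)).sum()
loss.backward()
opt.step()
opt.zero_grad()
```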