torch-rb 0.22.2 → 0.23.0

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only. The file being diffed below appears to be the vendored ATen native_functions.yaml operator schema, which declares each operator's signature, dispatch entries, and tags.
@@ -403,16 +403,14 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- CPU, CUDA: angle
- MPS: angle_mps
+ CPU, CUDA, MPS: angle
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
  tags: pointwise

  - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: angle_out
- MPS: angle_out_mps
+ CPU, CUDA, MPS: angle_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
  tags: pointwise

@@ -706,6 +704,7 @@
  variants: function, method
  dispatch:
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_all
+ tags: reduction


  - func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
@@ -715,6 +714,7 @@
  cpp_no_default_args: ['dim']
  dispatch:
  CompositeExplicitAutograd: all_dims_default
+ tags: reduction

  - func: all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -723,6 +723,7 @@
  CPU, CUDA: all_out
  MPS: all_out_mps
  MTIA: all_out_mtia
+ tags: reduction

  - func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -731,13 +732,16 @@
  CPU, CUDA: all_dims_out
  CompositeExplicitAutograd: all_dims_out_default
  cpp_no_default_args: ['dim']
+ tags: reduction

  - func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: all.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: allclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> bool
  variants: function, method
@@ -749,14 +753,14 @@
  device_check: NoCheck # TensorIterator
  structured_delegate: any.out
  variants: function, method
- tags: core
+ tags: [core, reduction]

  - func: any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  structured_delegate: any.dims_out
  variants: function, method
  cpp_no_default_args: ['dim']
- tags: core
+ tags: [core, reduction]
  dispatch:
  CompositeExplicitAutograd: any_dims_default

@@ -766,6 +770,7 @@
  dispatch:
  CPU, CUDA: any_out
  MPS: any_out_mps
+ tags: reduction

  - func: any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -774,13 +779,16 @@
  CPU, CUDA: any_dims_out
  CompositeExplicitAutograd: any_dims_out_default
  cpp_no_default_args: ['dim']
+ tags: reduction

  - func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: any.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  dispatch:
@@ -826,25 +834,27 @@
  structured_delegate: argmax.out
  device_check: NoCheck # TensorIterator
  variants: function, method
- tags: core
+ tags: [core, reduction]

  - func: argmax.out(Tensor self, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  dispatch:
  CPU, CUDA: argmax_out
  MPS: argmax_out_mps
+ tags: reduction

  - func: argmin(Tensor self, int? dim=None, bool keepdim=False) -> Tensor
  structured_delegate: argmin.out
  device_check: NoCheck # TensorIterator
  variants: function, method
- tags: core
+ tags: [core, reduction]

  - func: argmin.out(Tensor self, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  dispatch:
  CPU, CUDA: argmin_out
  MPS: argmin_out_mps
+ tags: reduction

  - func: acosh(Tensor self) -> Tensor
  variants: function, method
@@ -1370,6 +1380,7 @@
  dispatch:
  SparseCPU: bmm_sparse_cpu
  SparseCUDA: bmm_sparse_cuda
+ SparseMPS: bmm_sparse_mps
  NestedTensorCPU: bmm_nested
  NestedTensorCUDA: bmm_nested_cuda
  tags: core
@@ -1385,6 +1396,7 @@
  MTIA: bmm_out_mtia
  SparseCPU: bmm_out_sparse_cpu
  SparseCUDA: bmm_out_sparse_cuda
+ SparseMPS: bmm_out_sparse_mps
  SparseCsrCUDA: bmm_out_sparse_csr_cuda

  - func: bmm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
@@ -1409,12 +1421,12 @@
  - func: _sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
  variants: function
  dispatch:
- SparseCPU, SparseCUDA: sparse_broadcast_to
+ SparseCPU, SparseCUDA, SparseMPS: sparse_broadcast_to

  - func: cat(Tensor[] tensors, int dim=0) -> Tensor
  structured_delegate: cat.out
  dispatch:
- SparseCPU, SparseCUDA: cat_sparse
+ SparseCPU, SparseCUDA, SparseMPS: cat_sparse
  QuantizedCPU: cat_quantized_cpu
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: cat_nested
  tags: core
@@ -1551,8 +1563,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA, MTIA: clamp_out
- MPS: clamp_out_mps
+ CPU, CUDA, MTIA, MPS: clamp_out
  tags: pointwise

  - func: clamp.Tensor_out(Tensor self, Tensor? min=None, Tensor? max=None, *, Tensor(a!) out) -> Tensor(a!)
@@ -1560,8 +1571,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: clamp_Tensor_out
- MPS: clamp_Tensor_out_mps
+ CPU, CUDA, MPS: clamp_Tensor_out
  tags: pointwise

  - func: clamp_max(Tensor self, Scalar max) -> Tensor
@@ -1591,8 +1601,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA, MTIA: clamp_max_out
- MPS: clamp_max_out_mps
+ CPU, CUDA, MTIA, MPS: clamp_max_out
  tags: pointwise

  - func: clamp_max.Tensor_out(Tensor self, Tensor max, *, Tensor(a!) out) -> Tensor(a!)
@@ -1600,8 +1609,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: clamp_max_Tensor_out
- MPS: clamp_max_Tensor_out_mps
+ CPU, CUDA, MPS: clamp_max_Tensor_out
  tags: pointwise

  - func: clamp_min(Tensor self, Scalar min) -> Tensor
@@ -1631,8 +1639,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA, MTIA: clamp_min_out
- MPS: clamp_min_out_mps
+ CPU, CUDA, MTIA, MPS: clamp_min_out
  tags: pointwise

  - func: clamp_min.Tensor_out(Tensor self, Tensor min, *, Tensor(a!) out) -> Tensor(a!)
@@ -1640,8 +1647,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: clamp_min_Tensor_out
- MPS: clamp_min_Tensor_out_mps
+ CPU, CUDA, MPS: clamp_min_Tensor_out
  tags: pointwise

  # clip is an alias for clamp
@@ -1798,7 +1804,7 @@
  device_guard: False
  dispatch:
  MkldnnCPU: copy_mkldnn_
- SparseCPU, SparseCUDA: copy_sparse_wrapper_
+ SparseCPU, SparseCUDA, SparseMPS: copy_sparse_wrapper_
  CompositeExplicitAutograd: copy_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: copy_nested_
@@ -1867,12 +1873,14 @@
  CUDA: count_nonzero_cuda
  MPS: count_nonzero_mps
  autogen: count_nonzero.dim_IntList_out
+ tags: reduction

  - func: count_nonzero(Tensor self, int? dim=None) -> Tensor
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: count_nonzero
  autogen: count_nonzero.out
+ tags: reduction

  - func: cov(Tensor self, *, int correction=1, Tensor? fweights=None, Tensor? aweights=None) -> Tensor
  variants: function, method
@@ -2160,7 +2168,7 @@
  variants: function, method
  structured_delegate: div.out
  dispatch:
- SparseCPU, SparseCUDA: div_sparse
+ SparseCPU, SparseCUDA, SparseMPS: div_sparse
  ZeroTensor: div_zerotensor
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Tensor
  tags: [core, pointwise]
@@ -2170,7 +2178,7 @@
  variants: method
  structured_delegate: div.out
  dispatch:
- SparseCPU, SparseCUDA: div_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: div_sparse_
  tags: pointwise

  - func: div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -2179,7 +2187,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA, MPS, MTIA: div_out
- SparseCPU, SparseCUDA: div_out_sparse_zerodim
+ SparseCPU, SparseCUDA, SparseMPS: div_out_sparse_zerodim
  tags: pointwise

  - func: div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor
@@ -2187,7 +2195,7 @@
  variants: function, method
  structured_delegate: div.out_mode
  dispatch:
- SparseCPU, SparseCUDA: div_sparse
+ SparseCPU, SparseCUDA, SparseMPS: div_sparse
  tags: [core, pointwise]

  - func: div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)
@@ -2195,7 +2203,7 @@
  variants: method
  structured_delegate: div.out_mode
  dispatch:
- SparseCPU, SparseCUDA: div_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: div_sparse_
  tags: pointwise

  - func: div.out_mode(Tensor self, Tensor other, *, str? rounding_mode, Tensor(a!) out) -> Tensor(a!)
@@ -2204,7 +2212,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA, MPS: div_out_mode
- SparseCPU, SparseCUDA: div_out_sparse_zerodim
+ SparseCPU, SparseCUDA, SparseMPS: div_out_sparse_zerodim
  tags: pointwise

  # For C++ only, until we have conversion from C++ numbers to Tensor
@@ -2351,6 +2359,7 @@
  dispatch:
  CPU: _embedding_bag_forward_only_cpu
  CUDA: _embedding_bag_forward_only_cuda
+ MPS: _embedding_bag_forward_only_mps
  autogen: _embedding_bag_forward_only.out

  - func: _rowwise_prune(Tensor weight, Tensor mask, ScalarType compressed_indices_dtype) -> (Tensor, Tensor)
@@ -2372,12 +2381,13 @@
  dispatch:
  CPU: _embedding_bag_cpu
  CUDA: _embedding_bag_cuda
+ MPS: _embedding_bag_mps
  autogen: _embedding_bag.out
  tags: core

  - func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
  dispatch:
- CPU, CUDA: _embedding_bag_backward_symint
+ CPU, CUDA, MPS: _embedding_bag_backward_symint

  - func: _embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
  dispatch:
@@ -2387,12 +2397,14 @@
  dispatch:
  CPU: _embedding_bag_dense_backward_cpu
  CUDA: _embedding_bag_dense_backward_cuda
+ MPS: _embedding_bag_dense_backward_mps
  autogen: _embedding_bag_dense_backward.out

  - func: _embedding_bag_per_sample_weights_backward(Tensor grad, Tensor weight, Tensor indices, Tensor offsets, Tensor offset2bag, int mode, int padding_idx=-1) -> Tensor
  dispatch:
  CPU: _embedding_bag_per_sample_weights_backward_cpu
  CUDA: _embedding_bag_per_sample_weights_backward_cuda
+ MPS: _embedding_bag_per_sample_weights_backward_mps
  autogen: _embedding_bag_per_sample_weights_backward.out

  - func: empty.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
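The three hunks above register MPS kernels for the _embedding_bag forward and backward ops in what appears to be the vendored ATen native_functions.yaml. A minimal sketch of the call path these entries serve, assuming an MPS-capable PyTorch build; the nn.EmbeddingBag usage below is the standard upstream API and is not taken from this diff:

    import torch
    import torch.nn as nn

    # Sketch: with _embedding_bag, its dense backward, and the per-sample-weights
    # backward registered for MPS, nn.EmbeddingBag can run natively on Apple GPUs.
    if torch.backends.mps.is_available():
        bag = nn.EmbeddingBag(num_embeddings=10, embedding_dim=3, mode="sum").to("mps")
        indices = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9], device="mps")
        offsets = torch.tensor([0, 4], device="mps")   # two bags of four indices each
        out = bag(indices, offsets)                    # shape (2, 3)
        out.sum().backward()                           # exercises the new backward kernels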
@@ -2517,7 +2529,7 @@
  dispatch:
  CompositeExplicitAutograd: empty_like
  QuantizedCPU, QuantizedCUDA: empty_like_quantized
- SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: empty_like_sparse_coo
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: empty_like_nested
  autogen: empty_like.out
@@ -2768,20 +2780,20 @@
  variants: function, method
  dispatch:
  CPU, CUDA, MPS, MTIA: floor_divide
- SparseCPU, SparseCUDA: floor_divide_sparse
+ SparseCPU, SparseCUDA, SparseMPS: floor_divide_sparse

  - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
  dispatch:
  CPU, CUDA, MPS: floor_divide_
- SparseCPU, SparseCUDA: floor_divide_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: floor_divide_sparse_

  - func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA, MPS: floor_divide_out
- SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim
+ CPU, CUDA, MPS, MTIA: floor_divide_out
+ SparseCPU, SparseCUDA, SparseMPS: floor_divide_out_sparse_zerodim

  - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -3604,8 +3616,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: logaddexp_out
- MPS: logaddexp_out_mps
+ CPU, CUDA, MPS: logaddexp_out
  tags: pointwise

  - func: logaddexp(Tensor self, Tensor other) -> Tensor
@@ -3617,8 +3628,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: logaddexp2_out
- MPS: logaddexp2_out_mps
+ CPU, CUDA, MPS: logaddexp2_out
  tags: pointwise

  - func: logaddexp2(Tensor self, Tensor other) -> Tensor
@@ -3789,19 +3799,23 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: logsumexp
+ tags: reduction

  - func: logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
  # calls squeeze
  CompositeExplicitAutogradNonFunctional: logsumexp_out
+ tags: reduction

  - func: logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: logsumexp.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: margin_ranking_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor

@@ -3851,13 +3865,15 @@
  device_check: NoCheck # TensorIterator
  structured_delegate: aminmax.out
  variants: function, method
+ tags: reduction

  - func: aminmax.out(Tensor self, *, int? dim=None, bool keepdim=False, Tensor(a!) min, Tensor(b!) max) -> (Tensor(a!) min, Tensor(b!) max)
  device_check: NoCheck # TensorIterator
  structured: True
  dispatch:
- CPU, CUDA: aminmax_out
+ CPU, CUDA, MTIA: aminmax_out
  MPS: aminmax_out_mps
+ tags: reduction

  - func: _compute_linear_combination(Tensor input, Tensor coefficients) -> Tensor
  dispatch:
@@ -3873,7 +3889,7 @@
  variants: function, method
  dispatch:
  QuantizedCPU, QuantizedCUDA: qmax
- tags: core
+ tags: [core, reduction]

  - func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
  device_check: NoCheck # TensorIterator
@@ -3883,13 +3899,16 @@
  dispatch:
  CPU, CUDA, MTIA: max_out
  MPS: max_out_mps
+ tags: reduction

  - func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: max.names_dim_max(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: value_selecting_reduction_backward(Tensor grad, int dim, Tensor indices, SymInt[] sizes, bool keepdim) -> Tensor
  variants: function
@@ -3902,13 +3921,14 @@
  - func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
  variants: function, method
  structured_delegate: amax.out
- tags: core
+ tags: [core, reduction]

  - func: amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  dispatch:
- CPU, CUDA: amax_out
+ CPU, CUDA, MTIA: amax_out
  MPS: amax_out_mps
+ tags: reduction

  # Return: (Tensor output, Tensor indices)
  - func: max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
@@ -3970,13 +3990,14 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: mean
- tags: core
+ tags: [core, reduction]

  # For normal naming convention this should be `mean.out`. However since we already have `mean.out` we have to rename this.
  - func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
  CompositeExplicitAutograd: mean_dtype_out
+ tags: reduction

  - func: mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  structured_delegate: mean.out
@@ -3984,7 +4005,7 @@
  variants: function, method
  dispatch:
  QuantizedCPU: mean_quantized_cpu
- tags: core
+ tags: [core, reduction]

  - func: mean.out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  structured: True
@@ -3993,13 +4014,16 @@
  CPU, CUDA: mean_out
  MPS: mean_out_mps
  QuantizedCPU: mean_out_quantized_cpu
+ tags: reduction

  - func: mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: mean.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: nanmean(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  device_check: NoCheck # Composite
@@ -4062,7 +4086,7 @@
  variants: function, method
  dispatch:
  QuantizedCPU, QuantizedCUDA: qmin
- tags: core
+ tags: [core, reduction]

  - func: min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
  device_check: NoCheck # TensorIterator
@@ -4072,24 +4096,28 @@
  dispatch:
  CPU, CUDA, MTIA: min_out
  MPS: min_out_mps
+ tags: reduction

  - func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: min.names_dim_min(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
  variants: function, method
  structured_delegate: amin.out
- tags: core
+ tags: [core, reduction]

  - func: amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  dispatch:
- CPU, CUDA: amin_out
+ CPU, CUDA, MTIA: amin_out
  MPS: amin_out_mps
+ tags: reduction

  # TODO: Add this function to MPS dispatch key so that we avoid declaring it in
  # native_functions.yaml
@@ -4169,7 +4197,7 @@
  structured_delegate: mm.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: _sparse_mm
+ SparseCPU, SparseCUDA, SparseMPS: _sparse_mm
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm
  tags: core

@@ -4181,7 +4209,7 @@
  MTIA: mm_out_mtia
  MPS: mm_out_mps
  XPU: mm_out_xpu
- SparseCPU, SparseCUDA: _sparse_mm_out
+ SparseCPU, SparseCUDA, SparseMPS: _sparse_mm_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out

  - func: mm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
@@ -4241,6 +4269,7 @@
  CPU: _weight_int8pack_mm_cpu
  CUDA: _weight_int8pack_mm_cuda
  MPS: _weight_int8pack_mm_mps
+ XPU: _weight_int8pack_mm_xpu

  - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
  python_module: sparse
@@ -4252,6 +4281,7 @@
  dispatch:
  SparseCPU: sparse_sparse_matmul_cpu
  SparseCUDA: sparse_sparse_matmul_cuda
+ SparseMPS: sparse_sparse_matmul_mps
  autogen: _sparse_sparse_matmul.out

  - func: mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
@@ -4273,7 +4303,7 @@
  structured_delegate: mul.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: mul_sparse
+ SparseCPU, SparseCUDA, SparseMPS: mul_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
  MkldnnCPU: mkldnn_mul
  ZeroTensor: mul_zerotensor
@@ -4285,7 +4315,7 @@
  structured_delegate: mul.out
  variants: method
  dispatch:
- SparseCPU, SparseCUDA: mul_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: mul_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
  MkldnnCPU: mkldnn_mul_
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Tensor
@@ -4299,6 +4329,7 @@
  CPU, CUDA, MPS, MTIA: mul_out
  SparseCPU: mul_out_sparse_cpu
  SparseCUDA: mul_out_sparse_cuda
+ SparseMPS: mul_out_sparse_mps
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
  MkldnnCPU: mkldnn_mul_out
  tags: pointwise
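This release adds a SparseMPS dispatch key alongside the existing SparseCPU/SparseCUDA entries for many sparse COO kernels (here mul and its out variant; earlier hunks cover bmm, cat, div, and others). A hedged sketch of the kind of sparse arithmetic that now routes to those kernels, assuming an MPS-enabled build; the tensors are illustrative only:

    import torch

    # Sketch: elementwise product of two sparse COO tensors on the MPS device,
    # which the mul.out entry above now dispatches to mul_out_sparse_mps.
    if torch.backends.mps.is_available():
        idx = torch.tensor([[0, 1, 1], [2, 0, 2]])
        vals = torch.tensor([3.0, 4.0, 5.0])
        a = torch.sparse_coo_tensor(idx, vals, (2, 3), device="mps")
        b = torch.sparse_coo_tensor(idx, vals * 2, (2, 3), device="mps")
        print((a * b).to_dense())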
@@ -4342,7 +4373,7 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: mv
- SparseCPU, SparseCUDA: mv_sparse
+ SparseCPU, SparseCUDA, SparseMPS: mv_sparse

  - func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -4371,7 +4402,7 @@
  variants: function, method
  dispatch:
  CPU: narrow_copy_dense_cpu
- SparseCPU, SparseCUDA: narrow_copy_sparse
+ SparseCPU, SparseCUDA, SparseMPS: narrow_copy_sparse
  CompositeExplicitAutogradNonFunctional: narrow_copy_dense_symint
  tags: view_copy

@@ -4539,6 +4570,7 @@
  - func: _cdist_forward(Tensor x1, Tensor x2, float p, int? compute_mode) -> Tensor
  dispatch:
  CPU, CUDA: _cdist_forward
+ MTIA: _cdist_forward_mtia
  MPS: _cdist_forward_mps
  autogen: _cdist_forward.out
  tags: core
@@ -4569,7 +4601,7 @@
  dispatch:
  CompositeExplicitAutograd: permute
  MPS: permute_mps
- SparseCPU, SparseCUDA: permute_sparse_coo
+ SparseCPU, SparseCUDA, SparseMPS: permute_sparse_coo
  tags: core

  - func: movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)
@@ -4758,6 +4790,12 @@
  CompositeExplicitAutograd: rand_like
  autogen: rand_like.out

+ - func: rand_like.generator(Tensor self, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ tags: nondeterministic_seeded
+ dispatch:
+ CompositeExplicitAutograd: rand_like
+ autogen: rand_like.generator_out
+
  - func: randint(SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  tags: nondeterministic_seeded
  dispatch:
@@ -4806,6 +4844,14 @@
  CompositeExplicitAutograd: randint_like
  autogen: randint_like.out

+ - func: randint_like.generator(Tensor self, SymInt high, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ tags: nondeterministic_seeded
+ dispatch:
+ # NB: Although this composite mutates on the inside, it is
+ # non-differentiable so NonFunctional doesn't apply
+ CompositeExplicitAutograd: randint_like
+ autogen: randint_like.generator_out
+
  - func: randint_like.Tensor(Tensor self, Tensor high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
  tags: nondeterministic_seeded
  dispatch:
@@ -4814,6 +4860,14 @@
  CompositeExplicitAutograd: randint_like
  autogen: randint_like.Tensor_out

+ - func: randint_like.Tensor_generator(Tensor self, Tensor high, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ tags: nondeterministic_seeded
+ dispatch:
+ # NB: Although this composite mutates on the inside, it is
+ # non-differentiable so NonFunctional doesn't apply
+ CompositeExplicitAutograd: randint_like
+ autogen: randint_like.Tensor_generator_out
+
  - func: randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
  tags: nondeterministic_seeded
  dispatch:
@@ -4822,6 +4876,14 @@
  CompositeExplicitAutograd: randint_like
  autogen: randint_like.low_dtype_out

+ - func: randint_like.low_generator_dtype(Tensor self, SymInt low, SymInt high, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ tags: nondeterministic_seeded
+ dispatch:
+ # NB: Although this composite mutates on the inside, it is
+ # non-differentiable so NonFunctional doesn't apply
+ CompositeExplicitAutograd: randint_like
+ autogen: randint_like.low_generator_dtype_out
+
  - func: randn(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  tags: [core, nondeterministic_seeded]
  dispatch:
@@ -4862,6 +4924,14 @@
  CompositeExplicitAutograd, CompositeImplicitAutogradNestedTensor: randn_like
  autogen: randn_like.out

+ - func: randn_like.generator(Tensor self, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ tags: nondeterministic_seeded
+ dispatch:
+ # NB: Although this composite mutates on the inside, it is
+ # non-differentiable so NonFunctional doesn't apply
+ CompositeExplicitAutograd, CompositeImplicitAutogradNestedTensor: randn_like
+ autogen: randn_like.generator_out
+
  - func: randperm(SymInt n, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  tags: [core, nondeterministic_seeded]
  dispatch:
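The hunks directly above add generator overloads for the *_like random factories (rand_like.generator, randint_like.generator, randint_like.Tensor_generator, randint_like.low_generator_dtype, randn_like.generator). A short sketch of how such schemas typically surface as keyword arguments in the Python API, assuming the usual mapping from native_functions.yaml overloads to torch.* calls:

    import torch

    # Sketch: seeding the *_like factories explicitly through the new overloads.
    g = torch.Generator().manual_seed(0)
    x = torch.empty(2, 3)

    a = torch.rand_like(x, generator=g)             # rand_like.generator
    b = torch.randn_like(x, generator=g)            # randn_like.generator
    c = torch.randint_like(x, 10, generator=g)      # randint_like.generator
    d = torch.randint_like(x, 0, 10, generator=g)   # randint_like.low_generator_dtype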
@@ -5848,9 +5918,10 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: sum
- SparseCPU, SparseCUDA, SparseMeta: sum_coo
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sum_coo
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_csr
  autogen: sum.out
+ tags: reduction

  - func: sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  # TODO: Align the signature of sum.dim_IntList and _sparse_csr_sum.dim_dtype
@@ -5859,13 +5930,14 @@
  variants: function, method
  dispatch:
  NestedTensorCPU: NestedTensor_sum_dim_CPU
- SparseCPU, SparseCUDA: sum_sparse_coo
+ SparseCPU, SparseCUDA, SparseMPS: sum_sparse_coo
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_sparse_compressed
- tags: core
+ tags: [core, reduction]

  - func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: sum.IntList_out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  structured: True
@@ -5873,9 +5945,11 @@
  dispatch:
  CPU, CUDA: sum_out
  MPS: sum_out_mps
+ tags: reduction

  - func: sum.DimnameList_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  # TODO: this function will be replaced once nested expand semantics have been settled on
  - func: _nested_sum_backward(Tensor grad, Tensor self, int[1]? dim, bool keepdim=False) -> Tensor
@@ -5887,11 +5961,13 @@
  dispatch:
  CPU, CUDA: nansum
  MPS: nansum_mps
+ tags: reduction

  - func: nansum.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU, CUDA: nansum_out
  MPS: nansum_out_mps
+ tags: reduction

  - func: hash_tensor(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0) -> Tensor
  variants: function, method
@@ -5955,11 +6031,13 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: std.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -5968,16 +6046,19 @@
  CPU, CUDA: std
  MPS: std_mps
  QuantizedCPU: std_quantized_cpu
+ tags: reduction

  - func: std_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
  variants: function
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: std_mean.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
  variants: function
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: std_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
@@ -5986,42 +6067,51 @@
  CPU, CUDA: std_mean
  MPS: std_mean_mps
  autogen: std_mean.correction_out
+ tags: reduction

  - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
  variants: function
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: std_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
  variants: function
+ tags: reduction

  - func: std.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: std.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA: std_out
  QuantizedCPU: std_out_quantized_cpu
+ tags: reduction

  - func: std.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: std.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: std.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: std.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: function
+ tags: reduction

  - func: prod(Tensor self, *, ScalarType? dtype=None) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -6030,13 +6120,13 @@
  CPU, CUDA: prod
  MPS: prod_mps
  autogen: prod.out
- tags: core
+ tags: [core, reduction]

  - func: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  structured_delegate: prod.int_out
  device_check: NoCheck # TensorIterator
  variants: function, method
- tags: core
+ tags: [core, reduction]

  - func: prod.int_out(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  structured: True
@@ -6044,13 +6134,16 @@
  dispatch:
  CPU, CUDA: prod_out
  MPS: prod_out_mps
+ tags: reduction

  - func: prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: prod.Dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: t(Tensor(a) self) -> Tensor(a)
  device_check: NoCheck
@@ -6446,6 +6539,7 @@
  dispatch:
  CPU: unique_dim_cpu
  CUDA: unique_dim_cuda
+ MPS: unique_dim_mps
  tags: dynamic_output_shape
  autogen: unique_dim.out

@@ -6491,7 +6585,7 @@
  device_guard: False
  dispatch:
  CompositeExplicitAutograd: unsqueeze
- SparseCPU, SparseCUDA: unsqueeze_sparse
+ SparseCPU, SparseCUDA, SparseMPS: unsqueeze_sparse
  QuantizedCPU, QuantizedCUDA: unsqueeze_quantized
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: unsqueeze_nested
  tags: core
@@ -6510,11 +6604,12 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: var.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
- tags: core
+ tags: [core, reduction]
  cpp_no_default_args: ["unbiased"]

  - func: var.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor
@@ -6523,43 +6618,52 @@
  dispatch:
  CPU, CUDA: var
  MPS: var_mps
- tags: core
+ MTIA: var_mtia
+ tags: [core, reduction]

  - func: var.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: var.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA: var_out
+ tags: reduction

  - func: var.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: var.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: var.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: var.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: function
+ tags: reduction

  - func: var_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
  variants: function
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: var_mean.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
  variants: function
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: var_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
@@ -6568,15 +6672,18 @@
  CPU, CUDA: var_mean
  MPS: var_mean_mps
  autogen: var_mean.correction_out
+ tags: reduction

  - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
  variants: function
  cpp_no_default_args: ["unbiased"]
+ tags: reduction

  - func: var_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
  variants: function
+ tags: reduction

  - func: view_as(Tensor(a) self, Tensor other) -> Tensor(a)
  variants: method
@@ -6659,7 +6766,7 @@
  - func: zeros.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CompositeExplicitAutograd: zeros_out
- SparseCPU, SparseCUDA, SparseMeta: zeros_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: zeros_sparse_out

  - func: zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
  dispatch:
@@ -6718,12 +6825,12 @@

  - func: native_norm(Tensor self, Scalar p=2) -> Tensor
  dispatch:
- SparseCPU, SparseCUDA: norm_sparse
+ SparseCPU, SparseCUDA, SparseMPS: norm_sparse
  autogen: native_norm.out

  - func: native_norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, ScalarType? dtype) -> Tensor
  dispatch:
- SparseCPU, SparseCUDA: norm_sparse
+ SparseCPU, SparseCUDA, SparseMPS: norm_sparse
  autogen: native_norm.ScalarOpt_dim_dtype_out

  - func: _batch_norm_with_update(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
@@ -6768,6 +6875,7 @@
  dispatch:
  SparseCPU: _sparse_sum_backward_cpu
  SparseCUDA: _sparse_sum_backward_cuda
+ SparseMPS: _sparse_sum_backward_mps
  autogen: _sparse_sum_backward.out

  - func: _sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -6795,12 +6903,14 @@
  dispatch:
  SparseCPU: softmax_sparse_cpu
  SparseCUDA: softmax_sparse_cuda
+ SparseMPS: softmax_sparse_mps
  autogen: _sparse_softmax.out

  - func: _sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
  dispatch:
  SparseCPU: softmax_backward_sparse_cpu
  SparseCUDA: softmax_backward_sparse_cuda
+ SparseMPS: softmax_backward_sparse_mps
  autogen: _sparse_softmax_backward_data.out

  - func: _sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
@@ -6816,12 +6926,14 @@
  dispatch:
  SparseCPU: log_softmax_sparse_cpu
  SparseCUDA: log_softmax_sparse_cuda
+ SparseMPS: log_softmax_sparse_mps
  autogen: _sparse_log_softmax.out

  - func: _sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
  dispatch:
  SparseCPU: log_softmax_backward_sparse_cpu
  SparseCUDA: log_softmax_backward_sparse_cuda
+ SparseMPS: log_softmax_backward_sparse_mps
  autogen: _sparse_log_softmax_backward_data.out

  - func: _spdiags(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None) -> Tensor
@@ -6836,6 +6948,7 @@
  dispatch:
  CompositeExplicitAutograd: norm
  autogen: norm.ScalarOpt_dtype_out
+ tags: reduction

  - func: norm.Scalar(Tensor self, Scalar p=2) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -6843,20 +6956,23 @@
  dispatch:
  CompositeExplicitAutograd: norm
  autogen: norm.Scalar_out
+ tags: reduction

  - func: norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
  structured_delegate: norm.dtype_out
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: sparse_dtype_norm
+ SparseCPU, SparseCUDA, SparseMPS: sparse_dtype_norm
+ tags: reduction

  - func: norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor
  structured_delegate: norm.out
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: sparse_norm
+ SparseCPU, SparseCUDA, SparseMPS: sparse_norm
+ tags: reduction

  - func: norm.dtype_out(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)
  structured: True
@@ -6864,6 +6980,7 @@
  dispatch:
  CPU, CUDA: norm_dtype_out
  MPS: norm_dtype_out_mps
+ tags: reduction

  - func: norm.out(Tensor self, Scalar? p, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
@@ -6871,21 +6988,26 @@
  dispatch:
  CPU, CUDA: norm_out
  MPS: norm_out_mps
+ tags: reduction

  # These four redispatch in their implementation, so OK to be CompositeImplicitAutograd
  - func: norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: norm.names_ScalarOpt_dim(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ tags: reduction

  - func: norm.names_dtype_out(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: norm.names_out(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
+ tags: reduction

  - func: frexp.Tensor(Tensor self) -> (Tensor mantissa, Tensor exponent)
  variants: method, function
@@ -6975,7 +7097,7 @@
  CPU, CUDA: sub_out
  MPS: sub_out_mps
  MTIA: sub_out_mtia
- SparseCPU, SparseCUDA: sub_out_sparse
+ SparseCPU, SparseCUDA, SparseMPS: sub_out_sparse
  tags: pointwise

  - func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
@@ -6983,7 +7105,7 @@
  variants: function, method
  structured_delegate: sub.out
  dispatch:
- SparseCPU, SparseCUDA: sub_sparse
+ SparseCPU, SparseCUDA, SparseMPS: sub_sparse
  ZeroTensor: sub_zerotensor
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sub_Tensor
  tags: [core, pointwise]
@@ -6993,7 +7115,7 @@
  variants: method
  structured_delegate: sub.out
  dispatch:
- SparseCPU, SparseCUDA: sub_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: sub_sparse_
  tags: pointwise
  # For C++ only, until we have conversion from C++ numbers to Tensor

@@ -7103,6 +7225,7 @@
  MTIA: addmm_out_mtia
  SparseCPU: addmm_out_sparse_dense_cpu
  SparseCUDA: addmm_out_sparse_dense_cuda
+ SparseMPS: addmm_out_sparse_dense_mps
  SparseCsrCPU: addmm_out_sparse_compressed_cpu
  SparseCsrCUDA: addmm_out_sparse_compressed_cuda

@@ -7112,6 +7235,7 @@
  dispatch:
  SparseCPU: addmm_sparse_dense_cpu
  SparseCUDA: addmm_sparse_dense_cuda
+ SparseMPS: addmm_sparse_dense_mps
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
  tags: core

@@ -7148,15 +7272,30 @@
  dispatch:
  CPU: _scaled_mm_cpu
  CUDA: _scaled_mm_cuda
+ XPU: _scaled_mm_xpu
  tags: needs_exact_strides

+
  - func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
  CPU: _scaled_mm_out_cpu
  CUDA: _scaled_mm_out_cuda
+ XPU: _scaled_mm_out_xpu
  tags: needs_exact_strides

+ - func: _scaled_mm_v2(Tensor self, Tensor mat2, Tensor[] scale_a, int[] recipe_a, int[] swizzle_a, Tensor[] scale_b, int[] recipe_b, int[] swizzle_b, Tensor? bias, ScalarType? out_dtype, int[] contraction_dim=[], bool use_fast_accum=False) -> Tensor
+ variants: function
+ dispatch:
+ CUDA: _scaled_mm_cuda_v2
+ XPU: _scaled_mm_xpu_v2
+
+ - func: _scaled_mm_v2.out(Tensor self, Tensor mat2, Tensor[] scale_a, int[] recipe_a, int[] swizzle_a, Tensor[] scale_b, int[] recipe_b, int[] swizzle_b, Tensor? bias, ScalarType? out_dtype, int[] contraction_dim=[], bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
+ variants: function
+ dispatch:
+ CUDA: _scaled_mm_cuda_v2_out
+ XPU: _scaled_mm_xpu_v2_out
+

  - func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
  variants: function
@@ -7164,6 +7303,12 @@
  CUDA: _scaled_grouped_mm_cuda
  tags: needs_exact_strides

+ - func: _scaled_grouped_mm_v2(Tensor self, Tensor mat2, Tensor[] scale_a, int[] recipe_a, int[] swizzle_a, Tensor[] scale_b, int[] recipe_b, int[] swizzle_b, Tensor? offs=None, Tensor? bias=None, ScalarType? out_dtype=None, int[] contraction_dim=[], bool use_fast_accum=False) -> Tensor
+ variants: function
+ dispatch:
+ CUDA: _scaled_grouped_mm_cuda_v2
+ tags: needs_exact_strides
+
  - func: _grouped_mm(Tensor self, Tensor mat2, Tensor? offs=None, Tensor? bias=None, ScalarType? out_dtype=None) -> Tensor
  variants: function
  dispatch:
@@ -7359,14 +7504,14 @@
  - func: sparse_mask(Tensor self, Tensor mask) -> Tensor
  variants: method
  dispatch:
- SparseCPU, SparseCUDA: sparse_mask
+ SparseCPU, SparseCUDA, SparseMPS: sparse_mask
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_mask_sparse_compressed
  autogen: sparse_mask.out

  - func: _sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor
  variants: method
  dispatch:
- SparseCPU, SparseCUDA: sparse_mask_projection
+ SparseCPU, SparseCUDA, SparseMPS: sparse_mask_projection
  autogen: _sparse_mask_projection.out

  - func: _to_cpu(Tensor[] tensors) -> Tensor[]
@@ -8762,11 +8907,11 @@
  autogen: bitwise_right_shift.Scalar_Tensor_out
  tags: pointwise

- - func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
+ - func: tril_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!)
  structured_delegate: tril.out
  variants: method

- - func: triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
+ - func: triu_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!)
  structured_delegate: triu.out
  variants: method

@@ -8890,25 +9035,25 @@
  - func: cross(Tensor self, Tensor other, int? dim=None) -> Tensor
  variants: method, function

- - func: triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
+ - func: triu.out(Tensor self, SymInt diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  dispatch:
  CPU: triu_cpu
  CUDA: triu_cuda
  MPS: triu_mps_out

- - func: triu(Tensor self, int diagonal=0) -> Tensor
+ - func: triu(Tensor self, SymInt diagonal=0) -> Tensor
  structured_delegate: triu.out
  variants: method, function

- - func: tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
+ - func: tril.out(Tensor self, SymInt diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  dispatch:
  CPU: tril_cpu
  CUDA: tril_cuda
  MPS: tril_mps_out

- - func: tril(Tensor self, int diagonal=0) -> Tensor
+ - func: tril(Tensor self, SymInt diagonal=0) -> Tensor
  structured_delegate: tril.out
  variants: method, function
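The diagonal argument of tril/triu and their in-place and out= variants changes from int to SymInt, presumably so it can be traced symbolically; the user-facing call is unchanged. A minimal sketch with the PyTorch Python API:

    import torch

    x = torch.arange(16.0).reshape(4, 4)
    upper = torch.triu(x, diagonal=1)   # zero the main diagonal and everything below it
    lower = x.tril(-1)                  # zero the main diagonal and everything above it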

@@ -9325,6 +9470,7 @@
  QuantizedCUDA: index_select_quantized_cuda
  SparseCPU: index_select_sparse_cpu
  SparseCUDA: index_select_sparse_cuda
+ SparseMPS: index_select_sparse_mps
  MPS: index_select_mps
  tags: core
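index_select gains a SparseMPS kernel alongside the existing sparse CPU/CUDA ones; usage is unchanged. For example (PyTorch Python API, dense case shown):

    import torch

    x = torch.randn(4, 3)
    idx = torch.tensor([0, 2])
    rows = torch.index_select(x, 0, idx)   # rows 0 and 2 of x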

@@ -9606,8 +9752,7 @@
  variants: function
  structured: True
  dispatch:
- CPU, CUDA: lu_unpack_out
- MPS: lu_unpack_out_mps
+ CPU, CUDA, MPS: lu_unpack_out

  # TODO: remove dispatch section when porting TH CUDA to ATen
  - func: multinomial.out(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
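lu_unpack.out now dispatches through a single CPU, CUDA, MPS entry. The operator is normally used to expand the packed result of an LU factorization, e.g. (PyTorch Python API):

    import torch

    A = torch.randn(3, 3)
    LU, pivots = torch.linalg.lu_factor(A)
    P, L, U = torch.lu_unpack(LU, pivots)   # A ≈ P @ L @ U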
@@ -9686,7 +9831,7 @@
  structured_delegate: erfinv.out
  variants: method, function
  dispatch:
- SparseCPU, SparseCUDA: erfinv_sparse
+ SparseCPU, SparseCUDA, SparseMPS: erfinv_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr
  tags: pointwise

@@ -9695,7 +9840,7 @@
  structured_delegate: erfinv.out
  variants: method
  dispatch:
- SparseCPU, SparseCUDA: erfinv_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: erfinv_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_
  tags: pointwise

@@ -9705,7 +9850,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA, MPS: erfinv_out
- SparseCPU, SparseCUDA: erfinv_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: erfinv_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
  tags: pointwise
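The erfinv entries gain SparseMPS kernels for the sparse COO path. The operator itself is the inverse error function, e.g. (PyTorch Python API, dense case shown):

    import torch

    p = torch.tensor([0.0, 0.5, -0.5])
    x = torch.erfinv(p)                    # inverse of torch.erf
    print(torch.allclose(torch.erf(x), p))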

@@ -9932,19 +10077,21 @@
  tags: pointwise

  - func: hypot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: hypot_out
- MPS: hypot_out_mps
+ CPU, CUDA, MPS: hypot_out
  tags: pointwise

  - func: hypot(Tensor self, Tensor other) -> Tensor
+ device_check: NoCheck # TensorIterator
  structured_delegate: hypot.out
  variants: method, function
  tags: pointwise

  - func: hypot_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
  structured_delegate: hypot.out
  variants: method
  tags: pointwise
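hypot and its variants gain device_check: NoCheck (device checking is left to TensorIterator, per the comment) and a single CPU, CUDA, MPS dispatch line. The operator is the elementwise hypotenuse, e.g.:

    import torch

    a = torch.tensor([3.0, 5.0])
    b = torch.tensor([4.0, 12.0])
    print(torch.hypot(a, b))   # tensor([ 5., 13.])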
@@ -10052,12 +10199,14 @@
  CPU, CUDA: min
  MPS: min_mps
  QuantizedCPU: min_quantized_cpu
+ tags: [reduction]

  - func: min.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA: min_unary_out
  QuantizedCPU: min_quantized_unary_out
+ tags: [reduction]

  - func: fmin(Tensor self, Tensor other) -> Tensor
  structured_delegate: fmin.out
@@ -10080,6 +10229,7 @@
  CPU, CUDA: max
  MPS: max_mps
  QuantizedCPU: max_quantized_cpu
+ tags: [reduction]

  - func: fmax(Tensor self, Tensor other) -> Tensor
  structured_delegate: fmax.out
@@ -10126,6 +10276,7 @@
  dispatch:
  CPU, CUDA: max_unary_out
  QuantizedCPU: max_quantized_unary_out
+ tags: [reduction]

  - func: minimum(Tensor self, Tensor other) -> Tensor
  structured_delegate: minimum.out
@@ -10245,21 +10396,24 @@
  device_check: NoCheck # TensorIterator
  structured_delegate: all.all_out
  variants: method, function
+ tags: reduction

  - func: all.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  structured: True
  dispatch:
  CPU, CUDA: all_all_out
+ MTIA: all_all_out_mtia
  MPS: all_all_out_mps
+ tags: reduction

  - func: any(Tensor self) -> Tensor
  device_check: NoCheck # TensorIterator
  structured_delegate: any.all_out
  variants: method, function
  dispatch:
- SparseCPU, SparseCUDA: any_sparse
- tags: core
+ SparseCPU, SparseCUDA, SparseMPS: any_sparse
+ tags: [core, reduction]

  - func: any.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
@@ -10267,6 +10421,7 @@
  dispatch:
  CPU, CUDA: any_all_out
  MPS: any_all_out_mps
+ tags: reduction
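The scalar min/max/all/any overloads above are now tagged as reductions, any gains a SparseMPS kernel, and all.all_out gains an MTIA kernel. As reductions they collapse a tensor to a single value (or along a dimension), e.g. (PyTorch Python API):

    import torch

    x = torch.tensor([[True, False], [True, True]])
    print(torch.any(x))          # tensor(True)
    print(torch.all(x, dim=1))   # tensor([False,  True])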

  - func: renorm.out(Tensor self, Scalar p, int dim, Scalar maxnorm, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -10342,7 +10497,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: pow_Tensor_Scalar_out
- SparseCPU, SparseCUDA: pow_out_sparse_scalar
+ SparseCPU, SparseCUDA, SparseMPS: pow_out_sparse_scalar
  MPS: pow_tensor_scalar_out_mps
  tags: pointwise

@@ -10351,7 +10506,7 @@
  structured_delegate: pow.Tensor_Scalar_out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: pow_sparse_scalar
+ SparseCPU, SparseCUDA, SparseMPS: pow_sparse_scalar
  tags: [core, pointwise]

  - func: pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!)
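pow with a scalar exponent gains SparseMPS kernels for the sparse COO path; the dense path is unchanged. A small sketch (PyTorch Python API, assuming a backend with a sparse pow kernel, i.e. CPU/CUDA and now MPS):

    import torch

    s = torch.eye(3).to_sparse()   # sparse COO tensor
    s2 = s.pow(2)                  # elementwise square, result stays sparse
    d = torch.pow(torch.tensor([1.0, 2.0, 3.0]), 2)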
@@ -10698,6 +10853,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow
  CUDA: foreach_tensor_div_list_kernel_cuda
+ MTIA: foreach_tensor_div_list_kernel_mtia

  - func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10705,6 +10861,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow_
  CUDA: foreach_tensor_div_list_kernel_cuda_
+ MTIA: foreach_tensor_div_list_kernel_mtia_
  autogen: _foreach_div.List_out

  - func: _foreach_div.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
@@ -10728,6 +10885,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow
  CUDA: foreach_tensor_div_tensor_kernel_cuda
+ MTIA: foreach_tensor_div_tensor_kernel_mtia

  - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10735,6 +10893,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow_
  CUDA: foreach_tensor_div_tensor_kernel_cuda_
+ MTIA: foreach_tensor_div_tensor_kernel_mtia_
  autogen: _foreach_div.Tensor_out

  - func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
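The _foreach_div overloads gain MTIA kernels. The _foreach_* functions are the private multi-tensor primitives used by the optimizers, so treat this as an illustrative sketch rather than a stable API:

    import torch

    params = [torch.randn(2, 2), torch.randn(3)]
    divisors = [torch.full_like(p, 2.0) for p in params]
    halved = torch._foreach_div(params, divisors)   # List overload, out-of-place
    torch._foreach_div_(params, divisors)           # List overload, in-place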
@@ -10841,6 +11000,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
  CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
+ MTIA: foreach_tensor_maximum_scalar_kernel_mtia_
  autogen: _foreach_maximum.Scalar_out

  # foreach_minimum/maximum dispatches to clamp_max/min
@@ -11900,8 +12060,7 @@
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
- CPU, CUDA: elu_out
- MPS: elu_out_mps
+ CPU, CUDA, MPS: elu_out

  - func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
  structured_delegate: elu.out
@@ -11914,8 +12073,7 @@
  structured_inherits: TensorIteratorBase
  python_module: nn
  dispatch:
- CPU, CUDA: elu_backward_out
- MPS: elu_backward_out_mps
+ CPU, CUDA, MPS: elu_backward_out

  - func: elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result) -> Tensor
  structured_delegate: elu_backward.grad_input
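The elu_out and elu_backward_out kernels collapse their separate CPU/CUDA and MPS dispatch lines into one entry; the functional form is unchanged, e.g.:

    import torch
    import torch.nn.functional as F

    x = torch.randn(5)
    y = F.elu(x, alpha=1.0)   # x where x > 0, alpha * (exp(x) - 1) elsewhere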
@@ -14037,16 +14195,10 @@
  - func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
  python_module: linalg
  variants: function
- dispatch:
- CompositeImplicitAutograd: linalg_lu_factor
- MPS: linalg_lu_factor_mps

  - func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
  python_module: linalg
  variants: function
- dispatch:
- CompositeImplicitAutograd: linalg_lu_factor_out
- MPS: linalg_lu_factor_out_mps

  - func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
  python_module: linalg
@@ -14072,7 +14224,7 @@
  variants: function
  structured: True
  dispatch:
- CPU, CUDA: linalg_lu_out
+ CPU, CUDA, MPS: linalg_lu_out

  # linalg.lu_solve
  - func: linalg_lu_solve(Tensor LU, Tensor pivots, Tensor B, *, bool left=True, bool adjoint=False) -> Tensor
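linalg_lu_factor and its out= variant drop their explicit dispatch blocks (including the dedicated MPS entries), so they now use the default composite implementation, while linalg_lu's out kernel gains MPS. Typical usage (PyTorch Python API):

    import torch

    A = torch.randn(4, 4)
    P, L, U = torch.linalg.lu(A)             # full decomposition, A ≈ P @ L @ U
    LU, pivots = torch.linalg.lu_factor(A)   # packed form, suitable for torch.linalg.lu_solve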
@@ -14248,12 +14400,12 @@
  python_module: linalg
  variants: function
  dispatch:
- CPU, CUDA: linalg_householder_product
+ CPU, CUDA, MPS: linalg_householder_product

  - func: linalg_householder_product.out(Tensor input, Tensor tau, *, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg
  dispatch:
- CPU, CUDA: linalg_householder_product_out
+ CPU, CUDA, MPS: linalg_householder_product_out

  - func: linalg_inv_ex(Tensor A, *, bool check_errors=False) -> (Tensor inverse, Tensor info)
  python_module: linalg
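linalg_householder_product and its out= variant now also dispatch to MPS. The operator reconstructs the Q factor from the Householder reflectors produced by geqrf, e.g.:

    import torch

    A = torch.randn(4, 3)
    h, tau = torch.geqrf(A)
    Q = torch.linalg.householder_product(h, tau)   # the orthonormal Q factor of A's QR decomposition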
@@ -14313,6 +14465,7 @@
  python_module: linalg
  variants: function
  structured_delegate: linalg_vector_norm.out
+ tags: reduction

  - func: linalg_vector_norm.out(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg
@@ -14320,6 +14473,7 @@
  dispatch:
  CPU, CUDA: linalg_vector_norm_out
  MPS: linalg_vector_norm_out_mps
+ tags: reduction

  - func: linalg_matrix_norm(Tensor self, Scalar ord, int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  python_module: linalg
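linalg_vector_norm and its out= variant are now tagged as reductions; the call itself is unchanged, e.g.:

    import torch

    x = torch.randn(3, 4)
    print(torch.linalg.vector_norm(x))                 # global 2-norm
    print(torch.linalg.vector_norm(x, ord=1, dim=1))   # per-row 1-norm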
@@ -14976,6 +15130,7 @@
  - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_flash_attention_cuda
+ XPU: _scaled_dot_product_flash_attention_xpu
  NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
  tags: nondeterministic_seeded

@@ -14995,6 +15150,7 @@
  variants: function
  dispatch:
  CUDA: _scaled_dot_product_flash_attention_backward_cuda
+ XPU: _scaled_dot_product_flash_attention_backward_xpu
  NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested

  - func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
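The flash-attention forward and backward kernels gain XPU dispatch entries. These are private kernels behind the public scaled_dot_product_attention API, which is what most callers use, roughly:

    import torch
    import torch.nn.functional as F

    q = torch.randn(2, 8, 128, 64)   # (batch, heads, seq_len, head_dim)
    k = torch.randn(2, 8, 128, 64)
    v = torch.randn(2, 8, 128, 64)
    out = F.scaled_dot_product_attention(q, k, v, is_causal=True)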