torch-rb 0.14.1 → 0.16.0

This diff shows the content of publicly available package versions as released to their respective public registries, and is provided for informational purposes only. The hunks below come from the gem's vendored copy of PyTorch's `native_functions.yaml`, the schema and dispatch registry for native operators.
@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out
 
 # This function compares the storage numel of self with that of other, where
-# storage numel is cumputed as: `other.storage().nbytes() / other.itemsize()`.
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
@@ -175,17 +175,29 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda
 
+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu
 
 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()
 
+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range
 
-- func: sym_constrain_range_for_size(Scalar size, *, int? min, int? max) -> ()
+- func: sym_constrain_range_for_size(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range_for_size
 
@@ -431,6 +443,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sgn_out
+    MPS: sgn_out_mps
     SparseCPU, SparseCUDA: sgn_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
   tags: pointwise
@@ -469,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -563,8 +577,8 @@
   dispatch:
     SparseCPU: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU: add_out_sparse_csr_cpu
-    SparseCsrCUDA: add_out_sparse_csr_cuda
+    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -681,15 +695,29 @@
   structured_delegate: all.out
   variants: function, method
 
+- func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: all.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  dispatch:
+    CompositeExplicitAutograd: all_dims_default
+
 - func: all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: all_out
     MPS: all_out_mps
 
+- func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: all_dims_out
+    CompositeExplicitAutograd: all_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -709,15 +737,30 @@
   variants: function, method
   tags: core
 
+- func: any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: any.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  tags: core
+  dispatch:
+    CompositeExplicitAutograd: any_dims_default
+
 - func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: any_out
     MPS: any_out_mps
 
+- func: any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: any_dims_out
+    CompositeExplicitAutograd: any_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
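
The new `all.dims` / `any.dims` overloads above let these reductions take a list of dimensions instead of a single `int`. A minimal Python-level sketch of what this enables (assuming a libtorch build that ships these overloads, roughly PyTorch 2.2+; torch-rb forwards to the same native ops):

```python
import torch

x = torch.tensor([[True, False], [True, True]])

# Reduce over several dimensions at once; previously `dim` had to be one int.
print(torch.all(x, dim=(0, 1)))                      # tensor(False)
print(torch.any(x, dim=(0, 1)))                      # tensor(True)
print(torch.any(x, dim=(0, 1), keepdim=True).shape)  # torch.Size([1, 1])
```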
@@ -733,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange
 
-# This operator should be named `aragne.start_out` if following the naming convention. However that
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1190,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise
 
+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -1326,6 +1376,7 @@
   dispatch:
     SparseCPU, SparseCUDA: cat_sparse
     QuantizedCPU: cat_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: cat_nested
   tags: core
 
 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -1590,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps
 
 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1613,59 +1665,67 @@
   variants: method
   manual_cpp_binding: True
 
-- func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups) -> Tensor
+- func: convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution
   autogen: convolution.out
   tags: core
 
-- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     CompositeExplicitAutograd, CUDA: convolution_backward
   autogen: convolution_backward.out
   tags: core
 
-- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution_overrideable
   autogen: convolution_overrideable.out
 
-- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
   dispatch:
     CompositeExplicitAutograd: convolution_backward_overrideable
   autogen: convolution_backward_overrideable.out
 
-- func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
+- func: _convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _convolution
   autogen: _convolution.out
 
-- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
+- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, int[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
 
-- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] dilation, int groups) -> Tensor
+- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, str padding, SymInt[] dilation, SymInt groups) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _convolution_mode_symint
 
-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
 
-- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
+- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv1d_symint
 
-- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
+- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv2d_symint
 
-- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
+- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv3d_symint
 
-- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, str padding="valid", int[1] dilation=1, int groups=1) -> Tensor
+- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, str padding="valid", SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv1d_padding_symint
 
-- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, str padding="valid", int[2] dilation=1, int groups=1) -> Tensor
+- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding="valid", SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv2d_padding_symint
 
-- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, str padding="valid", int[3] dilation=1, int groups=1) -> Tensor
+- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, str padding="valid", SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv3d_padding_symint
 
 - func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
   dispatch:
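
These schema changes swap `int`/`int[]` for `SymInt`/`SymInt[]` in the convolution signatures, so stride, padding, dilation, and groups can stay symbolic when tracing with dynamic shapes; the user-facing call sites are unchanged. The `convNd.padding` overloads (string padding) also gain explicit `*_padding_symint` kernels. A small sketch of that overload, using the public functional API:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 8, 8)
w = torch.randn(6, 3, 3, 3)

# String padding selects the conv2d.padding overload registered above;
# "same" keeps the spatial size, "valid" applies no padding.
y = F.conv2d(x, w, padding="same")
print(y.shape)  # torch.Size([1, 6, 8, 8])
```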
@@ -1675,15 +1735,15 @@
 - func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)
 
 # NB: we inherit the goofy argument order from PyTorch torch.nn.functional
-- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor
+- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, SymInt groups=1, SymInt[1] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose1d_symint
 
-- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor
+- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt groups=1, SymInt[2] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose2d_symint
 
-- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
+- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt groups=1, SymInt[3] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose3d_symint
 
@@ -1691,6 +1751,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutogradNonFunctional: copy
+  tags: core
 
 - func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
   variants: method
@@ -1720,6 +1781,8 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   structured_delegate: cos.out
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: cos_nested
   tags: [core, pointwise]
 
 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -1802,32 +1865,35 @@
     CUDA: cudnn_batch_norm_backward
   autogen: cudnn_batch_norm_backward.out
 
-- func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+- func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-  autogen: cudnn_convolution.out
 
-- func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out
+
+- func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_transpose
   autogen: cudnn_convolution_transpose.out
 
-- func: _mps_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: _mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution_transpose
   autogen: _mps_convolution_transpose.out
 
-- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool[2] output_mask) -> (Tensor, Tensor)
+- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask) -> (Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_transpose_backward
   autogen: mps_convolution_transpose_backward.out
 
-- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_relu
   autogen: cudnn_convolution_relu.out
 
-- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_add_relu
   autogen: cudnn_convolution_add_relu.out
@@ -1967,6 +2033,7 @@
   dispatch:
     CPU: ctc_loss_cpu
     CUDA: ctc_loss_gpu
+    Meta: ctc_loss_meta
   autogen: _ctc_loss.out
   tags: dynamic_output_shape # the shape of second output is data dependent
 
@@ -1999,6 +2066,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: diagonal
+  tags: core
 
 - func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
   python_module: linalg
@@ -2079,7 +2147,7 @@
   structured_delegate: div.out_mode
   dispatch:
     SparseCPU, SparseCUDA: div_sparse
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -2120,7 +2188,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: div
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)
   variants: method
@@ -2302,7 +2370,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core
 
@@ -2370,7 +2438,7 @@
   variants: method
   device_check: NoCheck
   device_guard: False
-  tags: inplace_view
+  tags: [core, inplace_view]
   dispatch:
     Meta: resize__symint
     CPU: resize_
@@ -2408,7 +2476,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
    NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out
 
@@ -2517,7 +2585,7 @@
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse
     SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -2684,10 +2752,15 @@
 - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide
 
 - func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide_
+  autogen: floor_divide.Scalar_out
 
 - func: frac(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
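
`floor_divide.Scalar` and its in-place variant gain an explicit `CompositeExplicitAutograd` kernel plus an autogenerated out variant; behavior is unchanged. For reference, standard PyTorch semantics (rounding toward negative infinity):

```python
import torch

t = torch.tensor([7.0, -7.0])
print(torch.floor_divide(t, 2))  # tensor([ 3., -4.])
print(t // 2)                    # operator form of the same overload
```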
@@ -2905,12 +2978,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps
 
 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out
 
 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2918,12 +2993,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps
 
 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out
 
 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2931,12 +3008,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps
 
 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out
 
 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
@@ -2979,7 +3058,7 @@
 - func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
   variants: function
   dispatch:
-    CPU, CUDA: _unsafe_index
+    CompositeExplicitAutograd: _unsafe_index
 
 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -3253,14 +3332,22 @@
   dispatch:
     CUDA: _cslt_compress
 
-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm
 
-- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None) -> Tensor
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear
 
+- func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
+  dispatch:
+    CUDA: _mixed_dtypes_linear
+
 - func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
 
 - func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
@@ -3291,12 +3378,42 @@
   dispatch:
     CompositeExplicitAutograd: linspace
 
+- func: linspace.Tensor_Tensor(Tensor start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Tensor_Scalar(Tensor start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Scalar_Tensor(Scalar start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
 - func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: linspace_out
     CUDA: linspace_cuda_out
     MPS: linspace_out_mps
 
+- func: linspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
 - func: log(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: log.out
@@ -3322,7 +3439,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log10.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: log10_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3346,7 +3463,7 @@
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
     SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3372,7 +3489,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log2.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: log2_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3477,11 +3594,41 @@
   dispatch:
     CompositeExplicitAutograd: logspace
 
+- func: logspace.Tensor_Tensor(Tensor start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Tensor_Scalar(Tensor start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Scalar_Tensor(Scalar start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
 - func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: logspace_out
     CUDA: logspace_cuda_out
 
+- func: logspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
 # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
 - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
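
The six new `linspace`/`logspace` overloads accept 0-dim tensors for `start` and/or `end`, so endpoints computed on-device no longer have to round-trip through Python scalars. A sketch (assuming a build that ships these overloads, roughly PyTorch 2.2+):

```python
import torch

start = torch.tensor(0.0)
end = torch.tensor(1.0)

print(torch.linspace(start, end, steps=5))
# tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])

print(torch.logspace(start, torch.tensor(2.0), steps=3))
# tensor([  1.,  10., 100.])  -- base 10 by default
```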
@@ -3847,17 +3994,17 @@
 # TODO: Add this function to MPS dispatch key so that we avoid declaring it in
 # native_functions.yaml
 # https://github.com/pytorch/pytorch/issues/77394
-- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution
   autogen: _mps_convolution.out
 
-- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_backward
   autogen: mps_convolution_backward.out
 
-- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: mkldnn_convolution
   autogen: mkldnn_convolution.out
@@ -3883,26 +4030,26 @@
     CUDA: miopen_batch_norm_backward
   autogen: miopen_batch_norm_backward.out
 
-- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution
   autogen: miopen_convolution.out
 
-- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution_transpose
   autogen: miopen_convolution_transpose.out
 
-- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_depthwise_convolution
   autogen: miopen_depthwise_convolution.out
 
-- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_relu
 
-- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_add_relu
 
@@ -3943,6 +4090,20 @@
   dispatch:
     CUDA: _int_mm_out_cuda
 
+- func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
+  dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
+    CUDA: _convert_weight_to_int4pack_cuda
+
+- func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+  dispatch:
+    CPU: _weight_int4pack_mm_cpu
+    CUDA: _weight_int4pack_mm_cuda
+
+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
 
@@ -4087,6 +4248,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: narrow_symint
+    NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint
 
 - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
   variants: function, method
@@ -4199,7 +4361,7 @@
 
 - func: _nnpack_available() -> bool
 
-- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, int[2] stride=1) -> Tensor
+- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor
   variants: function
   dispatch:
     CompositeExplicitAutograd: _nnpack_spatial_convolution
@@ -4314,23 +4476,24 @@
 - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
   dispatch:
     CPU: pixel_shuffle_cpu
+    MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core
 
 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
     CPU: pixel_unshuffle_cpu
+    MPS: pixel_unshuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_unshuffle
   autogen: pixel_unshuffle.out
 
-- func: channel_shuffle(Tensor self, int groups) -> Tensor
+- func: channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU, CUDA: channel_shuffle
     QuantizedCPU: channel_shuffle_quantized_cpu
   autogen: channel_shuffle.out
 
-- func: native_channel_shuffle(Tensor self, int groups) -> Tensor
+- func: native_channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU: channel_shuffle_cpu
     CompositeImplicitAutograd: math_channel_shuffle
@@ -4338,7 +4501,7 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    CUDA: is_pinned_cuda
+    NestedTensorCUDA, CUDA: is_pinned_cuda
     MPS: is_pinned_mps
     CompositeExplicitAutograd: is_pinned_default
 
@@ -4352,6 +4515,7 @@
   dispatch:
     CUDA: _pin_memory_cuda
     MPS: _pin_memory_mps
+    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
   autogen: _pin_memory.out
 
 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
@@ -4660,7 +4824,7 @@
   autogen: repeat.out
   tags: core
 
-- func: repeat_interleave.Tensor(Tensor repeats, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.Tensor(Tensor repeats, *, SymInt? output_size=None) -> Tensor
   variants: function
   dispatch:
     CPU: repeat_interleave_cpu
@@ -4669,10 +4833,12 @@
   tags: dynamic_output_shape
   autogen: repeat_interleave.Tensor_out
 
-- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
+  dispatch:
+    CompositeImplicitAutograd: repeat_interleave_symint
 
-- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
   dispatch:
     CompositeImplicitAutograd: repeat_interleave_symint
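
`output_size` on `repeat_interleave` becomes a `SymInt`. Passing it up front tells the kernel the total output length, so shape inference does not depend on summing `repeats` (a device sync); this matters under tracing with dynamic shapes. Standard usage:

```python
import torch

x = torch.tensor([10, 20])
repeats = torch.tensor([2, 3])

y = torch.repeat_interleave(x, repeats, output_size=5)
print(y)  # tensor([10, 10, 20, 20, 20])
```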
@@ -4683,7 +4849,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor: reshape_nested
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint
 
 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4842,6 +5008,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
 
 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -4973,12 +5140,14 @@
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
+  tags: pointwise
 
 - func: silu_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: silu.out
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
+  tags: pointwise
 
 - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -4987,6 +5156,7 @@
   dispatch:
     CPU, CUDA: silu_out
     MPS: silu_out_mps
+  tags: pointwise
 
 - func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -4995,6 +5165,7 @@
   dispatch:
     CPU, CUDA: silu_backward_out
     MPS: silu_backward_out_mps
+  tags: pointwise
 
 - func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: silu_backward.grad_input
@@ -5002,6 +5173,7 @@
   dispatch:
     CompositeImplicitAutograd: math_silu_backward
     NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
+  tags: pointwise
 
 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
@@ -5017,11 +5189,13 @@
   python_module: nn
   dispatch:
     CPU, CUDA: mish_out
+    MPS: mish_out_mps
 
 - func: mish_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: mish_backward
+    MPS: mish_backward_mps
     CompositeImplicitAutograd: math_mish_backward
 
 - func: sigmoid(Tensor self) -> Tensor
@@ -5076,6 +5250,7 @@
   dispatch:
     SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
+    NestedTensorCPU, NestedTensorCUDA: sin_nested
   tags: [core, pointwise]
 
 - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5221,6 +5396,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out
 
+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+#   input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+#   easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
@@ -5228,7 +5418,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]
 
 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
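
`slice_scatter` picks up the `view_copy` tag alongside `core`. As a reminder of what the op computes (it is the functional counterpart of assigning into a slice):

```python
import torch

x = torch.zeros(4, 4)
src = torch.ones(2, 4)

# Returns a new tensor equal to x with x[1:3] replaced by src.
y = torch.slice_scatter(x, src, dim=0, start=1, end=3)
print(y.sum())  # tensor(8.)
```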
@@ -5427,6 +5617,14 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda
 
+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
@@ -5618,6 +5816,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out
 
 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -5873,7 +6072,6 @@
     CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
-  tags: core
 
 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args
 
@@ -5956,6 +6154,52 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out
 
+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
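
The `_nested_view_from_jagged` / `_nested_get_*` ops above back the jagged nested-tensor layout; they are private, and the public entry point is `torch.nested` with `layout=torch.jagged`. A hedged sketch of that public API (as of roughly PyTorch 2.2; details may differ by version):

```python
import torch

nt = torch.nested.nested_tensor(
    [torch.randn(2, 5), torch.randn(3, 5)],
    layout=torch.jagged,  # ragged along dim 1, backed by the ops above
)
print(nt.is_nested, nt.layout)  # True torch.jagged
```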
@@ -5971,7 +6215,7 @@
5971
6215
  dispatch:
5972
6216
  SparseCPU, SparseCUDA: trunc_sparse
5973
6217
  SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
5974
- tags: pointwise
6218
+ tags: [core, pointwise]
5975
6219
 
5976
6220
  - func: trunc_(Tensor(a!) self) -> Tensor(a!)
5977
6221
  structured_delegate: trunc.out
@@ -6140,6 +6384,7 @@
6140
6384
  variants: function
6141
6385
  dispatch:
6142
6386
  CPU, CUDA: var_mean
6387
+ MPS: var_mean_mps
6143
6388
  autogen: var_mean.correction_out
6144
6389
 
6145
6390
  - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6160,15 +6405,13 @@
6160
6405
  device_check: NoCheck # TensorIterator
6161
6406
  variants: function, method
6162
6407
  dispatch:
6163
- CPU, CUDA: where
6164
- MPS: where_mps
6408
+ CPU, CUDA, MPS: where
6165
6409
  tags: [core, pointwise]
6166
6410
 
6167
6411
  - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
6168
6412
  device_check: NoCheck # TensorIterator
6169
6413
  dispatch:
6170
- CPU, CUDA: where_self_out
6171
- MPS: where_self_out_mps
6414
+ CPU, CUDA, MPS: where_self_out
6172
6415
 
6173
6416
  - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
6174
6417
  variants: function
@@ -6196,6 +6439,7 @@
6196
6439
  dispatch:
6197
6440
  CPU: weight_norm_cpu
6198
6441
  CUDA: weight_norm_cuda
6442
+ MPS: weight_norm_mps
6199
6443
  autogen: _weight_norm_interface.out
6200
6444
 
6201
6445
  - func: _weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6203,6 +6447,7 @@
6203
6447
  dispatch:
6204
6448
  CPU: weight_norm_backward_cpu
6205
6449
  CUDA: weight_norm_backward_cuda
6450
+ MPS: weight_norm_backward_mps
6206
6451
  autogen: _weight_norm_interface_backward.out
6207
6452
 
6208
6453
  - func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6219,6 +6464,7 @@
6219
6464
  dispatch:
6220
6465
  CPU: _efficientzerotensor
6221
6466
  CUDA: _efficientzerotensor_cuda
6467
+ MPS: _efficientzerotensor_mps
6222
6468
  Meta: _efficientzerotensor_meta
6223
6469
  autogen: _efficientzerotensor.out
6224
6470
 
@@ -6506,7 +6752,7 @@
6506
6752
  MPS: zero_mps_
6507
6753
  Meta: zero_meta_
6508
6754
  SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
6509
- SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
6755
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
6510
6756
  MkldnnCPU: mkldnn_zero_
6511
6757
  NestedTensorCPU, NestedTensorCUDA: zero_nested_
6512
6758
  autogen: zero, zero.out
@@ -6675,12 +6921,12 @@
6675
6921
  structured_delegate: _addmm_activation.out
6676
6922
  variants: function, method
6677
6923
 
6678
- - func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None) -> (Tensor, Tensor)
6924
+ - func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
6679
6925
  variants: function
6680
6926
  dispatch:
6681
6927
  CUDA: _scaled_mm_cuda
6682
6928
 
6683
- - func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
6929
+ - func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
6684
6930
  variants: function
6685
6931
  dispatch:
6686
6932
  CUDA: _scaled_mm_out_cuda
@@ -6796,7 +7042,7 @@
6796
7042
  # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
6797
7043
  # the default would never make sense.
6798
7044
 
6799
- - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
7045
+ - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
6800
7046
  dispatch:
6801
7047
  CompositeExplicitAutograd: sparse_compressed_tensor
6802
7048
 
@@ -6813,7 +7059,10 @@
6813
7059
  - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
6814
7060
  - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
6815
7061
 
6816
- - func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
7062
+ - func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
7063
+ dispatch:
7064
+ CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
7065
+
6817
7066
  - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
6818
7067
  - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
6819
7068
  - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6899,7 +7148,7 @@
6899
7148
  dispatch:
6900
7149
  CPU, CUDA: sparse_dim_strided
6901
7150
  SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
6902
- SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
7151
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
6903
7152
  device_check: NoCheck
6904
7153
  device_guard: False
6905
7154
 
@@ -6916,7 +7165,7 @@
6916
7165
  dispatch:
6917
7166
  CPU, CUDA: dense_dim_strided
6918
7167
  SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
6919
- SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
7168
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
6920
7169
  device_check: NoCheck
6921
7170
  device_guard: False
6922
7171
 
@@ -6932,7 +7181,7 @@
6932
7181
  variants: method
6933
7182
  dispatch:
6934
7183
  SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
6935
- SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
7184
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
6936
7185
  device_check: NoCheck
6937
7186
  device_guard: False
6938
7187
 
@@ -6995,7 +7244,7 @@
6995
7244
  variants: method
6996
7245
  dispatch:
6997
7246
  SparseCPU, SparseCUDA, SparseMeta: values_sparse
6998
- SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
7247
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
6999
7248
  NestedTensorCPU, NestedTensorCUDA: values_nested
7000
7249
  CompositeExplicitAutograd: values_default
7001
7250
  device_check: NoCheck
@@ -7004,7 +7253,7 @@
7004
7253
  - func: crow_indices(Tensor(a) self) -> Tensor(a)
7005
7254
  variants: method
7006
7255
  dispatch:
7007
- SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
7256
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
7008
7257
  CompositeExplicitAutograd: crow_indices_default
7009
7258
  device_check: NoCheck
7010
7259
  device_guard: False
@@ -7012,7 +7261,7 @@
7012
7261
  - func: col_indices(Tensor(a) self) -> Tensor(a)
7013
7262
  variants: method
7014
7263
  dispatch:
7015
- SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
7264
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
7016
7265
  CompositeExplicitAutograd: col_indices_default
7017
7266
  device_check: NoCheck
7018
7267
  device_guard: False
@@ -7020,7 +7269,7 @@
7020
7269
  - func: ccol_indices(Tensor(a) self) -> Tensor(a)
7021
7270
  variants: method
7022
7271
  dispatch:
7023
- SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
7272
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
7024
7273
  CompositeExplicitAutograd: ccol_indices_default
7025
7274
  device_check: NoCheck
7026
7275
  device_guard: False
@@ -7028,7 +7277,7 @@
7028
7277
  - func: row_indices(Tensor(a) self) -> Tensor(a)
7029
7278
  variants: method
7030
7279
  dispatch:
7031
- SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
7280
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
7032
7281
  CompositeExplicitAutograd: row_indices_default
7033
7282
  device_check: NoCheck
7034
7283
  device_guard: False
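
The recurring change across the preceding hunks adds SparseCsrMeta to the dispatch lists of the CSR introspection ops (sparse_dim, dense_dim, _nnz, values, and the four index accessors), so they can also run on meta tensors for shape-only reasoning. A sketch of the accessors themselves on an eager CSR tensor (whether a given build supports constructing CSR tensors directly on the meta device is an assumption here):

    import torch

    csr = torch.sparse_csr_tensor(
        torch.tensor([0, 1, 2]),   # crow_indices
        torch.tensor([0, 1]),      # col_indices
        torch.tensor([1.0, 2.0]),  # values
        size=(2, 2))
    print(csr._nnz(), csr.sparse_dim(), csr.dense_dim())
    print(csr.crow_indices(), csr.col_indices())
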
@@ -7055,7 +7304,7 @@
7055
7304
  variants: function, method
7056
7305
  dispatch:
7057
7306
  CompositeExplicitAutograd: unbind
7058
- CompositeImplicitAutogradNestedTensor: NestedTensor_unbind
7307
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
7059
7308
 
7060
7309
  - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
7061
7310
  variants: function, method
@@ -7143,14 +7392,14 @@
7143
7392
  CPU: dense_to_mkldnn
7144
7393
  autogen: to_mkldnn.out
7145
7394
 
7146
- - func: mkldnn_reorder_conv2d_weight(Tensor self, int[2] padding=0, int[2] stride=1, int[2] dilation=1, int groups=1, int[]? input_size=None) -> Tensor
7395
+ - func: mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
7147
7396
  variants: function
7148
7397
  python_module: nn
7149
7398
  dispatch:
7150
7399
  MkldnnCPU: mkldnn_reorder_conv2d_weight
7151
7400
  autogen: mkldnn_reorder_conv2d_weight.out
7152
7401
 
7153
- - func: mkldnn_reorder_conv3d_weight(Tensor self, int[3] padding=0, int[3] stride=1, int[3] dilation=1, int groups=1) -> Tensor
7402
+ - func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
7154
7403
  variants: function
7155
7404
  python_module: nn
7156
7405
  dispatch:
@@ -7537,6 +7786,7 @@
7537
7786
  dispatch:
7538
7787
  CPU, CUDA, Meta, MPS: set_
7539
7788
  autogen: set.source_Storage, set.source_Storage_out
7789
+ tags: inplace_view
7540
7790
 
7541
7791
  - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
7542
7792
  variants: method
@@ -7549,6 +7799,7 @@
7549
7799
  MPS: set_storage_mps_
7550
7800
  QuantizedCPU, QuantizedCUDA: set_storage_quantized_
7551
7801
  autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
7802
+ tags: inplace_view
7552
7803
 
7553
7804
  - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
7554
7805
  variants: method
@@ -7556,6 +7807,7 @@
7556
7807
  device_guard: False
7557
7808
  dispatch:
7558
7809
  CompositeImplicitAutograd: set__symint
7810
+ tags: inplace_view
7559
7811
 
7560
7812
  - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
7561
7813
  variants: method
@@ -7564,6 +7816,7 @@
7564
7816
  dispatch:
7565
7817
  CPU, CUDA, Meta, MPS: set_tensor_
7566
7818
  autogen: set.source_Tensor, set.source_Tensor_out
7819
+ tags: inplace_view
7567
7820
 
7568
7821
  - func: set_(Tensor(a!) self) -> Tensor(a!)
7569
7822
  variants: method
@@ -7573,6 +7826,7 @@
7573
7826
  Meta: set_meta_
7574
7827
  MPS: set_mps_
7575
7828
  autogen: set, set.out
7829
+ tags: inplace_view
7576
7830
 
7577
7831
  # Not making it CompositeImplicitAutograd because lift
7578
7832
  # should be a primitive w.r.t. functorch
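
Every set_ overload above now carries tags: inplace_view, which tells functionalization and the autograd machinery that the op rewrites self's metadata (storage, sizes, strides) rather than its data. A small sketch of the behavior being tagged:

    import torch

    a = torch.zeros(4)
    b = torch.arange(4.0)
    a.set_(b)                             # a now aliases b's storage
    assert a.data_ptr() == b.data_ptr()
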
@@ -7656,6 +7910,10 @@
7656
7910
  dispatch:
7657
7911
  CompositeExplicitAutograd: masked_scatter
7658
7912
 
7913
+ - func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
7914
+ dispatch:
7915
+ CompositeExplicitAutograd: masked_scatter_backward_symint
7916
+
7659
7917
  - func: _masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor
7660
7918
  dispatch:
7661
7919
  CUDA: masked_softmax_cuda
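
The new masked_scatter_backward entry factors out the source gradient of masked_scatter: it gathers grad_output at the mask positions, with sizes carrying the (possibly symbolic) source shape. A hedged end-to-end sketch through autograd, using upstream PyTorch's Python API:

    import torch

    x = torch.zeros(2, 3)
    mask = torch.tensor([[True, False, True], [False, True, False]])
    src = torch.arange(3.0, requires_grad=True)
    out = x.masked_scatter(mask, src)     # copies src into the masked slots
    out.sum().backward()
    print(src.grad)                       # ones: each source element copied once
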
@@ -7938,6 +8196,8 @@
7938
8196
  - func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
7939
8197
  device_check: NoCheck # TensorIterator
7940
8198
  variants: method
8199
+ dispatch:
8200
+ CompositeExplicitAutograd: bitwise_and_
7941
8201
  tags: pointwise
7942
8202
 
7943
8203
  - func: bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -7982,6 +8242,8 @@
7982
8242
  - func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
7983
8243
  device_check: NoCheck # TensorIterator
7984
8244
  variants: method, function
8245
+ dispatch:
8246
+ CompositeExplicitAutograd: bitwise_or
7985
8247
  tags: [core, pointwise]
7986
8248
 
7987
8249
  - func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
@@ -8001,6 +8263,8 @@
8001
8263
  - func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
8002
8264
  device_check: NoCheck # TensorIterator
8003
8265
  variants: method
8266
+ dispatch:
8267
+ CompositeExplicitAutograd: bitwise_or_
8004
8268
  tags: pointwise
8005
8269
 
8006
8270
  - func: bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -8045,6 +8309,8 @@
8045
8309
  - func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
8046
8310
  device_check: NoCheck # TensorIterator
8047
8311
  variants: method, function
8312
+ dispatch:
8313
+ CompositeExplicitAutograd: bitwise_xor
8048
8314
  tags: [core, pointwise]
8049
8315
 
8050
8316
  - func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
@@ -8064,6 +8330,8 @@
8064
8330
  - func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
8065
8331
  device_check: NoCheck # TensorIterator
8066
8332
  variants: method
8333
+ dispatch:
8334
+ CompositeExplicitAutograd: bitwise_xor_
8067
8335
  tags: pointwise
8068
8336
 
8069
8337
  - func: bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
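
This run of hunks gives the Scalar overloads of bitwise_and_/bitwise_or/bitwise_xor an explicit CompositeExplicitAutograd kernel; call sites are unaffected. For reference, the overloads being touched:

    import torch

    t = torch.tensor([0b1010, 0b0110])
    print(t | 0b0001)                     # bitwise_or.Scalar
    t &= 0b1100                           # bitwise_and_.Scalar, in place
    t ^= 0b0011                           # bitwise_xor_.Scalar, in place
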
@@ -8504,6 +8772,7 @@
8504
8772
  variants: method, function
8505
8773
  dispatch:
8506
8774
  QuantizedCPU: eq_quantized_cpu
8775
+ NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
8507
8776
  tags: [core, pointwise]
8508
8777
 
8509
8778
  - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8540,6 +8809,7 @@
8540
8809
  variants: method, function
8541
8810
  dispatch:
8542
8811
  QuantizedCPU: ge_quantized_cpu
8812
+ NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
8543
8813
  tags: [core, pointwise]
8544
8814
 
8545
8815
  - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8666,6 +8936,7 @@
8666
8936
  variants: method, function
8667
8937
  dispatch:
8668
8938
  QuantizedCPU: gt_quantized_cpu
8939
+ NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
8669
8940
  tags: [core, pointwise]
8670
8941
 
8671
8942
  - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
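
In the preceding hunks, eq, ge and gt against a scalar gain nested-tensor kernels, enabling per-component comparisons. A sketch assuming a build where these nested kernels are active (API shown is upstream PyTorch's):

    import torch

    nt = torch.nested.nested_tensor([torch.tensor([1.0, 2.0]),
                                     torch.tensor([3.0])])
    print(nt > 1.5)                       # nested bool tensor, per component
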
@@ -9106,6 +9377,7 @@
9106
9377
  structured_inherits: TensorIteratorBase
9107
9378
  dispatch:
9108
9379
  CPU, CUDA: lgamma_out
9380
+ MPS: lgamma_out_mps
9109
9381
  tags: pointwise
9110
9382
 
9111
9383
  - func: lgamma_(Tensor(a!) self) -> Tensor(a!)
@@ -9126,6 +9398,7 @@
9126
9398
  structured_inherits: TensorIteratorBase
9127
9399
  dispatch:
9128
9400
  CPU, CUDA: digamma_out
9401
+ MPS: digamma_out_mps
9129
9402
  tags: pointwise
9130
9403
 
9131
9404
  - func: digamma(Tensor self) -> Tensor
@@ -9140,6 +9413,7 @@
9140
9413
  structured_inherits: TensorIteratorBase
9141
9414
  dispatch:
9142
9415
  CPU, CUDA: polygamma_out
9416
+ MPS: polygamma_out_mps
9143
9417
  tags: pointwise
9144
9418
 
9145
9419
  - func: polygamma(int n, Tensor self) -> Tensor
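
lgamma, digamma and polygamma gain MPS out-kernels here; semantics match the existing CPU/CUDA paths. The functions in question:

    import torch

    x = torch.linspace(0.5, 3.0, 4)
    print(torch.lgamma(x))                # log|Gamma(x)|
    print(torch.digamma(x))               # d/dx log Gamma(x)
    print(torch.polygamma(1, x))          # first derivative of digamma
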
@@ -9263,7 +9537,7 @@
9263
9537
  dispatch:
9264
9538
  CPU, CUDA: atan2_out
9265
9539
  MPS: atan2_out_mps
9266
- tags: pointwise
9540
+ tags: [core, pointwise]
9267
9541
 
9268
9542
  - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
9269
9543
  device_check: NoCheck # TensorIterator
@@ -9275,7 +9549,7 @@
9275
9549
  device_check: NoCheck # TensorIterator
9276
9550
  structured_delegate: atan2.out
9277
9551
  variants: method, function
9278
- tags: pointwise
9552
+ tags: [core, pointwise]
9279
9553
  # arctan2, alias of atan2
9280
9554
 
9281
9555
  - func: arctan2(Tensor self, Tensor other) -> Tensor
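
atan2 (and, further down, pow.Scalar and isinf) is promoted from pointwise to [core, pointwise]; the core tag marks membership in the core ATen opset that export and decomposition pipelines are expected to preserve rather than decompose. The op itself is unchanged:

    import torch

    y = torch.tensor([1.0, -1.0])
    x = torch.tensor([1.0, 1.0])
    print(torch.atan2(y, x))              # quadrant-aware arctangent
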
@@ -9464,7 +9738,7 @@
9464
9738
  structured: True
9465
9739
  structured_inherits: TensorIteratorBase
9466
9740
  dispatch:
9467
- CPU, CUDA: nextafter_out
9741
+ CPU, CUDA, MPS: nextafter_out
9468
9742
  tags: pointwise
9469
9743
 
9470
9744
  - func: nextafter(Tensor self, Tensor other) -> Tensor
@@ -9811,7 +10085,7 @@
9811
10085
  - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
9812
10086
  device_check: NoCheck # TensorIterator
9813
10087
  structured_delegate: pow.Scalar_out
9814
- tags: pointwise
10088
+ tags: [core, pointwise]
9815
10089
 
9816
10090
  - func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
9817
10091
  device_check: NoCheck # TensorIterator
@@ -9954,12 +10228,14 @@
9954
10228
  variants: function
9955
10229
  dispatch:
9956
10230
  CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
10231
+ CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
9957
10232
  autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out
9958
10233
 
9959
10234
  - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
9960
10235
  variants: function
9961
10236
  dispatch:
9962
10237
  CUDA: _amp_update_scale_cuda_
10238
+ CPU: _amp_update_scale_cpu_
9963
10239
  autogen: _amp_update_scale, _amp_update_scale.out
9964
10240
 
9965
10241
  #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
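
Both AMP scaffolding ops gain CPU kernels above, which is what allows gradient scaling to run without CUDA. A hedged sketch (assumes a PyTorch build whose GradScaler accepts the "cpu" device; older builds expose the scaler only under torch.cuda.amp):

    import torch

    scaler = torch.amp.GradScaler("cpu")
    loss = torch.tensor(2.0, requires_grad=True)
    scaled = scaler.scale(loss)           # scaled loss to call backward() on
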
@@ -10020,6 +10296,21 @@
10020
10296
  CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
10021
10297
  autogen: _foreach_add.ScalarList_out
10022
10298
 
10299
+ - func: _foreach_add.Tensor(Tensor[] self, Tensor other, *, Scalar alpha=1) -> Tensor[]
10300
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
10301
+ variants: function
10302
+ dispatch:
10303
+ CPU: foreach_tensor_add_tensor_kernel_slow
10304
+ CUDA: foreach_tensor_add_tensor_kernel_cuda
10305
+
10306
+ - func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
10307
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
10308
+ variants: function
10309
+ dispatch:
10310
+ CPU: foreach_tensor_add_tensor_kernel_slow_
10311
+ CUDA: foreach_tensor_add_tensor_kernel_cuda_
10312
+ autogen: _foreach_add.Tensor_out
10313
+
10023
10314
  - func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
10024
10315
  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
10025
10316
  variants: function
@@ -10170,6 +10461,21 @@
10170
10461
  CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
10171
10462
  autogen: _foreach_div.ScalarList_out
10172
10463
 
10464
+ - func: _foreach_div.Tensor(Tensor[] self, Tensor other) -> Tensor[]
10465
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
10466
+ variants: function
10467
+ dispatch:
10468
+ CPU: foreach_tensor_div_tensor_kernel_slow
10469
+ CUDA: foreach_tensor_div_tensor_kernel_cuda
10470
+
10471
+ - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
10472
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
10473
+ variants: function
10474
+ dispatch:
10475
+ CPU: foreach_tensor_div_tensor_kernel_slow_
10476
+ CUDA: foreach_tensor_div_tensor_kernel_cuda_
10477
+ autogen: _foreach_div.Tensor_out
10478
+
10173
10479
  - func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
10174
10480
  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
10175
10481
  variants: function
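
_foreach_add and _foreach_div gain Tensor overloads above: a single tensor (typically a 0-dim hyperparameter such as a step count or scale) applied across a whole list, which the fused optimizers rely on. A hedged sketch of the private API (underscore-prefixed, so subject to change):

    import torch

    params = [torch.zeros(2), torch.zeros(3)]
    step = torch.tensor(1.0)
    torch._foreach_add_(params, step)     # in-place Tensor overload, alpha=1
    halved = torch._foreach_div(params, torch.tensor(2.0))
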
@@ -10990,37 +11296,44 @@
10990
11296
  dispatch:
10991
11297
  CPU: bucketize_cpu
10992
11298
  CUDA: bucketize_cuda
11299
+ MPS: bucketize_mps
10993
11300
 
10994
11301
  - func: bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
10995
11302
  dispatch:
10996
11303
  CPU: bucketize_out_cpu
10997
11304
  CUDA: bucketize_out_cuda
11305
+ MPS: bucketize_out_mps
10998
11306
 
10999
11307
  - func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
11000
11308
  dispatch:
11001
11309
  CPU: bucketize_cpu
11002
11310
  CUDA: bucketize_cuda
11311
+ MPS: bucketize_mps
11003
11312
  autogen: bucketize.Scalar_out
11004
11313
 
11005
11314
  - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
11006
11315
  dispatch:
11007
11316
  CPU: searchsorted_cpu
11008
11317
  CUDA: searchsorted_cuda
11318
+ MPS: searchsorted_mps
11009
11319
 
11010
11320
  - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
11011
11321
  dispatch:
11012
11322
  CPU: searchsorted_out_cpu
11013
11323
  CUDA: searchsorted_out_cuda
11324
+ MPS: searchsorted_out_mps
11014
11325
 
11015
11326
  - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
11016
11327
  dispatch:
11017
11328
  CPU: searchsorted_cpu
11018
11329
  CUDA: searchsorted_cuda
11330
+ MPS: searchsorted_mps
11019
11331
 
11020
11332
  - func: searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
11021
11333
  dispatch:
11022
11334
  CPU: searchsorted_out_cpu
11023
11335
  CUDA: searchsorted_out_cuda
11336
+ MPS: searchsorted_out_mps
11024
11337
 
11025
11338
  - func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
11026
11339
  structured_delegate: _convert_indices_from_coo_to_csr.out
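
bucketize and searchsorted gain MPS kernels across all overloads above; behavior matches CPU/CUDA. The two ops differ in argument order and in what is being indexed:

    import torch

    boundaries = torch.tensor([1, 3, 5, 7, 9])
    v = torch.tensor([3, 6, 9])
    print(torch.bucketize(v, boundaries))     # bucket index per element
    print(torch.searchsorted(boundaries, v))  # insertion positions
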
@@ -11568,6 +11881,7 @@
11568
11881
  python_module: nn
11569
11882
  dispatch:
11570
11883
  CPU, CUDA: softshrink_out
11884
+ MPS: softshrink_out_mps
11571
11885
 
11572
11886
  - func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
11573
11887
  structured_delegate: softshrink.out
@@ -11580,6 +11894,7 @@
11580
11894
  python_module: nn
11581
11895
  dispatch:
11582
11896
  CPU, CUDA: softshrink_backward_out
11897
+ MPS: softshrink_backward_out_mps
11583
11898
 
11584
11899
  - func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
11585
11900
  structured_delegate: softshrink_backward.grad_input
@@ -12144,6 +12459,7 @@
12144
12459
  dispatch:
12145
12460
  CPU: upsample_linear1d_out_cpu
12146
12461
  CUDA: upsample_linear1d_out_cuda
12462
+ MPS: upsample_linear1d_out_mps
12147
12463
 
12148
12464
  - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
12149
12465
  python_module: nn
@@ -12155,6 +12471,7 @@
12155
12471
  dispatch:
12156
12472
  CPU: upsample_linear1d_backward_out_cpu
12157
12473
  CUDA: upsample_linear1d_backward_out_cuda
12474
+ MPS: upsample_linear1d_backward_out_mps
12158
12475
 
12159
12476
  - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
12160
12477
  python_module: nn
@@ -12482,101 +12799,101 @@
12482
12799
  # make the operational distinction clear.
12483
12800
  tags: pointwise
12484
12801
 
12485
- - func: slow_conv_transpose2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
12802
+ - func: slow_conv_transpose2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
12486
12803
  python_module: nn
12487
12804
  structured: True
12488
12805
  dispatch:
12489
12806
  CPU: slow_conv_transpose2d_structured_cpu
12490
12807
  CUDA: slow_conv_transpose2d_structured_cuda
12491
12808
 
12492
- - func: slow_conv_transpose2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int[2] dilation=1) -> Tensor
12809
+ - func: slow_conv_transpose2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1) -> Tensor
12493
12810
  python_module: nn
12494
12811
  structured_delegate: slow_conv_transpose2d.out
12495
12812
 
12496
- - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
12813
+ - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
12497
12814
  python_module: nn
12498
12815
  dispatch:
12499
12816
  CPU: slow_conv_transpose3d_out_cpu
12500
12817
  CUDA: slow_conv_transpose3d_out_cuda
12501
12818
 
12502
- - func: slow_conv_transpose3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int[3] dilation=1) -> Tensor
12819
+ - func: slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor
12503
12820
  python_module: nn
12504
12821
  dispatch:
12505
12822
  CPU: slow_conv_transpose3d_cpu
12506
12823
  CUDA: slow_conv_transpose3d_cuda
12507
12824
 
12508
- - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
12825
+ - func: thnn_conv2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
12509
12826
  python_module: nn
12510
12827
 
12511
- - func: thnn_conv2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0) -> Tensor
12828
+ - func: thnn_conv2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0) -> Tensor
12512
12829
  python_module: nn
12513
12830
 
12514
- - func: _slow_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output) -> Tensor(a!)
12831
+ - func: _slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
12515
12832
  python_module: nn
12516
12833
  dispatch:
12517
12834
  CPU: slow_conv2d_forward_out_cpu
12518
12835
  CUDA: slow_conv2d_forward_out_cuda
12519
12836
 
12520
- - func: _slow_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> Tensor
12837
+ - func: _slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
12521
12838
  python_module: nn
12522
12839
  dispatch:
12523
12840
  CPU: slow_conv2d_forward_cpu
12524
12841
  CUDA: slow_conv2d_forward_cuda
12525
12842
 
12526
- - func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
12843
+ - func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
12527
12844
  python_module: nn
12528
12845
  dispatch:
12529
12846
  CPU: slow_conv2d_backward_out_cpu
12530
12847
  CUDA: slow_conv2d_backward_out_cuda
12531
12848
 
12532
- - func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
12849
+ - func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
12533
12850
  python_module: nn
12534
12851
  dispatch:
12535
12852
  CPU: slow_conv2d_backward_cpu
12536
12853
  CUDA: slow_conv2d_backward_cuda
12537
12854
  autogen: _slow_conv2d_backward.output_mask_out
12538
12855
 
12539
- - func: _conv_depthwise2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, SymInt[2] padding, int[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
12856
+ - func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
12540
12857
  use_const_ref_for_mutable_tensors: True
12541
12858
  python_module: nn
12542
12859
  dispatch:
12543
12860
  CUDA: conv_depthwise2d_cuda_out
12544
12861
 
12545
- - func: _conv_depthwise2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, SymInt[2] padding, int[2] dilation) -> Tensor
12862
+ - func: _conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor
12546
12863
  python_module: nn
12547
12864
  dispatch:
12548
12865
  CUDA: conv_depthwise2d_cuda
12549
12866
 
12550
- - func: conv_depthwise3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding, int[3] dilation) -> Tensor
12867
+ - func: conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor
12551
12868
  python_module: nn
12552
12869
  dispatch:
12553
12870
  CUDA: conv_depthwise3d_cuda
12554
12871
  autogen: conv_depthwise3d.out
12555
12872
 
12556
- - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
12873
+ - func: slow_conv3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
12557
12874
  python_module: nn
12558
12875
 
12559
- - func: slow_conv3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0) -> Tensor
12876
+ - func: slow_conv3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0) -> Tensor
12560
12877
  python_module: nn
12561
12878
 
12562
- - func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
12879
+ - func: slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
12563
12880
  python_module: nn
12564
12881
  dispatch:
12565
12882
  CPU: slow_conv3d_forward_out_cpu
12566
12883
 
12567
- - func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding) -> Tensor
12884
+ - func: slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding) -> Tensor
12568
12885
  python_module: nn
12569
12886
  dispatch:
12570
12887
  CPU: slow_conv3d_forward_cpu
12571
12888
 
12572
- - func: slow_conv_dilated2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, int[2] dilation=1) -> Tensor
12889
+ - func: slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor
12573
12890
  python_module: nn
12574
12891
  dispatch:
12575
12892
  CPU: slow_conv_dilated2d_cpu
12576
12893
  CUDA: slow_conv_dilated2d_cuda
12577
12894
  autogen: slow_conv_dilated2d.out
12578
12895
 
12579
- - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, int[3] dilation=1) -> Tensor
12896
+ - func: slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor
12580
12897
  python_module: nn
12581
12898
  dispatch:
12582
12899
  CPU: slow_conv_dilated3d_cpu
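
The whole block of slow/reference convolution schemas above migrates kernel_size, stride, padding and dilation from int[] to SymInt[], so symbolic tracing (torch.export, torch.compile) can keep them as free symbols instead of burned-in constants. Eager calls are untouched; these ops sit behind the usual convolution entry points:

    import torch

    x = torch.randn(1, 1, 5, 5)
    w = torch.randn(2, 1, 3, 3)
    out = torch.nn.functional.conv2d(x, w)  # may lower to a slow_conv path
    print(out.shape)                        # torch.Size([1, 2, 3, 3])
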
@@ -12627,7 +12944,7 @@
12627
12944
  SparseMeta: isinf_sparse_meta
12628
12945
  SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
12629
12946
  autogen: isinf.out
12630
- tags: core
12947
+ tags: [core, pointwise]
12631
12948
 
12632
12949
  - func: record_stream(Tensor(a!) self, Stream s) -> ()
12633
12950
  variants: method
@@ -13553,11 +13870,18 @@
13553
13870
  dispatch:
13554
13871
  CPU, CUDA: linalg_eig_out
13555
13872
 
13873
+ - func: _linalg_eigvals(Tensor self) -> Tensor
13874
+ python_module: linalg
13875
+ dispatch:
13876
+ CPU, CUDA: _linalg_eigvals
13877
+
13556
13878
  - func: linalg_eigvals(Tensor self) -> Tensor
13557
13879
  python_module: linalg
13558
13880
 
13559
13881
  - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
13560
13882
  python_module: linalg
13883
+ dispatch:
13884
+ CPU, CUDA: linalg_eigvals_out
13561
13885
 
13562
13886
  # This function exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
13563
13887
  # `linalg.eigvalsh` as composite functions that call this one
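
A private _linalg_eigvals entry and an explicit out= kernel for linalg_eigvals land in the hunk above; the public call is unchanged:

    import torch

    A = torch.randn(3, 3)
    vals = torch.linalg.eigvals(A)               # complex eigenvalues
    out = torch.empty(3, dtype=torch.complex64)
    torch.linalg.eigvals(A, out=out)             # now a dedicated out kernel
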
@@ -13861,6 +14185,12 @@
13861
14185
  # It is undocumented and should not be used outside of tests.
13862
14186
  - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor
13863
14187
 
14188
+ # Note: for testing COW materialization within `at::parallel_for` loop function
14189
+ - func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
14190
+ variants: function
14191
+ dispatch:
14192
+ CompositeExplicitAutograd: _test_parallel_materialize
14193
+
13864
14194
  # Note: this function is only for testing.
13865
14195
  - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
13866
14196
  python_module: nn
@@ -14195,6 +14525,7 @@
14195
14525
  variants: function
14196
14526
  dispatch:
14197
14527
  CompositeExplicitAutograd: split_with_sizes_copy_out
14528
+ CUDA: split_with_sizes_copy_out_cuda
14198
14529
 
14199
14530
  - func: view_copy(Tensor self, SymInt[] size) -> Tensor
14200
14531
  variants: function
@@ -14269,19 +14600,29 @@
14269
14600
  variants: function
14270
14601
  tags: nondeterministic_seeded
14271
14602
 
14272
- - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor ouput, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14603
+ - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14273
14604
  dispatch:
14274
- CPU: _scaled_dot_product_flash_attention_cpu
14275
14605
  CUDA: _scaled_dot_product_flash_attention_cuda
14276
14606
  NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
14277
14607
  tags: nondeterministic_seeded
14278
14608
 
14279
- - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
14609
+ - func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
14610
+ dispatch:
14611
+ CPU: _scaled_dot_product_flash_attention_cpu
14612
+ tags: nondeterministic_seeded
14613
+
14614
+ - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
14280
14615
  device_check: NoCheck
14281
14616
  variants: function
14282
14617
  dispatch:
14283
- CPU: _scaled_dot_product_flash_attention_backward_cpu
14284
14618
  CUDA: _scaled_dot_product_flash_attention_backward_cuda
14619
+ NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested
14620
+
14621
+ - func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
14622
+ device_check: NoCheck
14623
+ variants: function
14624
+ dispatch:
14625
+ CPU: _scaled_dot_product_flash_attention_cpu_backward
14285
14626
 
14286
14627
  - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
14287
14628
  dispatch:
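
The flash-attention stack is reworked across the hunks above and below: max_q/max_k become SymInt, the CPU path moves to a dedicated _scaled_dot_product_flash_attention_for_cpu forward/backward pair that also accepts an optional attn_mask, a cuDNN-backed variant is added, and a nested-tensor CUDA backward is registered. All of it sits behind the single public entry point:

    import torch

    q = k = v = torch.randn(1, 2, 4, 8)   # (batch, heads, seq, head_dim)
    out = torch.nn.functional.scaled_dot_product_attention(
        q, k, v, is_causal=True)
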
@@ -14295,26 +14636,31 @@
14295
14636
  CUDA: _scaled_dot_product_efficient_attention_backward_cuda
14296
14637
  tags: nondeterministic_seeded
14297
14638
 
14298
- - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14639
+ - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
14640
+ dispatch:
14641
+ CUDA: _scaled_dot_product_cudnn_attention_cuda
14642
+ tags: nondeterministic_seeded
14643
+
14644
+ - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14299
14645
  variants: function
14300
14646
  dispatch:
14301
14647
  CUDA: _flash_attention_forward
14302
14648
  tags: nondeterministic_seeded
14303
14649
 
14304
- - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
14650
+ - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
14305
14651
  device_check: NoCheck
14306
14652
  variants: function
14307
14653
  dispatch:
14308
14654
  CUDA: _flash_attention_backward
14309
14655
 
14310
- # Returns ouput, logsumexp if compute_logsumexp
14311
- - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
14656
+ # Returns output, logsumexp if compute_logsumexp
14657
+ - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
14312
14658
  variants: function
14313
14659
  dispatch:
14314
14660
  CUDA: _efficient_attention_forward
14315
14661
  tags: nondeterministic_seeded
14316
14662
 
14317
- - func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int max_seqlen_k, int max_seqlen_q, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
14663
+ - func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
14318
14664
  device_check: NoCheck
14319
14665
  variants: function
14320
14666
  dispatch:
@@ -14422,12 +14768,16 @@
14422
14768
  tags: pointwise
14423
14769
 
14424
14770
  - func: special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
14771
+ dispatch:
14772
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t
14425
14773
  device_check: NoCheck
14426
14774
  python_module: special
14427
14775
  variants: function
14428
14776
  tags: pointwise
14429
14777
 
14430
14778
  - func: special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
14779
+ dispatch:
14780
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t
14431
14781
  device_check: NoCheck
14432
14782
  python_module: special
14433
14783
  variants: function
@@ -14444,6 +14794,8 @@
14444
14794
  tags: pointwise
14445
14795
 
14446
14796
  - func: special_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
14797
+ dispatch:
14798
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t_out
14447
14799
  device_check: NoCheck
14448
14800
  python_module: special
14449
14801
  variants: function
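
The same two-line change repeats for the rest of the orthogonal-polynomial family below (chebyshev u/v/w, hermite h/he, laguerre, legendre, and the shifted Chebyshev variants): the Scalar-argument overloads gain an explicit CompositeExplicitAutograd kernel. Evaluation is unchanged; a hedged sketch of the two overload shapes:

    import torch

    x = torch.linspace(-1.0, 1.0, 5)
    print(torch.special.chebyshev_polynomial_t(x, 3))    # n_scalar overload
    n = torch.tensor([0.0, 1.0, 2.0])
    print(torch.special.chebyshev_polynomial_t(0.5, n))  # x_scalar overload
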
@@ -14465,12 +14817,16 @@
14465
14817
  tags: pointwise
14466
14818
 
14467
14819
  - func: special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
14820
+ dispatch:
14821
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u
14468
14822
  device_check: NoCheck
14469
14823
  python_module: special
14470
14824
  variants: function
14471
14825
  tags: pointwise
14472
14826
 
14473
14827
  - func: special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
14828
+ dispatch:
14829
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u
14474
14830
  device_check: NoCheck
14475
14831
  python_module: special
14476
14832
  variants: function
@@ -14487,6 +14843,8 @@
14487
14843
  tags: pointwise
14488
14844
 
14489
14845
  - func: special_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
14846
+ dispatch:
14847
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u_out
14490
14848
  device_check: NoCheck
14491
14849
  python_module: special
14492
14850
  variants: function
@@ -14508,12 +14866,16 @@
14508
14866
  tags: pointwise
14509
14867
 
14510
14868
  - func: special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
14869
+ dispatch:
14870
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v
14511
14871
  device_check: NoCheck
14512
14872
  python_module: special
14513
14873
  variants: function
14514
14874
  tags: pointwise
14515
14875
 
14516
14876
  - func: special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
14877
+ dispatch:
14878
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v
14517
14879
  device_check: NoCheck
14518
14880
  python_module: special
14519
14881
  variants: function
@@ -14530,6 +14892,8 @@
14530
14892
  tags: pointwise
14531
14893
 
14532
14894
  - func: special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
14895
+ dispatch:
14896
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v_out
14533
14897
  device_check: NoCheck
14534
14898
  python_module: special
14535
14899
  variants: function
@@ -14551,12 +14915,16 @@
14551
14915
  tags: pointwise
14552
14916
 
14553
14917
  - func: special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
14918
+ dispatch:
14919
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w
14554
14920
  device_check: NoCheck
14555
14921
  python_module: special
14556
14922
  variants: function
14557
14923
  tags: pointwise
14558
14924
 
14559
14925
  - func: special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
14926
+ dispatch:
14927
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w
14560
14928
  device_check: NoCheck
14561
14929
  python_module: special
14562
14930
  variants: function
@@ -14573,6 +14941,8 @@
14573
14941
  tags: pointwise
14574
14942
 
14575
14943
  - func: special_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
14944
+ dispatch:
14945
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w_out
14576
14946
  device_check: NoCheck
14577
14947
  python_module: special
14578
14948
  variants: function
@@ -14594,12 +14964,16 @@
14594
14964
  tags: pointwise
14595
14965
 
14596
14966
  - func: special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor
14967
+ dispatch:
14968
+ CompositeExplicitAutograd: special_hermite_polynomial_h
14597
14969
  device_check: NoCheck
14598
14970
  python_module: special
14599
14971
  variants: function
14600
14972
  tags: pointwise
14601
14973
 
14602
14974
  - func: special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor
14975
+ dispatch:
14976
+ CompositeExplicitAutograd: special_hermite_polynomial_h
14603
14977
  device_check: NoCheck
14604
14978
  python_module: special
14605
14979
  variants: function
@@ -14616,6 +14990,8 @@
14616
14990
  tags: pointwise
14617
14991
 
14618
14992
  - func: special_hermite_polynomial_h.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
14993
+ dispatch:
14994
+ CompositeExplicitAutograd: special_hermite_polynomial_h_out
14619
14995
  device_check: NoCheck
14620
14996
  python_module: special
14621
14997
  variants: function
@@ -14637,12 +15013,16 @@
14637
15013
  tags: pointwise
14638
15014
 
14639
15015
  - func: special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor
15016
+ dispatch:
15017
+ CompositeExplicitAutograd: special_hermite_polynomial_he
14640
15018
  device_check: NoCheck
14641
15019
  python_module: special
14642
15020
  variants: function
14643
15021
  tags: pointwise
14644
15022
 
14645
15023
  - func: special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor
15024
+ dispatch:
15025
+ CompositeExplicitAutograd: special_hermite_polynomial_he
14646
15026
  device_check: NoCheck
14647
15027
  python_module: special
14648
15028
  variants: function
@@ -14659,6 +15039,8 @@
14659
15039
  tags: pointwise
14660
15040
 
14661
15041
  - func: special_hermite_polynomial_he.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
15042
+ dispatch:
15043
+ CompositeExplicitAutograd: special_hermite_polynomial_he_out
14662
15044
  device_check: NoCheck
14663
15045
  python_module: special
14664
15046
  variants: function
@@ -14680,12 +15062,16 @@
14680
15062
  tags: pointwise
14681
15063
 
14682
15064
  - func: special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor
15065
+ dispatch:
15066
+ CompositeExplicitAutograd: special_laguerre_polynomial_l
14683
15067
  device_check: NoCheck
14684
15068
  python_module: special
14685
15069
  variants: function
14686
15070
  tags: pointwise
14687
15071
 
14688
15072
  - func: special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor
15073
+ dispatch:
15074
+ CompositeExplicitAutograd: special_laguerre_polynomial_l
14689
15075
  device_check: NoCheck
14690
15076
  python_module: special
14691
15077
  variants: function
@@ -14702,6 +15088,8 @@
14702
15088
  tags: pointwise
14703
15089
 
14704
15090
  - func: special_laguerre_polynomial_l.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
15091
+ dispatch:
15092
+ CompositeExplicitAutograd: special_laguerre_polynomial_l_out
14705
15093
  device_check: NoCheck
14706
15094
  python_module: special
14707
15095
  variants: function
@@ -14723,12 +15111,16 @@
14723
15111
  tags: pointwise
14724
15112
 
14725
15113
  - func: special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor
15114
+ dispatch:
15115
+ CompositeExplicitAutograd: special_legendre_polynomial_p
14726
15116
  device_check: NoCheck
14727
15117
  python_module: special
14728
15118
  variants: function
14729
15119
  tags: pointwise
14730
15120
 
14731
15121
  - func: special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor
15122
+ dispatch:
15123
+ CompositeExplicitAutograd: special_legendre_polynomial_p
14732
15124
  device_check: NoCheck
14733
15125
  python_module: special
14734
15126
  variants: function
@@ -14745,6 +15137,8 @@
14745
15137
  tags: pointwise
14746
15138
 
14747
15139
  - func: special_legendre_polynomial_p.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
15140
+ dispatch:
15141
+ CompositeExplicitAutograd: special_legendre_polynomial_p_out
14748
15142
  device_check: NoCheck
14749
15143
  python_module: special
14750
15144
  variants: function
@@ -14856,12 +15250,16 @@
14856
15250
  tags: pointwise
14857
15251
 
14858
15252
  - func: special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
15253
+ dispatch:
15254
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
14859
15255
  device_check: NoCheck
14860
15256
  python_module: special
14861
15257
  variants: function
14862
15258
  tags: pointwise
14863
15259
 
14864
15260
  - func: special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
15261
+ dispatch:
15262
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
14865
15263
  device_check: NoCheck
14866
15264
  python_module: special
14867
15265
  variants: function
@@ -14878,6 +15276,8 @@
14878
15276
  tags: pointwise
14879
15277
 
14880
15278
  - func: special_shifted_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
15279
+ dispatch:
15280
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t_out
14881
15281
  device_check: NoCheck
14882
15282
  python_module: special
14883
15283
  variants: function
@@ -14899,12 +15299,16 @@
14899
15299
  tags: pointwise
14900
15300
 
14901
15301
  - func: special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
15302
+ dispatch:
15303
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
14902
15304
  device_check: NoCheck
14903
15305
  python_module: special
14904
15306
  variants: function
14905
15307
  tags: pointwise
14906
15308
 
14907
15309
  - func: special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
15310
+ dispatch:
15311
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
14908
15312
  device_check: NoCheck
14909
15313
  python_module: special
14910
15314
  variants: function
@@ -14921,6 +15325,8 @@
14921
15325
  tags: pointwise
14922
15326
 
14923
15327
  - func: special_shifted_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
15328
+ dispatch:
15329
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u_out
14924
15330
  device_check: NoCheck
14925
15331
  python_module: special
14926
15332
  variants: function
@@ -14942,12 +15348,16 @@
14942
15348
  tags: pointwise
14943
15349
 
14944
15350
  - func: special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
15351
+ dispatch:
15352
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
14945
15353
  device_check: NoCheck
14946
15354
  python_module: special
14947
15355
  variants: function
14948
15356
  tags: pointwise
14949
15357
 
14950
15358
  - func: special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
15359
+ dispatch:
15360
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
14951
15361
  device_check: NoCheck
14952
15362
  python_module: special
14953
15363
  variants: function
@@ -14964,6 +15374,8 @@
14964
15374
  tags: pointwise
14965
15375
 
14966
15376
  - func: special_shifted_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
15377
+ dispatch:
15378
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v_out
14967
15379
  device_check: NoCheck
14968
15380
  python_module: special
14969
15381
  variants: function
@@ -14985,12 +15397,16 @@
14985
15397
  tags: pointwise
14986
15398
 
14987
15399
  - func: special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
15400
+ dispatch:
15401
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
14988
15402
  device_check: NoCheck
14989
15403
  python_module: special
14990
15404
  variants: function
14991
15405
  tags: pointwise
14992
15406
 
14993
15407
  - func: special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
15408
+ dispatch:
15409
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
14994
15410
  device_check: NoCheck
14995
15411
  python_module: special
14996
15412
  variants: function
@@ -15007,6 +15423,8 @@
15007
15423
  tags: pointwise
15008
15424
 
15009
15425
  - func: special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
15426
+ dispatch:
15427
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w_out
15010
15428
  device_check: NoCheck
15011
15429
  python_module: special
15012
15430
  variants: function
@@ -15075,6 +15493,22 @@
15075
15493
  CUDA: _fused_adamw_kernel_cuda_
15076
15494
  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
15077
15495
 
15496
+ - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
15497
+ # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
15498
+ variants: function
15499
+ dispatch:
15500
+ CUDA: _fused_sgd_kernel_cuda_
15501
+ autogen: _fused_sgd, _fused_sgd.out
15502
+
15503
+ - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
15504
+ # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now),
15505
+ # but the device check is still skipped because the Tensor LR can be on CPU
15506
+ device_check: NoCheck
15507
+ variants: function
15508
+ dispatch:
15509
+ CUDA: _fused_sgd_kernel_cuda_
15510
+ autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
15511
+
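
The fused SGD kernels above are registered for CUDA only, and the tensor_lr variant deliberately skips the device check so a 0-dim learning-rate tensor can stay on the CPU while the parameters live on the GPU. At the Python level this path is selected by the optimizer's fused flag (a hedged sketch; assumes a CUDA build and an optimizer version that exposes fused SGD):

    import torch

    params = [torch.nn.Parameter(torch.randn(4, device="cuda"))]
    opt = torch.optim.SGD(params, lr=torch.tensor(0.1), momentum=0.9,
                          fused=True)
    params[0].grad = torch.randn_like(params[0])
    opt.step()                    # should dispatch the tensor_lr overload
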
15078
15512
  # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
15079
15513
  - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
15080
15514
  variants: function