torch-rb 0.14.1 → 0.16.0

@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out

 # This function compares the storage numel of self with that of other, where
-# storage numel is cumputed as: `other.storage().nbytes() / other.itemsize()`.
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
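For reference, the storage-numel formula in that comment maps to the following Python-level computation (a sketch against public tensor APIs on a recent PyTorch):

    import torch

    other = torch.randn(4, 4)
    # storage numel = total storage bytes / per-element size
    storage_numel = other.untyped_storage().nbytes() // other.element_size()
    assert storage_numel == other.numel()  # equal here: `other` is contiguous and unsliced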
@@ -175,17 +175,29 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda

+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu

 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()

+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range

-- func: sym_constrain_range_for_size(Scalar size, *, int? min, int? max) -> ()
+- func: sym_constrain_range_for_size(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range_for_size

@@ -431,6 +443,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sgn_out
+    MPS: sgn_out_mps
     SparseCPU, SparseCUDA: sgn_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
   tags: pointwise
@@ -469,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -563,8 +577,8 @@
   dispatch:
     SparseCPU: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU: add_out_sparse_csr_cpu
-    SparseCsrCUDA: add_out_sparse_csr_cuda
+    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -681,15 +695,29 @@
   structured_delegate: all.out
   variants: function, method

+- func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: all.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  dispatch:
+    CompositeExplicitAutograd: all_dims_default
+
 - func: all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: all_out
     MPS: all_out_mps

+- func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: all_dims_out
+    CompositeExplicitAutograd: all_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
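The new `all.dims` overload (and the matching `any.dims` further down) accepts a list of reduction dims instead of a single int. A minimal sketch of the Python-level behavior this enables, assuming a build that ships these overloads:

    import torch

    x = torch.tensor([[True, True], [True, False]])
    torch.all(x, dim=(0, 1))  # reduce over both dims at once -> tensor(False)
    torch.any(x, dim=(0, 1))  # -> tensor(True)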
@@ -709,15 +737,30 @@
   variants: function, method
   tags: core

+- func: any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: any.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  tags: core
+  dispatch:
+    CompositeExplicitAutograd: any_dims_default
+
 - func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: any_out
     MPS: any_out_mps

+- func: any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: any_dims_out
+    CompositeExplicitAutograd: any_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -733,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange

-# This operator should be named `aragne.start_out` if following the naming convention. However that
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1190,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise

+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
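`_lazy_clone` is a private copy-on-write clone: storage is shared until either tensor is written. A rough sketch of the intended semantics (private op, subject to change; assumes a LibTorch new enough to ship it):

    import torch

    x = torch.ones(3)
    y = torch.ops.aten._lazy_clone(x)  # no copy yet; storage shared lazily
    y.add_(1)                          # first write materializes the copy
    assert x[0].item() == 1.0 and y[0].item() == 2.0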
@@ -1326,6 +1376,7 @@
   dispatch:
     SparseCPU, SparseCUDA: cat_sparse
     QuantizedCPU: cat_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: cat_nested
   tags: core

 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -1590,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps

 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1613,59 +1665,67 @@
   variants: method
   manual_cpp_binding: True

-- func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups) -> Tensor
+- func: convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution
   autogen: convolution.out
   tags: core

-- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     CompositeExplicitAutograd, CUDA: convolution_backward
   autogen: convolution_backward.out
   tags: core

-- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution_overrideable
   autogen: convolution_overrideable.out

-- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
   dispatch:
     CompositeExplicitAutograd: convolution_backward_overrideable
   autogen: convolution_backward_overrideable.out

-- func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
+- func: _convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _convolution
   autogen: _convolution.out

-- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
+- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, int[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor

-- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] dilation, int groups) -> Tensor
+- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, str padding, SymInt[] dilation, SymInt groups) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _convolution_mode_symint

-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)

-- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
+- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv1d_symint

-- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
+- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv2d_symint

-- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
+- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv3d_symint

-- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, str padding="valid", int[1] dilation=1, int groups=1) -> Tensor
+- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, str padding="valid", SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv1d_padding_symint

-- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, str padding="valid", int[2] dilation=1, int groups=1) -> Tensor
+- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding="valid", SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv2d_padding_symint

-- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, str padding="valid", int[3] dilation=1, int groups=1) -> Tensor
+- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, str padding="valid", SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv3d_padding_symint

 - func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
   dispatch:
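The int[] -> SymInt[] migrations in this block let stride/padding/dilation/groups stay symbolic under tracing rather than being burned in as constants. A hedged sketch of what that enables with torch.compile and dynamic shapes:

    import torch
    import torch.nn.functional as F

    def conv(x, w):
        return F.conv2d(x, w, stride=1, padding=1)

    # SymInt-typed schema arguments can stay symbolic under dynamic tracing
    compiled = torch.compile(conv, dynamic=True)
    out = compiled(torch.randn(1, 3, 8, 8), torch.randn(4, 3, 3, 3))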
@@ -1675,15 +1735,15 @@
 - func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)

 # NB: we inherit the goofy argument order from PyTorch torch.nn.functional
-- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor
+- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, SymInt groups=1, SymInt[1] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose1d_symint

-- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor
+- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt groups=1, SymInt[2] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose2d_symint

-- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
+- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt groups=1, SymInt[3] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose3d_symint

@@ -1691,6 +1751,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutogradNonFunctional: copy
+  tags: core

 - func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
   variants: method
@@ -1720,6 +1781,8 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   structured_delegate: cos.out
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: cos_nested
   tags: [core, pointwise]

 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -1802,32 +1865,35 @@
     CUDA: cudnn_batch_norm_backward
   autogen: cudnn_batch_norm_backward.out

-- func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+- func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-  autogen: cudnn_convolution.out

-- func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out
+
+- func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_transpose
   autogen: cudnn_convolution_transpose.out

-- func: _mps_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: _mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution_transpose
   autogen: _mps_convolution_transpose.out

-- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool[2] output_mask) -> (Tensor, Tensor)
+- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask) -> (Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_transpose_backward
   autogen: mps_convolution_transpose_backward.out

-- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_relu
   autogen: cudnn_convolution_relu.out

-- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_add_relu
   autogen: cudnn_convolution_add_relu.out
@@ -1967,6 +2033,7 @@
   dispatch:
     CPU: ctc_loss_cpu
     CUDA: ctc_loss_gpu
+    Meta: ctc_loss_meta
   autogen: _ctc_loss.out
   tags: dynamic_output_shape # the shape of second output is data dependent

@@ -1999,6 +2066,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: diagonal
+  tags: core

 - func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
   python_module: linalg
@@ -2079,7 +2147,7 @@
   structured_delegate: div.out_mode
   dispatch:
     SparseCPU, SparseCUDA: div_sparse
-  tags: pointwise
+  tags: [core, pointwise]

 - func: div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -2120,7 +2188,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: div
-  tags: pointwise
+  tags: [core, pointwise]

 - func: div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)
   variants: method
@@ -2302,7 +2370,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core

@@ -2370,7 +2438,7 @@
   variants: method
   device_check: NoCheck
   device_guard: False
-  tags: inplace_view
+  tags: [core, inplace_view]
   dispatch:
     Meta: resize__symint
     CPU: resize_
@@ -2408,7 +2476,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out

@@ -2517,7 +2585,7 @@
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse
     SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -2684,10 +2752,15 @@
 - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide

 - func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide_
+  autogen: floor_divide.Scalar_out

 - func: frac(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2905,12 +2978,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps

 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out

 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2918,12 +2993,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps

 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out

 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2931,12 +3008,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps

 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out

 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
@@ -2979,7 +3058,7 @@
 - func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
   variants: function
   dispatch:
-    CPU, CUDA: _unsafe_index
+    CompositeExplicitAutograd: _unsafe_index

 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -3253,14 +3332,22 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm

-- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None) -> Tensor
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear

+- func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
+  dispatch:
+    CUDA: _mixed_dtypes_linear
+
 - func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor

 - func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
@@ -3291,12 +3378,42 @@
   dispatch:
     CompositeExplicitAutograd: linspace

+- func: linspace.Tensor_Tensor(Tensor start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Tensor_Scalar(Tensor start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Scalar_Tensor(Scalar start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
 - func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: linspace_out
     CUDA: linspace_cuda_out
     MPS: linspace_out_mps

+- func: linspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
 - func: log(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: log.out
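The new Tensor_Tensor / Tensor_Scalar / Scalar_Tensor variants (and the logspace ones below) let the endpoints be 0-d tensors. Sketch, assuming they are reachable through the usual torch.linspace entry point:

    import torch

    torch.linspace(0.0, 1.0, 5)                              # existing Scalar overload
    torch.linspace(torch.tensor(0.0), torch.tensor(1.0), 5)  # new Tensor_Tensor overload
    torch.linspace(torch.tensor(0.0), 1.0, 5)                # new Tensor_Scalar overload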
@@ -3322,7 +3439,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log10.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log10_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3346,7 +3463,7 @@
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
     SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3372,7 +3489,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log2.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log2_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3477,11 +3594,41 @@
   dispatch:
     CompositeExplicitAutograd: logspace

+- func: logspace.Tensor_Tensor(Tensor start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Tensor_Scalar(Tensor start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Scalar_Tensor(Scalar start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
 - func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: logspace_out
     CUDA: logspace_cuda_out

+- func: logspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
 # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
 - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
@@ -3847,17 +3994,17 @@
 # TODO: Add this function to MPS dispatch key so that we avoid declaring it in
 # native_functions.yaml
 # https://github.com/pytorch/pytorch/issues/77394
-- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution
   autogen: _mps_convolution.out

-- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_backward
   autogen: mps_convolution_backward.out

-- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: mkldnn_convolution
   autogen: mkldnn_convolution.out
@@ -3883,26 +4030,26 @@
     CUDA: miopen_batch_norm_backward
   autogen: miopen_batch_norm_backward.out

-- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution
   autogen: miopen_convolution.out

-- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution_transpose
   autogen: miopen_convolution_transpose.out

-- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_depthwise_convolution
   autogen: miopen_depthwise_convolution.out

-- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_relu

-- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_add_relu

@@ -3943,6 +4090,20 @@
   dispatch:
     CUDA: _int_mm_out_cuda

+- func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
+  dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
+    CUDA: _convert_weight_to_int4pack_cuda
+
+- func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+  dispatch:
+    CPU: _weight_int4pack_mm_cpu
+    CUDA: _weight_int4pack_mm_cuda
+
+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse

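These entries back weight-only quantized matmul. A very rough sketch of the int8 path (private op; the exact shape/dtype contract is not spelled out in the schema, so treat the shapes below as assumptions):

    import torch

    x = torch.randn(8, 16, dtype=torch.bfloat16)             # activations [m, k]
    w = torch.randint(-128, 127, (4, 16), dtype=torch.int8)  # packed weights [n, k]
    scales = torch.rand(4, dtype=torch.bfloat16)             # per-channel scales [n]
    # roughly (x @ w.to(x.dtype).t()) * scales
    y = torch.ops.aten._weight_int8pack_mm(x, w, scales)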
@@ -4087,6 +4248,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: narrow_symint
+    NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint

 - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
   variants: function, method
@@ -4199,7 +4361,7 @@

 - func: _nnpack_available() -> bool

-- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, int[2] stride=1) -> Tensor
+- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor
   variants: function
   dispatch:
     CompositeExplicitAutograd: _nnpack_spatial_convolution
@@ -4314,23 +4476,24 @@
 - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
   dispatch:
     CPU: pixel_shuffle_cpu
+    MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core

 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
     CPU: pixel_unshuffle_cpu
+    MPS: pixel_unshuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_unshuffle
   autogen: pixel_unshuffle.out

-- func: channel_shuffle(Tensor self, int groups) -> Tensor
+- func: channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU, CUDA: channel_shuffle
     QuantizedCPU: channel_shuffle_quantized_cpu
   autogen: channel_shuffle.out

-- func: native_channel_shuffle(Tensor self, int groups) -> Tensor
+- func: native_channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU: channel_shuffle_cpu
     CompositeImplicitAutograd: math_channel_shuffle
@@ -4338,7 +4501,7 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    CUDA: is_pinned_cuda
+    NestedTensorCUDA, CUDA: is_pinned_cuda
     MPS: is_pinned_mps
     CompositeExplicitAutograd: is_pinned_default

@@ -4352,6 +4515,7 @@
   dispatch:
     CUDA: _pin_memory_cuda
     MPS: _pin_memory_mps
+    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
   autogen: _pin_memory.out

 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
@@ -4660,7 +4824,7 @@
   autogen: repeat.out
   tags: core

-- func: repeat_interleave.Tensor(Tensor repeats, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.Tensor(Tensor repeats, *, SymInt? output_size=None) -> Tensor
   variants: function
   dispatch:
     CPU: repeat_interleave_cpu
@@ -4669,10 +4833,12 @@
   tags: dynamic_output_shape
   autogen: repeat_interleave.Tensor_out

-- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
+  dispatch:
+    CompositeImplicitAutograd: repeat_interleave_symint

-- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
   dispatch:
     CompositeImplicitAutograd: repeat_interleave_symint
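`output_size` becoming SymInt? means it can stay symbolic under tracing; functionally it still just pre-declares the result length so the kernel does not have to derive it from `repeats` at runtime. Example:

    import torch

    x = torch.tensor([10, 20])
    repeats = torch.tensor([1, 3])
    # output_size == repeats.sum(); passing it avoids computing the length on the fly
    x.repeat_interleave(repeats, dim=0, output_size=4)  # tensor([10, 20, 20, 20])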
@@ -4683,7 +4849,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor: reshape_nested
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint

 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4842,6 +5008,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_

 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -4973,12 +5140,14 @@
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
+  tags: pointwise

 - func: silu_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: silu.out
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
+  tags: pointwise

 - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -4987,6 +5156,7 @@
   dispatch:
     CPU, CUDA: silu_out
     MPS: silu_out_mps
+  tags: pointwise

 - func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -4995,6 +5165,7 @@
   dispatch:
     CPU, CUDA: silu_backward_out
     MPS: silu_backward_out_mps
+  tags: pointwise

 - func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: silu_backward.grad_input
@@ -5002,6 +5173,7 @@
   dispatch:
     CompositeImplicitAutograd: math_silu_backward
     NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
+  tags: pointwise

 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
@@ -5017,11 +5189,13 @@
   python_module: nn
   dispatch:
     CPU, CUDA: mish_out
+    MPS: mish_out_mps

 - func: mish_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: mish_backward
+    MPS: mish_backward_mps
     CompositeImplicitAutograd: math_mish_backward

 - func: sigmoid(Tensor self) -> Tensor
@@ -5076,6 +5250,7 @@
   dispatch:
     SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
+    NestedTensorCPU, NestedTensorCUDA: sin_nested
   tags: [core, pointwise]

 - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5221,6 +5396,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out

+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+#   input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+#   easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
@@ -5228,7 +5418,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]

 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
@@ -5427,6 +5617,14 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda

+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
@@ -5618,6 +5816,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out

 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -5873,7 +6072,6 @@
     CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
-  tags: core

 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args

@@ -5956,6 +6154,52 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out

+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
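The _nested_view_from_jagged / _nested_get_* ops above are plumbing for the jagged-layout nested tensor; user code reaches them through torch.nested. A hedged sketch:

    import torch

    nt = torch.nested.nested_tensor(
        [torch.randn(2, 5), torch.randn(3, 5)], layout=torch.jagged
    )
    values, offsets = nt.values(), nt.offsets()  # flat [5, 5] buffer + ragged offsets [0, 2, 5]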
@@ -5971,7 +6215,7 @@
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse
     SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: trunc_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: trunc.out
@@ -6140,6 +6384,7 @@
   variants: function
   dispatch:
     CPU, CUDA: var_mean
+    MPS: var_mean_mps
   autogen: var_mean.correction_out

 - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6160,15 +6405,13 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: where
-    MPS: where_mps
+    CPU, CUDA, MPS: where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: where_self_out
-    MPS: where_self_out_mps
+    CPU, CUDA, MPS: where_self_out

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6196,6 +6439,7 @@
   dispatch:
     CPU: weight_norm_cpu
     CUDA: weight_norm_cuda
+    MPS: weight_norm_mps
   autogen: _weight_norm_interface.out

 - func: _weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6203,6 +6447,7 @@
   dispatch:
     CPU: weight_norm_backward_cpu
     CUDA: weight_norm_backward_cuda
+    MPS: weight_norm_backward_mps
   autogen: _weight_norm_interface_backward.out

 - func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6219,6 +6464,7 @@
   dispatch:
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
+    MPS: _efficientzerotensor_mps
     Meta: _efficientzerotensor_meta
   autogen: _efficientzerotensor.out

@@ -6506,7 +6752,7 @@
     MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
-    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
     NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
@@ -6675,12 +6921,12 @@
   structured_delegate: _addmm_activation.out
   variants: function, method

-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
   variants: function
   dispatch:
     CUDA: _scaled_mm_cuda

-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
   variants: function
   dispatch:
     CUDA: _scaled_mm_out_cuda
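`use_fast_accum` toggles the faster, lower-precision-accumulation FP8 path. A rough sketch of a call against this schema (FP8 dtypes, the column-major requirement on mat2, and the (out, amax) return are version- and hardware-specific, so illustrative only):

    import torch

    a = torch.randn(16, 32, device="cuda").to(torch.float8_e4m3fn)
    b = torch.randn(64, 32, device="cuda").to(torch.float8_e4m3fn).t()  # column-major mat2
    out, amax = torch._scaled_mm(a, b, out_dtype=torch.bfloat16, use_fast_accum=True)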
@@ -6796,7 +7042,7 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

-- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor

@@ -6813,7 +7059,10 @@
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

-- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6899,7 +7148,7 @@
   dispatch:
     CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6916,7 +7165,7 @@
   dispatch:
     CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6932,7 +7181,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6995,7 +7244,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
-    SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
@@ -7004,7 +7253,7 @@
7004
7253
  - func: crow_indices(Tensor(a) self) -> Tensor(a)
7005
7254
  variants: method
7006
7255
  dispatch:
7007
- SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
7256
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
7008
7257
  CompositeExplicitAutograd: crow_indices_default
7009
7258
  device_check: NoCheck
7010
7259
  device_guard: False
@@ -7012,7 +7261,7 @@
7012
7261
  - func: col_indices(Tensor(a) self) -> Tensor(a)
7013
7262
  variants: method
7014
7263
  dispatch:
7015
- SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
7264
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
7016
7265
  CompositeExplicitAutograd: col_indices_default
7017
7266
  device_check: NoCheck
7018
7267
  device_guard: False
@@ -7020,7 +7269,7 @@
7020
7269
  - func: ccol_indices(Tensor(a) self) -> Tensor(a)
7021
7270
  variants: method
7022
7271
  dispatch:
7023
- SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
7272
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
7024
7273
  CompositeExplicitAutograd: ccol_indices_default
7025
7274
  device_check: NoCheck
7026
7275
  device_guard: False
@@ -7028,7 +7277,7 @@
7028
7277
  - func: row_indices(Tensor(a) self) -> Tensor(a)
7029
7278
  variants: method
7030
7279
  dispatch:
7031
- SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
7280
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
7032
7281
  CompositeExplicitAutograd: row_indices_default
7033
7282
  device_check: NoCheck
7034
7283
  device_guard: False
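
The `SparseCsrMeta` keys added throughout this range register the CSR/BSR introspection methods (`sparse_dim`, `dense_dim`, `_nnz`, `values`, and the four index accessors) for the meta device. A minimal sketch of what that enables, assuming a PyTorch build where meta-device sparse compressed tensors are fully wired up:

import torch

# Shape-only CSR tensor on the meta device; no real storage is allocated.
crow = torch.tensor([0, 2, 3], dtype=torch.int64, device="meta")
col = torch.tensor([0, 1, 1], dtype=torch.int64, device="meta")
val = torch.empty(3, device="meta")
csr = torch.sparse_csr_tensor(crow, col, val, size=(2, 2))
print(csr._nnz(), csr.sparse_dim(), csr.dense_dim())  # 3 2 0
print(csr.crow_indices().shape)                       # torch.Size([3])
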
@@ -7055,7 +7304,7 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: unbind
- CompositeImplicitAutogradNestedTensor: NestedTensor_unbind
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind

  - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
  variants: function, method
@@ -7143,14 +7392,14 @@
  CPU: dense_to_mkldnn
  autogen: to_mkldnn.out

- - func: mkldnn_reorder_conv2d_weight(Tensor self, int[2] padding=0, int[2] stride=1, int[2] dilation=1, int groups=1, int[]? input_size=None) -> Tensor
+ - func: mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
  variants: function
  python_module: nn
  dispatch:
  MkldnnCPU: mkldnn_reorder_conv2d_weight
  autogen: mkldnn_reorder_conv2d_weight.out

- - func: mkldnn_reorder_conv3d_weight(Tensor self, int[3] padding=0, int[3] stride=1, int[3] dilation=1, int groups=1) -> Tensor
+ - func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
  variants: function
  python_module: nn
  dispatch:
@@ -7537,6 +7786,7 @@
  dispatch:
  CPU, CUDA, Meta, MPS: set_
  autogen: set.source_Storage, set.source_Storage_out
+ tags: inplace_view

  - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
  variants: method
@@ -7549,6 +7799,7 @@
  MPS: set_storage_mps_
  QuantizedCPU, QuantizedCUDA: set_storage_quantized_
  autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+ tags: inplace_view

  - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
  variants: method
@@ -7556,6 +7807,7 @@
  device_guard: False
  dispatch:
  CompositeImplicitAutograd: set__symint
+ tags: inplace_view

  - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
  variants: method
@@ -7564,6 +7816,7 @@
  dispatch:
  CPU, CUDA, Meta, MPS: set_tensor_
  autogen: set.source_Tensor, set.source_Tensor_out
+ tags: inplace_view

  - func: set_(Tensor(a!) self) -> Tensor(a!)
  variants: method
@@ -7573,6 +7826,7 @@
  Meta: set_meta_
  MPS: set_mps_
  autogen: set, set.out
+ tags: inplace_view

  # Not making it CompositeImplicitAutograd because lift
  # should be a primitive w.r.t. functorch
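
Every `set_` overload above is now tagged `inplace_view`, because the result aliases whatever storage it was rebound to. A small sketch of the aliasing behaviour behind the tag:

import torch

a = torch.zeros(3)
b = torch.arange(3.0)
a.set_(b)      # a is rebound to b's storage, size and strides
b[0] = 99.0
print(a[0])    # tensor(99.) -- a observes writes to b
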
@@ -7656,6 +7910,10 @@
  dispatch:
  CompositeExplicitAutograd: masked_scatter

+ - func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: masked_scatter_backward_symint
+
  - func: _masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor
  dispatch:
  CUDA: masked_softmax_cuda
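
The new `masked_scatter_backward` entry provides a symint-friendly backward for `masked_scatter`. From Python the forward/backward pair is exercised through autograd; a sketch:

import torch

x = torch.zeros(4, requires_grad=True)
src = torch.arange(4.0, requires_grad=True)
mask = torch.tensor([True, False, True, False])
y = x.masked_scatter(mask, src)   # y = [src[0], x[1], src[1], x[3]]
y.sum().backward()
print(x.grad)    # tensor([0., 1., 0., 1.]) -- masked slots receive no gradient
print(src.grad)  # tensor([1., 1., 0., 0.]) -- only consumed src elements do
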
@@ -7938,6 +8196,8 @@
  - func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: bitwise_and_
  tags: pointwise

  - func: bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -7982,6 +8242,8 @@
  - func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: method, function
+ dispatch:
+ CompositeExplicitAutograd: bitwise_or
  tags: [core, pointwise]

  - func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
@@ -8001,6 +8263,8 @@
  - func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: bitwise_or_
  tags: pointwise

  - func: bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -8045,6 +8309,8 @@
  - func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: method, function
+ dispatch:
+ CompositeExplicitAutograd: bitwise_xor
  tags: [core, pointwise]

  - func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
@@ -8064,6 +8330,8 @@
  - func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: bitwise_xor_
  tags: pointwise

  - func: bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
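
The Scalar overloads of the bitwise ops gain an explicit `CompositeExplicitAutograd` kernel across this range. Usage from Python is unchanged; a quick sketch:

import torch

t = torch.tensor([0b0011, 0b0101])
t.bitwise_and_(0b0110)   # -> [2, 4]
t.bitwise_or_(0b1000)    # -> [10, 12]
t.bitwise_xor_(0b0001)   # -> [11, 13]
print(t)
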
@@ -8504,6 +8772,7 @@
  variants: method, function
  dispatch:
  QuantizedCPU: eq_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
  tags: [core, pointwise]

  - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8540,6 +8809,7 @@
  variants: method, function
  dispatch:
  QuantizedCPU: ge_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
  tags: [core, pointwise]

  - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8666,6 +8936,7 @@
  variants: method, function
  dispatch:
  QuantizedCPU: gt_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
  tags: [core, pointwise]

  - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
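
With the `*_scalar_nested` kernels registered above, scalar comparisons work directly on nested tensors. A sketch, assuming the nested-tensor prototype API:

import torch

nt = torch.nested.nested_tensor([torch.tensor([1.0, -2.0]),
                                 torch.tensor([3.0])])
mask = nt > 0                                # nested boolean tensor
print([t.tolist() for t in mask.unbind()])   # [[True, False], [True]]
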
@@ -9106,6 +9377,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: lgamma_out
+ MPS: lgamma_out_mps
  tags: pointwise

  - func: lgamma_(Tensor(a!) self) -> Tensor(a!)
@@ -9126,6 +9398,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: digamma_out
+ MPS: digamma_out_mps
  tags: pointwise

  - func: digamma(Tensor self) -> Tensor
@@ -9140,6 +9413,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: polygamma_out
+ MPS: polygamma_out_mps
  tags: pointwise

  - func: polygamma(int n, Tensor self) -> Tensor
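
`lgamma`, `digamma` and `polygamma` gain native MPS out-kernels here. A sketch, assuming a machine where the MPS backend is available:

import torch

if torch.backends.mps.is_available():
    x = torch.rand(4, device="mps") + 0.5
    print(torch.lgamma(x))
    print(torch.digamma(x))
    print(torch.polygamma(1, x))
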
@@ -9263,7 +9537,7 @@
  dispatch:
  CPU, CUDA: atan2_out
  MPS: atan2_out_mps
- tags: pointwise
+ tags: [core, pointwise]

  - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -9275,7 +9549,7 @@
  device_check: NoCheck # TensorIterator
  structured_delegate: atan2.out
  variants: method, function
- tags: pointwise
+ tags: [core, pointwise]
  # arctan2, alias of atan2

  - func: arctan2(Tensor self, Tensor other) -> Tensor
@@ -9464,7 +9738,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: nextafter_out
+ CPU, CUDA, MPS: nextafter_out
  tags: pointwise

  - func: nextafter(Tensor self, Tensor other) -> Tensor
@@ -9811,7 +10085,7 @@
  - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
  device_check: NoCheck # TensorIterator
  structured_delegate: pow.Scalar_out
- tags: pointwise
+ tags: [core, pointwise]

  - func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -9954,12 +10228,14 @@
  variants: function
  dispatch:
  CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+ CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
  autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

  - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
  variants: function
  dispatch:
  CUDA: _amp_update_scale_cuda_
+ CPU: _amp_update_scale_cpu_
  autogen: _amp_update_scale, _amp_update_scale.out

  #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
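
The CPU variants of the AMP unscale/update kernels make gradient scaling possible without a GPU. A sketch, assuming a PyTorch release where `GradScaler` accepts a CPU device (the exact constructor spelling has varied across releases):

import torch

scaler = torch.amp.GradScaler("cpu")   # assumption: CPU GradScaler is exposed
model = torch.nn.Linear(4, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
with torch.autocast("cpu", dtype=torch.bfloat16):
    loss = model(torch.randn(8, 4)).sum()
scaler.scale(loss).backward()   # unscale backed by _amp_foreach_..._cpu_
scaler.step(opt)
scaler.update()                 # backed by _amp_update_scale_cpu_
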
@@ -10020,6 +10296,21 @@
  CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
  autogen: _foreach_add.ScalarList_out

+ - func: _foreach_add.Tensor(Tensor[] self, Tensor other, *, Scalar alpha=1) -> Tensor[]
+ device_check: NoCheck # foreach kernels fall back to the slow path when tensors are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_tensor_kernel_slow
+ CUDA: foreach_tensor_add_tensor_kernel_cuda
+
+ - func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
+ device_check: NoCheck # foreach kernels fall back to the slow path when tensors are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_tensor_kernel_slow_
+ CUDA: foreach_tensor_add_tensor_kernel_cuda_
+ autogen: _foreach_add.Tensor_out
+
  - func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to the slow path when tensors are on different devices
  variants: function
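
The new `.Tensor` overloads let a single (typically 0-d) tensor be combined with every tensor in a list, which optimizers use for tensor-valued hyperparameters. A sketch of the private API (subject to change):

import torch

params = [torch.zeros(2), torch.zeros(3)]
step = torch.tensor(1.0)                      # 0-d tensor operand
torch._foreach_add_(params, step, alpha=0.5)  # in-place; adds 0.5 everywhere
print(params)
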
@@ -10170,6 +10461,21 @@
  CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
  autogen: _foreach_div.ScalarList_out

+ - func: _foreach_div.Tensor(Tensor[] self, Tensor other) -> Tensor[]
+ device_check: NoCheck # foreach kernels fall back to the slow path when tensors are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_tensor_kernel_slow
+ CUDA: foreach_tensor_div_tensor_kernel_cuda
+
+ - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
+ device_check: NoCheck # foreach kernels fall back to the slow path when tensors are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_tensor_kernel_slow_
+ CUDA: foreach_tensor_div_tensor_kernel_cuda_
+ autogen: _foreach_div.Tensor_out
+
  - func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to the slow path when tensors are on different devices
  variants: function
@@ -10990,37 +11296,44 @@
  dispatch:
  CPU: bucketize_cpu
  CUDA: bucketize_cuda
+ MPS: bucketize_mps

  - func: bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: bucketize_out_cpu
  CUDA: bucketize_out_cuda
+ MPS: bucketize_out_mps

  - func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
  dispatch:
  CPU: bucketize_cpu
  CUDA: bucketize_cuda
+ MPS: bucketize_mps
  autogen: bucketize.Scalar_out

  - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
  dispatch:
  CPU: searchsorted_cpu
  CUDA: searchsorted_cuda
+ MPS: searchsorted_mps

  - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: searchsorted_out_cpu
  CUDA: searchsorted_out_cuda
+ MPS: searchsorted_out_mps

  - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
  dispatch:
  CPU: searchsorted_cpu
  CUDA: searchsorted_cuda
+ MPS: searchsorted_mps

  - func: searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: searchsorted_out_cpu
  CUDA: searchsorted_out_cuda
+ MPS: searchsorted_out_mps

  - func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
  structured_delegate: _convert_indices_from_coo_to_csr.out
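
`bucketize` and `searchsorted` pick up native MPS kernels across all overloads above. A sketch, assuming MPS is available:

import torch

if torch.backends.mps.is_available():
    boundaries = torch.tensor([0.2, 0.5, 0.8], device="mps")
    x = torch.tensor([0.1, 0.6, 0.9], device="mps")
    print(torch.bucketize(x, boundaries))   # tensor([0, 2, 3], device='mps:0')
    seq = torch.tensor([1.0, 3.0, 5.0, 7.0], device="mps")
    print(torch.searchsorted(seq, torch.tensor([4.0], device="mps")))  # tensor([2], ...)
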
@@ -11568,6 +11881,7 @@
  python_module: nn
  dispatch:
  CPU, CUDA: softshrink_out
+ MPS: softshrink_out_mps

  - func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
  structured_delegate: softshrink.out
@@ -11580,6 +11894,7 @@
  python_module: nn
  dispatch:
  CPU, CUDA: softshrink_backward_out
+ MPS: softshrink_backward_out_mps

  - func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
  structured_delegate: softshrink_backward.grad_input
@@ -12144,6 +12459,7 @@
  dispatch:
  CPU: upsample_linear1d_out_cpu
  CUDA: upsample_linear1d_out_cuda
+ MPS: upsample_linear1d_out_mps

  - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
  python_module: nn
@@ -12155,6 +12471,7 @@
  dispatch:
  CPU: upsample_linear1d_backward_out_cpu
  CUDA: upsample_linear1d_backward_out_cuda
+ MPS: upsample_linear1d_backward_out_mps

  - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
  python_module: nn
@@ -12482,101 +12799,101 @@
  # make the operational distinction clear.
  tags: pointwise

- - func: slow_conv_transpose2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
+ - func: slow_conv_transpose2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
  structured: True
  dispatch:
  CPU: slow_conv_transpose2d_structured_cpu
  CUDA: slow_conv_transpose2d_structured_cuda

- - func: slow_conv_transpose2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int[2] dilation=1) -> Tensor
+ - func: slow_conv_transpose2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1) -> Tensor
  python_module: nn
  structured_delegate: slow_conv_transpose2d.out

- - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
+ - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
  dispatch:
  CPU: slow_conv_transpose3d_out_cpu
  CUDA: slow_conv_transpose3d_out_cuda

- - func: slow_conv_transpose3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int[3] dilation=1) -> Tensor
+ - func: slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor
  python_module: nn
  dispatch:
  CPU: slow_conv_transpose3d_cpu
  CUDA: slow_conv_transpose3d_cuda

- - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
+ - func: thnn_conv2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn

- - func: thnn_conv2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0) -> Tensor
+ - func: thnn_conv2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0) -> Tensor
  python_module: nn

- - func: _slow_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output) -> Tensor(a!)
+ - func: _slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
  python_module: nn
  dispatch:
  CPU: slow_conv2d_forward_out_cpu
  CUDA: slow_conv2d_forward_out_cuda

- - func: _slow_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> Tensor
+ - func: _slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
  python_module: nn
  dispatch:
  CPU: slow_conv2d_forward_cpu
  CUDA: slow_conv2d_forward_cuda

- - func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
+ - func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
  python_module: nn
  dispatch:
  CPU: slow_conv2d_backward_out_cpu
  CUDA: slow_conv2d_backward_out_cuda

- - func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+ - func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
  python_module: nn
  dispatch:
  CPU: slow_conv2d_backward_cpu
  CUDA: slow_conv2d_backward_cuda
  autogen: _slow_conv2d_backward.output_mask_out

- - func: _conv_depthwise2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, SymInt[2] padding, int[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
+ - func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
  use_const_ref_for_mutable_tensors: True
  python_module: nn
  dispatch:
  CUDA: conv_depthwise2d_cuda_out

- - func: _conv_depthwise2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, SymInt[2] padding, int[2] dilation) -> Tensor
+ - func: _conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor
  python_module: nn
  dispatch:
  CUDA: conv_depthwise2d_cuda

- - func: conv_depthwise3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding, int[3] dilation) -> Tensor
+ - func: conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor
  python_module: nn
  dispatch:
  CUDA: conv_depthwise3d_cuda
  autogen: conv_depthwise3d.out

- - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
+ - func: slow_conv3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn

- - func: slow_conv3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0) -> Tensor
+ - func: slow_conv3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0) -> Tensor
  python_module: nn

- - func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
+ - func: slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
  python_module: nn
  dispatch:
  CPU: slow_conv3d_forward_out_cpu

- - func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding) -> Tensor
+ - func: slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding) -> Tensor
  python_module: nn
  dispatch:
  CPU: slow_conv3d_forward_cpu

- - func: slow_conv_dilated2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, int[2] dilation=1) -> Tensor
+ - func: slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor
  python_module: nn
  dispatch:
  CPU: slow_conv_dilated2d_cpu
  CUDA: slow_conv_dilated2d_cuda
  autogen: slow_conv_dilated2d.out

- - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, int[3] dilation=1) -> Tensor
+ - func: slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor
  python_module: nn
  dispatch:
  CPU: slow_conv_dilated3d_cpu
@@ -12627,7 +12944,7 @@
  SparseMeta: isinf_sparse_meta
  SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
  autogen: isinf.out
- tags: core
+ tags: [core, pointwise]

  - func: record_stream(Tensor(a!) self, Stream s) -> ()
  variants: method
@@ -13553,11 +13870,18 @@
  dispatch:
  CPU, CUDA: linalg_eig_out

+ - func: _linalg_eigvals(Tensor self) -> Tensor
+ python_module: linalg
+ dispatch:
+ CPU, CUDA: _linalg_eigvals
+
  - func: linalg_eigvals(Tensor self) -> Tensor
  python_module: linalg

  - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg
+ dispatch:
+ CPU, CUDA: linalg_eigvals_out

  # This function exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
  # `linalg.eigvalsh` as composite functions that call this one
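
`linalg_eigvals.out` now has a real dispatch entry (plus the `_linalg_eigvals` helper), so the `out=` form no longer has to round-trip through the composite `eig` path. A sketch:

import torch

A = torch.randn(3, 3)
out = torch.empty(3, dtype=torch.complex64)
torch.linalg.eigvals(A, out=out)   # writes eigenvalues directly into out
print(out)
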
@@ -13861,6 +14185,12 @@
  # It is undocumented and should not be used outside of tests.
  - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor

+ # Note: for testing COW materialization within the `at::parallel_for` loop function
+ - func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _test_parallel_materialize
+
  # Note: this function is only for testing.
  - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
  python_module: nn
@@ -14195,6 +14525,7 @@
  variants: function
  dispatch:
  CompositeExplicitAutograd: split_with_sizes_copy_out
+ CUDA: split_with_sizes_copy_out_cuda

  - func: view_copy(Tensor self, SymInt[] size) -> Tensor
  variants: function
@@ -14269,19 +14600,29 @@
  variants: function
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor ouput, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
- CPU: _scaled_dot_product_flash_attention_cpu
  CUDA: _scaled_dot_product_flash_attention_cuda
  NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+ - func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+ dispatch:
+ CPU: _scaled_dot_product_flash_attention_cpu
+ tags: nondeterministic_seeded
+
+ - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
  device_check: NoCheck
  variants: function
  dispatch:
- CPU: _scaled_dot_product_flash_attention_backward_cpu
  CUDA: _scaled_dot_product_flash_attention_backward_cuda
+ NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested
+
+ - func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CPU: _scaled_dot_product_flash_attention_cpu_backward
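
CPU flash attention moves to the dedicated `_scaled_dot_product_flash_attention_for_cpu` entry with its own backward; user code still goes through the same public entry point. A sketch:

import torch
import torch.nn.functional as F

q, k, v = (torch.randn(2, 4, 16, 8) for _ in range(3))
out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
print(out.shape)  # torch.Size([2, 4, 16, 8]); CPU dispatches to the new entry
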

  - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
  dispatch:
@@ -14295,26 +14636,31 @@
  CUDA: _scaled_dot_product_efficient_attention_backward_cuda
  tags: nondeterministic_seeded

- - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+ dispatch:
+ CUDA: _scaled_dot_product_cudnn_attention_cuda
+ tags: nondeterministic_seeded
+
+ - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  variants: function
  dispatch:
  CUDA: _flash_attention_forward
  tags: nondeterministic_seeded

- - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+ - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
  device_check: NoCheck
  variants: function
  dispatch:
  CUDA: _flash_attention_backward

- # Returns ouput, logsumexp if compute_logsumexp
- - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+ # Returns output, logsumexp if compute_logsumexp
+ - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
  variants: function
  dispatch:
  CUDA: _efficient_attention_forward
  tags: nondeterministic_seeded

- - func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int max_seqlen_k, int max_seqlen_q, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
+ - func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
  device_check: NoCheck
  variants: function
  dispatch:
@@ -14422,12 +14768,16 @@
  tags: pointwise

  - func: special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14444,6 +14794,8 @@
  tags: pointwise

  - func: special_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t_out
  device_check: NoCheck
  python_module: special
  variants: function
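
The `x_scalar`/`n_scalar` overloads of the orthogonal-polynomial ops get explicit `CompositeExplicitAutograd` kernels throughout this range (the same pattern repeats below for the u/v/w, Hermite, Laguerre, Legendre and shifted variants). A sketch of the scalar-`x` form:

import torch

n = torch.tensor([0.0, 1.0, 2.0, 3.0])
print(torch.special.chebyshev_polynomial_t(0.5, n))
# T_n(0.5) -> tensor([ 1.0000,  0.5000, -0.5000, -1.0000])
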
@@ -14465,12 +14817,16 @@
  tags: pointwise

  - func: special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14487,6 +14843,8 @@
  tags: pointwise

  - func: special_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14508,12 +14866,16 @@
  tags: pointwise

  - func: special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14530,6 +14892,8 @@
  tags: pointwise

  - func: special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14551,12 +14915,16 @@
  tags: pointwise

  - func: special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14573,6 +14941,8 @@
  tags: pointwise

  - func: special_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14594,12 +14964,16 @@
  tags: pointwise

  - func: special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_h
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_h
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14616,6 +14990,8 @@
  tags: pointwise

  - func: special_hermite_polynomial_h.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_h_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14637,12 +15013,16 @@
  tags: pointwise

  - func: special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_he
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_he
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14659,6 +15039,8 @@
  tags: pointwise

  - func: special_hermite_polynomial_he.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_he_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14680,12 +15062,16 @@
  tags: pointwise

  - func: special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_laguerre_polynomial_l
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_laguerre_polynomial_l
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14702,6 +15088,8 @@
  tags: pointwise

  - func: special_laguerre_polynomial_l.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_laguerre_polynomial_l_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14723,12 +15111,16 @@
  tags: pointwise

  - func: special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_legendre_polynomial_p
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_legendre_polynomial_p
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14745,6 +15137,8 @@
  tags: pointwise

  - func: special_legendre_polynomial_p.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_legendre_polynomial_p_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14856,12 +15250,16 @@
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14878,6 +15276,8 @@
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14899,12 +15299,16 @@
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14921,6 +15325,8 @@
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14942,12 +15348,16 @@
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14964,6 +15374,8 @@
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -14985,12 +15397,16 @@
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
  device_check: NoCheck
  python_module: special
  variants: function
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
  device_check: NoCheck
  python_module: special
  variants: function
@@ -15007,6 +15423,8 @@
  tags: pointwise

  - func: special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w_out
  device_check: NoCheck
  python_module: special
  variants: function
@@ -15075,6 +15493,22 @@
  CUDA: _fused_adamw_kernel_cuda_
  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

+ - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+ # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+ variants: function
+ dispatch:
+ CUDA: _fused_sgd_kernel_cuda_
+ autogen: _fused_sgd, _fused_sgd.out
+
+ - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+ # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+ # but still skip the device check as the Tensor LR can be on CPU
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CUDA: _fused_sgd_kernel_cuda_
+ autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
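
The `_fused_sgd_` kernels back a fused CUDA fast path for SGD. A sketch, assuming a CUDA build and a release where `torch.optim.SGD` exposes `fused=True`:

import torch

model = torch.nn.Linear(8, 2).cuda()
opt = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9,
                      fused=True)   # assumption: fused flag available
loss = model(torch.randn(4, 8, device="cuda")).sum()
loss.backward()
opt.step()   # dispatches to _fused_sgd_kernel_cuda_
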
  # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
  - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
  variants: function