torch-rb 0.14.1 → 0.16.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +4 -6
- data/codegen/native_functions.yaml +552 -118
- data/ext/torch/extconf.rb +3 -0
- data/ext/torch/templates.h +0 -23
- data/ext/torch/tensor.cpp +1 -0
- data/ext/torch/utils.h +1 -1
- data/lib/torch/inspector.rb +8 -3
- data/lib/torch/nn/elu.rb +20 -0
- data/lib/torch/nn/functional.rb +12 -0
- data/lib/torch/nn/gelu.rb +18 -0
- data/lib/torch/nn/leaky_relu.rb +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +2 -0
- metadata +6 -11
- data/ext/torch/fft_functions.h +0 -6
- data/ext/torch/linalg_functions.h +0 -6
- data/ext/torch/nn_functions.h +0 -6
- data/ext/torch/sparse_functions.h +0 -6
- data/ext/torch/special_functions.h +0 -6
- data/ext/torch/tensor_functions.h +0 -6
- data/ext/torch/torch_functions.h +0 -6
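Most of the diff below is the regenerated data/codegen/native_functions.yaml (+552 -118), which tracks the operator schemas of a newer LibTorch release. On the Ruby side, the new data/lib/torch/nn/elu.rb and data/lib/torch/nn/gelu.rb files plus the additions to functional.rb indicate that ELU and GELU activation modules were added. A minimal usage sketch, assuming the classes follow torch-rb's usual naming and call conventions (Torch::NN::ELU, Torch::NN::GELU, the alpha: keyword, and the functional names below are inferred from the file names, not confirmed against the gem source):

require "torch"

x = Torch.randn(2, 3)

# Assumed class names, mirroring PyTorch's torch.nn.ELU / torch.nn.GELU
elu  = Torch::NN::ELU.new(alpha: 1.0)
gelu = Torch::NN::GELU.new

p elu.call(x)   # element-wise ELU of x
p gelu.call(x)  # element-wise GELU of x

# Functional forms (assumed to be the matching additions in functional.rb)
# Torch::NN::Functional.elu(x)
# Torch::NN::Functional.gelu(x)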
data/codegen/native_functions.yaml

@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out

 # This function compares the storage numel of self with that of other, where
-# storage numel is
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
@@ -175,17 +175,29 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda

+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu

 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()

+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range

-- func: sym_constrain_range_for_size(Scalar size, *, int? min, int? max) -> ()
+- func: sym_constrain_range_for_size(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range_for_size

@@ -431,6 +443,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sgn_out
+    MPS: sgn_out_mps
     SparseCPU, SparseCUDA: sgn_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
   tags: pointwise
@@ -469,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -563,8 +577,8 @@
   dispatch:
     SparseCPU: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU:
-    SparseCsrCUDA:
+    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -681,15 +695,29 @@
   structured_delegate: all.out
   variants: function, method

+- func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: all.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  dispatch:
+    CompositeExplicitAutograd: all_dims_default
+
 - func: all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: all_out
     MPS: all_out_mps

+- func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: all_dims_out
+    CompositeExplicitAutograd: all_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -709,15 +737,30 @@
   variants: function, method
   tags: core

+- func: any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: any.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  tags: core
+  dispatch:
+    CompositeExplicitAutograd: any_dims_default
+
 - func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: any_out
     MPS: any_out_mps

+- func: any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: any_dims_out
+    CompositeExplicitAutograd: any_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -733,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange

-# This operator should be named `
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1190,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise

+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -1326,6 +1376,7 @@
   dispatch:
     SparseCPU, SparseCUDA: cat_sparse
     QuantizedCPU: cat_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: cat_nested
   tags: core

 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -1590,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps

 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1613,59 +1665,67 @@
   variants: method
   manual_cpp_binding: True

-- func: convolution(Tensor input, Tensor weight, Tensor? bias,
+- func: convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution
   autogen: convolution.out
   tags: core

-- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes,
+- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     CompositeExplicitAutograd, CUDA: convolution_backward
   autogen: convolution_backward.out
   tags: core

-- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias,
+- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution_overrideable
   autogen: convolution_overrideable.out

-- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight,
+- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
   dispatch:
     CompositeExplicitAutograd: convolution_backward_overrideable
   autogen: convolution_backward_overrideable.out

-- func: _convolution(Tensor input, Tensor weight, Tensor? bias,
+- func: _convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _convolution
   autogen: _convolution.out

-- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias,
+- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, int[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor

-- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias,
+- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, str padding, SymInt[] dilation, SymInt groups) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _convolution_mode_symint

-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self,
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)

-- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv1d_symint

-- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv2d_symint

-- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv3d_symint

-- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, str padding="valid", SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv1d_padding_symint

-- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding="valid", SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv2d_padding_symint

-- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, str padding="valid", SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv3d_padding_symint

 - func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
   dispatch:
@@ -1675,15 +1735,15 @@
 - func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)

 # NB: we inherit the goofy argument order from PyTorch torch.nn.functional
-- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, SymInt groups=1, SymInt[1] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose1d_symint

-- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt groups=1, SymInt[2] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose2d_symint

-- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt groups=1, SymInt[3] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose3d_symint

@@ -1691,6 +1751,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutogradNonFunctional: copy
+  tags: core

 - func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
   variants: method
@@ -1720,6 +1781,8 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   structured_delegate: cos.out
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: cos_nested
   tags: [core, pointwise]

 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -1802,32 +1865,35 @@
     CUDA: cudnn_batch_norm_backward
   autogen: cudnn_batch_norm_backward.out

-- func: cudnn_convolution(Tensor self, Tensor weight,
+- func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-  autogen: cudnn_convolution.out

-- func:
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out
+
+- func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_transpose
   autogen: cudnn_convolution_transpose.out

-- func: _mps_convolution_transpose(Tensor self, Tensor weight,
+- func: _mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution_transpose
   autogen: _mps_convolution_transpose.out

-- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight,
+- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask) -> (Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_transpose_backward
   autogen: mps_convolution_transpose_backward.out

-- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias,
+- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_relu
   autogen: cudnn_convolution_relu.out

-- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias,
+- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_add_relu
   autogen: cudnn_convolution_add_relu.out
@@ -1967,6 +2033,7 @@
   dispatch:
     CPU: ctc_loss_cpu
     CUDA: ctc_loss_gpu
+    Meta: ctc_loss_meta
   autogen: _ctc_loss.out
   tags: dynamic_output_shape # the shape of second output is data dependent

@@ -1999,6 +2066,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: diagonal
+  tags: core

 - func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
   python_module: linalg
@@ -2079,7 +2147,7 @@
   structured_delegate: div.out_mode
   dispatch:
     SparseCPU, SparseCUDA: div_sparse
-  tags: pointwise
+  tags: [core, pointwise]

 - func: div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -2120,7 +2188,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: div
-  tags: pointwise
+  tags: [core, pointwise]

 - func: div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)
   variants: method
@@ -2302,7 +2370,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core

@@ -2370,7 +2438,7 @@
   variants: method
   device_check: NoCheck
   device_guard: False
-  tags: inplace_view
+  tags: [core, inplace_view]
   dispatch:
     Meta: resize__symint
     CPU: resize_
@@ -2408,7 +2476,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out

@@ -2517,7 +2585,7 @@
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse
     SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -2684,10 +2752,15 @@
 - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide

 - func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide_
+  autogen: floor_divide.Scalar_out

 - func: frac(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2905,12 +2978,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps

 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out

 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2918,12 +2993,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps

 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out

 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2931,12 +3008,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps

 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out

 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
@@ -2979,7 +3058,7 @@
 - func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: _unsafe_index

 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -3253,14 +3332,22 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm

-- func:
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear

+- func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
+  dispatch:
+    CUDA: _mixed_dtypes_linear
+
 - func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor

 - func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
@@ -3291,12 +3378,42 @@
   dispatch:
     CompositeExplicitAutograd: linspace

+- func: linspace.Tensor_Tensor(Tensor start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Tensor_Scalar(Tensor start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Scalar_Tensor(Scalar start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
 - func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: linspace_out
     CUDA: linspace_cuda_out
     MPS: linspace_out_mps

+- func: linspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
 - func: log(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: log.out
@@ -3322,7 +3439,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log10.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log10_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3346,7 +3463,7 @@
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
     SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3372,7 +3489,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log2.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log2_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3477,11 +3594,41 @@
   dispatch:
     CompositeExplicitAutograd: logspace

+- func: logspace.Tensor_Tensor(Tensor start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Tensor_Scalar(Tensor start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Scalar_Tensor(Scalar start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
 - func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: logspace_out
     CUDA: logspace_cuda_out

+- func: logspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
 # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
 - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
@@ -3847,17 +3994,17 @@
 # TODO: Add this function to MPS dispatch key so that we avoid declaring it in
 # native_functions.yaml
 # https://github.com/pytorch/pytorch/issues/77394
-- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias,
+- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution
   autogen: _mps_convolution.out

-- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight,
+- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_backward
   autogen: mps_convolution_backward.out

-- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding,
+- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: mkldnn_convolution
   autogen: mkldnn_convolution.out
@@ -3883,26 +4030,26 @@
     CUDA: miopen_batch_norm_backward
   autogen: miopen_batch_norm_backward.out

-- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding,
+- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution
   autogen: miopen_convolution.out

-- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding,
+- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution_transpose
   autogen: miopen_convolution_transpose.out

-- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding,
+- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_depthwise_convolution
   autogen: miopen_depthwise_convolution.out

-- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias,
+- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_relu

-- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias,
+- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_add_relu

@@ -3943,6 +4090,20 @@
   dispatch:
     CUDA: _int_mm_out_cuda

+- func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
+  dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
+    CUDA: _convert_weight_to_int4pack_cuda
+
+- func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+  dispatch:
+    CPU: _weight_int4pack_mm_cpu
+    CUDA: _weight_int4pack_mm_cuda
+
+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse

@@ -4087,6 +4248,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: narrow_symint
+    NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint

 - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
   variants: function, method
@@ -4199,7 +4361,7 @@

 - func: _nnpack_available() -> bool

-- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding,
+- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor
   variants: function
   dispatch:
     CompositeExplicitAutograd: _nnpack_spatial_convolution
@@ -4314,23 +4476,24 @@
 - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
   dispatch:
     CPU: pixel_shuffle_cpu
+    MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core

 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
     CPU: pixel_unshuffle_cpu
+    MPS: pixel_unshuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_unshuffle
   autogen: pixel_unshuffle.out

-- func: channel_shuffle(Tensor self,
+- func: channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU, CUDA: channel_shuffle
     QuantizedCPU: channel_shuffle_quantized_cpu
   autogen: channel_shuffle.out

-- func: native_channel_shuffle(Tensor self,
+- func: native_channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU: channel_shuffle_cpu
     CompositeImplicitAutograd: math_channel_shuffle
@@ -4338,7 +4501,7 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    CUDA: is_pinned_cuda
+    NestedTensorCUDA, CUDA: is_pinned_cuda
     MPS: is_pinned_mps
     CompositeExplicitAutograd: is_pinned_default

@@ -4352,6 +4515,7 @@
   dispatch:
     CUDA: _pin_memory_cuda
     MPS: _pin_memory_mps
+    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
   autogen: _pin_memory.out

 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
@@ -4660,7 +4824,7 @@
   autogen: repeat.out
   tags: core

-- func: repeat_interleave.Tensor(Tensor repeats, *,
+- func: repeat_interleave.Tensor(Tensor repeats, *, SymInt? output_size=None) -> Tensor
   variants: function
   dispatch:
     CPU: repeat_interleave_cpu
@@ -4669,10 +4833,12 @@
   tags: dynamic_output_shape
   autogen: repeat_interleave.Tensor_out

-- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *,
+- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
+  dispatch:
+    CompositeImplicitAutograd: repeat_interleave_symint

-- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *,
+- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
   dispatch:
     CompositeImplicitAutograd: repeat_interleave_symint
@@ -4683,7 +4849,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor:
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint

 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4842,6 +5008,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_

 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -4973,12 +5140,14 @@
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
+  tags: pointwise

 - func: silu_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: silu.out
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
+  tags: pointwise

 - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -4987,6 +5156,7 @@
   dispatch:
     CPU, CUDA: silu_out
     MPS: silu_out_mps
+  tags: pointwise

 - func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -4995,6 +5165,7 @@
   dispatch:
     CPU, CUDA: silu_backward_out
     MPS: silu_backward_out_mps
+  tags: pointwise

 - func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: silu_backward.grad_input
@@ -5002,6 +5173,7 @@
   dispatch:
     CompositeImplicitAutograd: math_silu_backward
     NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
+  tags: pointwise

 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
@@ -5017,11 +5189,13 @@
   python_module: nn
   dispatch:
     CPU, CUDA: mish_out
+    MPS: mish_out_mps

 - func: mish_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: mish_backward
+    MPS: mish_backward_mps
     CompositeImplicitAutograd: math_mish_backward

 - func: sigmoid(Tensor self) -> Tensor
@@ -5076,6 +5250,7 @@
   dispatch:
     SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
+    NestedTensorCPU, NestedTensorCUDA: sin_nested
   tags: [core, pointwise]

 - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5221,6 +5396,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out

+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+#   input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+#   easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
@@ -5228,7 +5418,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]

 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
@@ -5427,6 +5617,14 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda

+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
@@ -5618,6 +5816,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out

 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -5873,7 +6072,6 @@
     CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
-  tags: core

 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args

@@ -5956,6 +6154,52 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out

+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
@@ -5971,7 +6215,7 @@
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse
     SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: trunc_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: trunc.out
@@ -6140,6 +6384,7 @@
   variants: function
   dispatch:
     CPU, CUDA: var_mean
+    MPS: var_mean_mps
   autogen: var_mean.correction_out

 - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6160,15 +6405,13 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: where
-    MPS: where_mps
+    CPU, CUDA, MPS: where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: where_self_out
-    MPS: where_self_out_mps
+    CPU, CUDA, MPS: where_self_out

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6196,6 +6439,7 @@
   dispatch:
     CPU: weight_norm_cpu
     CUDA: weight_norm_cuda
+    MPS: weight_norm_mps
   autogen: _weight_norm_interface.out

 - func: _weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6203,6 +6447,7 @@
   dispatch:
     CPU: weight_norm_backward_cpu
     CUDA: weight_norm_backward_cuda
+    MPS: weight_norm_backward_mps
   autogen: _weight_norm_interface_backward.out

 - func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6219,6 +6464,7 @@
   dispatch:
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
+    MPS: _efficientzerotensor_mps
     Meta: _efficientzerotensor_meta
   autogen: _efficientzerotensor.out

@@ -6506,7 +6752,7 @@
     MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
-    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
     NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
@@ -6675,12 +6921,12 @@
   structured_delegate: _addmm_activation.out
   variants: function, method

-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
   variants: function
   dispatch:
     CUDA: _scaled_mm_cuda

-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
   variants: function
   dispatch:
     CUDA: _scaled_mm_out_cuda
@@ -6796,7 +7042,7 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

-- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor

@@ -6813,7 +7059,10 @@
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

-- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint

 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6899,7 +7148,7 @@
   dispatch:
     CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6916,7 +7165,7 @@
   dispatch:
     CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6932,7 +7181,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6995,7 +7244,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
-    SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
@@ -7004,7 +7253,7 @@
 - func: crow_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
     CompositeExplicitAutograd: crow_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7012,7 +7261,7 @@
 - func: col_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
     CompositeExplicitAutograd: col_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7020,7 +7269,7 @@
 - func: ccol_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
     CompositeExplicitAutograd: ccol_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7028,7 +7277,7 @@
 - func: row_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
     CompositeExplicitAutograd: row_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7055,7 +7304,7 @@
|
|
7055
7304
|
variants: function, method
|
7056
7305
|
dispatch:
|
7057
7306
|
CompositeExplicitAutograd: unbind
|
7058
|
-
|
7307
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
|
7059
7308
|
|
7060
7309
|
- func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
|
7061
7310
|
variants: function, method
|
@@ -7143,14 +7392,14 @@
|
|
7143
7392
|
CPU: dense_to_mkldnn
|
7144
7393
|
autogen: to_mkldnn.out
|
7145
7394
|
|
7146
|
-
- func: mkldnn_reorder_conv2d_weight(Tensor self,
|
7395
|
+
- func: mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
|
7147
7396
|
variants: function
|
7148
7397
|
python_module: nn
|
7149
7398
|
dispatch:
|
7150
7399
|
MkldnnCPU: mkldnn_reorder_conv2d_weight
|
7151
7400
|
autogen: mkldnn_reorder_conv2d_weight.out
|
7152
7401
|
|
7153
|
-
- func: mkldnn_reorder_conv3d_weight(Tensor self,
|
7402
|
+
- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
|
7154
7403
|
variants: function
|
7155
7404
|
python_module: nn
|
7156
7405
|
dispatch:
|
@@ -7537,6 +7786,7 @@
|
|
7537
7786
|
dispatch:
|
7538
7787
|
CPU, CUDA, Meta, MPS: set_
|
7539
7788
|
autogen: set.source_Storage, set.source_Storage_out
|
7789
|
+
tags: inplace_view
|
7540
7790
|
|
7541
7791
|
- func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
|
7542
7792
|
variants: method
|
@@ -7549,6 +7799,7 @@
|
|
7549
7799
|
MPS: set_storage_mps_
|
7550
7800
|
QuantizedCPU, QuantizedCUDA: set_storage_quantized_
|
7551
7801
|
autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
|
7802
|
+
tags: inplace_view
|
7552
7803
|
|
7553
7804
|
- func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
|
7554
7805
|
variants: method
|
@@ -7556,6 +7807,7 @@
|
|
7556
7807
|
device_guard: False
|
7557
7808
|
dispatch:
|
7558
7809
|
CompositeImplicitAutograd: set__symint
|
7810
|
+
tags: inplace_view
|
7559
7811
|
|
7560
7812
|
- func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
|
7561
7813
|
variants: method
|
@@ -7564,6 +7816,7 @@
|
|
7564
7816
|
dispatch:
|
7565
7817
|
CPU, CUDA, Meta, MPS: set_tensor_
|
7566
7818
|
autogen: set.source_Tensor, set.source_Tensor_out
|
7819
|
+
tags: inplace_view
|
7567
7820
|
|
7568
7821
|
- func: set_(Tensor(a!) self) -> Tensor(a!)
|
7569
7822
|
variants: method
|
@@ -7573,6 +7826,7 @@
|
|
7573
7826
|
Meta: set_meta_
|
7574
7827
|
MPS: set_mps_
|
7575
7828
|
autogen: set, set.out
|
7829
|
+
tags: inplace_view
|
7576
7830
|
|
7577
7831
|
# Not making it CompositeImplicitAutograd because lift
|
7578
7832
|
# should be a primitive w.r.t. functorch
|
@@ -7656,6 +7910,10 @@
   dispatch:
     CompositeExplicitAutograd: masked_scatter
 
+- func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: masked_scatter_backward_symint
+
 - func: _masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor
   dispatch:
     CUDA: masked_softmax_cuda
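The new masked_scatter_backward entry gives masked_scatter a dedicated backward op. A short sketch of the forward operation it serves, in PyTorch's Python API (illustrative only):

import torch

x    = torch.zeros(2, 3)
mask = torch.tensor([[True, False, True],
                     [False, True, False]])
src  = torch.tensor([1., 2., 3.], requires_grad=True)

# Values from `src` are written, in row-major order, into the True positions of `mask`.
y = x.masked_scatter(mask, src)
# The gradient w.r.t. `src` is what masked_scatter_backward computes from grad_output,
# the mask, and the (symbolic) size of `src`.
y.sum().backward()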
@@ -7938,6 +8196,8 @@
|
|
7938
8196
|
- func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
7939
8197
|
device_check: NoCheck # TensorIterator
|
7940
8198
|
variants: method
|
8199
|
+
dispatch:
|
8200
|
+
CompositeExplicitAutograd: bitwise_and_
|
7941
8201
|
tags: pointwise
|
7942
8202
|
|
7943
8203
|
- func: bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
@@ -7982,6 +8242,8 @@
|
|
7982
8242
|
- func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
|
7983
8243
|
device_check: NoCheck # TensorIterator
|
7984
8244
|
variants: method, function
|
8245
|
+
dispatch:
|
8246
|
+
CompositeExplicitAutograd: bitwise_or
|
7985
8247
|
tags: [core, pointwise]
|
7986
8248
|
|
7987
8249
|
- func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
@@ -8001,6 +8263,8 @@
|
|
8001
8263
|
- func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
8002
8264
|
device_check: NoCheck # TensorIterator
|
8003
8265
|
variants: method
|
8266
|
+
dispatch:
|
8267
|
+
CompositeExplicitAutograd: bitwise_or_
|
8004
8268
|
tags: pointwise
|
8005
8269
|
|
8006
8270
|
- func: bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
@@ -8045,6 +8309,8 @@
|
|
8045
8309
|
- func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
|
8046
8310
|
device_check: NoCheck # TensorIterator
|
8047
8311
|
variants: method, function
|
8312
|
+
dispatch:
|
8313
|
+
CompositeExplicitAutograd: bitwise_xor
|
8048
8314
|
tags: [core, pointwise]
|
8049
8315
|
|
8050
8316
|
- func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
@@ -8064,6 +8330,8 @@
|
|
8064
8330
|
- func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
8065
8331
|
device_check: NoCheck # TensorIterator
|
8066
8332
|
variants: method
|
8333
|
+
dispatch:
|
8334
|
+
CompositeExplicitAutograd: bitwise_xor_
|
8067
8335
|
tags: pointwise
|
8068
8336
|
|
8069
8337
|
- func: bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
@@ -8504,6 +8772,7 @@
|
|
8504
8772
|
variants: method, function
|
8505
8773
|
dispatch:
|
8506
8774
|
QuantizedCPU: eq_quantized_cpu
|
8775
|
+
NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
|
8507
8776
|
tags: [core, pointwise]
|
8508
8777
|
|
8509
8778
|
- func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -8540,6 +8809,7 @@
|
|
8540
8809
|
variants: method, function
|
8541
8810
|
dispatch:
|
8542
8811
|
QuantizedCPU: ge_quantized_cpu
|
8812
|
+
NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
|
8543
8813
|
tags: [core, pointwise]
|
8544
8814
|
|
8545
8815
|
- func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -8666,6 +8936,7 @@
|
|
8666
8936
|
variants: method, function
|
8667
8937
|
dispatch:
|
8668
8938
|
QuantizedCPU: gt_quantized_cpu
|
8939
|
+
NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
|
8669
8940
|
tags: [core, pointwise]
|
8670
8941
|
|
8671
8942
|
- func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -9106,6 +9377,7 @@
|
|
9106
9377
|
structured_inherits: TensorIteratorBase
|
9107
9378
|
dispatch:
|
9108
9379
|
CPU, CUDA: lgamma_out
|
9380
|
+
MPS: lgamma_out_mps
|
9109
9381
|
tags: pointwise
|
9110
9382
|
|
9111
9383
|
- func: lgamma_(Tensor(a!) self) -> Tensor(a!)
|
@@ -9126,6 +9398,7 @@
|
|
9126
9398
|
structured_inherits: TensorIteratorBase
|
9127
9399
|
dispatch:
|
9128
9400
|
CPU, CUDA: digamma_out
|
9401
|
+
MPS: digamma_out_mps
|
9129
9402
|
tags: pointwise
|
9130
9403
|
|
9131
9404
|
- func: digamma(Tensor self) -> Tensor
|
@@ -9140,6 +9413,7 @@
|
|
9140
9413
|
structured_inherits: TensorIteratorBase
|
9141
9414
|
dispatch:
|
9142
9415
|
CPU, CUDA: polygamma_out
|
9416
|
+
MPS: polygamma_out_mps
|
9143
9417
|
tags: pointwise
|
9144
9418
|
|
9145
9419
|
- func: polygamma(int n, Tensor self) -> Tensor
|
@@ -9263,7 +9537,7 @@
|
|
9263
9537
|
dispatch:
|
9264
9538
|
CPU, CUDA: atan2_out
|
9265
9539
|
MPS: atan2_out_mps
|
9266
|
-
tags: pointwise
|
9540
|
+
tags: [core, pointwise]
|
9267
9541
|
|
9268
9542
|
- func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
9269
9543
|
device_check: NoCheck # TensorIterator
|
@@ -9275,7 +9549,7 @@
|
|
9275
9549
|
device_check: NoCheck # TensorIterator
|
9276
9550
|
structured_delegate: atan2.out
|
9277
9551
|
variants: method, function
|
9278
|
-
tags: pointwise
|
9552
|
+
tags: [core, pointwise]
|
9279
9553
|
# arctan2, alias of atan2
|
9280
9554
|
|
9281
9555
|
- func: arctan2(Tensor self, Tensor other) -> Tensor
|
@@ -9464,7 +9738,7 @@
|
|
9464
9738
|
structured: True
|
9465
9739
|
structured_inherits: TensorIteratorBase
|
9466
9740
|
dispatch:
|
9467
|
-
CPU, CUDA: nextafter_out
|
9741
|
+
CPU, CUDA, MPS: nextafter_out
|
9468
9742
|
tags: pointwise
|
9469
9743
|
|
9470
9744
|
- func: nextafter(Tensor self, Tensor other) -> Tensor
|
@@ -9811,7 +10085,7 @@
|
|
9811
10085
|
- func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
|
9812
10086
|
device_check: NoCheck # TensorIterator
|
9813
10087
|
structured_delegate: pow.Scalar_out
|
9814
|
-
tags: pointwise
|
10088
|
+
tags: [core, pointwise]
|
9815
10089
|
|
9816
10090
|
- func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
|
9817
10091
|
device_check: NoCheck # TensorIterator
|
@@ -9954,12 +10228,14 @@
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+    CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out
 
 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+    CPU: _amp_update_scale_cpu_
   autogen: _amp_update_scale, _amp_update_scale.out
 
 #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
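The two CPU kernels added above bring gradient (un)scaling to the CPU backend. A minimal sketch of the public surface that drives them, assuming a PyTorch build whose GradScaler accepts a device argument (illustrative only, not torch-rb API):

import torch

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.amp.GradScaler("cpu")   # assumes the device-generic GradScaler is available

x, y = torch.randn(8, 4), torch.randn(8, 1)
with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
    loss = torch.nn.functional.mse_loss(model(x), y)

# scale()/step()/update() call _amp_foreach_non_finite_check_and_unscale_ and
# _amp_update_scale_ under the hood, which now have CPU dispatch entries.
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()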
@@ -10020,6 +10296,21 @@
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
   autogen: _foreach_add.ScalarList_out
 
+- func: _foreach_add.Tensor(Tensor[] self, Tensor other, *, Scalar alpha=1) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_add_tensor_kernel_slow
+    CUDA: foreach_tensor_add_tensor_kernel_cuda
+
+- func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_add_tensor_kernel_slow_
+    CUDA: foreach_tensor_add_tensor_kernel_cuda_
+  autogen: _foreach_add.Tensor_out
+
 - func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
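The new .Tensor overloads above accept a single tensor as `other` (previously only a Scalar, a ScalarList, or a matching list of tensors was allowed). A sketch, assuming the op is reachable through the usual private torch._foreach_* bindings (illustrative only):

import torch

params = [torch.ones(3), torch.ones(2, 2)]
step   = torch.tensor(0.5)

# In-place: add the same 0-dim tensor to every tensor in the list.
torch._foreach_add_(params, step)

# Out-of-place variant returning a new list; alpha scales `other`.
shifted = torch._foreach_add(params, step, alpha=2.0)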
@@ -10170,6 +10461,21 @@
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
   autogen: _foreach_div.ScalarList_out
 
+- func: _foreach_div.Tensor(Tensor[] self, Tensor other) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_div_tensor_kernel_slow
+    CUDA: foreach_tensor_div_tensor_kernel_cuda
+
+- func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_div_tensor_kernel_slow_
+    CUDA: foreach_tensor_div_tensor_kernel_cuda_
+  autogen: _foreach_div.Tensor_out
+
 - func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
@@ -10990,37 +11296,44 @@
   dispatch:
     CPU: bucketize_cpu
     CUDA: bucketize_cuda
+    MPS: bucketize_mps
 
 - func: bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU: bucketize_out_cpu
     CUDA: bucketize_out_cuda
+    MPS: bucketize_out_mps
 
 - func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
   dispatch:
     CPU: bucketize_cpu
     CUDA: bucketize_cuda
+    MPS: bucketize_mps
   autogen: bucketize.Scalar_out
 
 - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
   dispatch:
     CPU: searchsorted_cpu
     CUDA: searchsorted_cuda
+    MPS: searchsorted_mps
 
 - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU: searchsorted_out_cpu
     CUDA: searchsorted_out_cuda
+    MPS: searchsorted_out_mps
 
 - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
   dispatch:
     CPU: searchsorted_cpu
     CUDA: searchsorted_cuda
+    MPS: searchsorted_mps
 
 - func: searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU: searchsorted_out_cpu
     CUDA: searchsorted_out_cuda
+    MPS: searchsorted_out_mps
 
 - func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
   structured_delegate: _convert_indices_from_coo_to_csr.out
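The MPS entries above register Metal kernels for bucketize and searchsorted; the semantics are unchanged. A small sketch in PyTorch's Python API (the .to("mps") line assumes an Apple-silicon build and is illustrative only):

import torch

boundaries = torch.tensor([1.0, 3.0, 5.0, 7.0])
values     = torch.tensor([0.5, 4.2, 9.0])

torch.bucketize(values, boundaries)        # -> tensor([0, 2, 4])
torch.searchsorted(boundaries, values)     # same result; the sorted sequence comes first

# With this change the same calls should also dispatch on the MPS backend, e.g.:
# torch.bucketize(values.to("mps"), boundaries.to("mps"))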
@@ -11568,6 +11881,7 @@
|
|
11568
11881
|
python_module: nn
|
11569
11882
|
dispatch:
|
11570
11883
|
CPU, CUDA: softshrink_out
|
11884
|
+
MPS: softshrink_out_mps
|
11571
11885
|
|
11572
11886
|
- func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
|
11573
11887
|
structured_delegate: softshrink.out
|
@@ -11580,6 +11894,7 @@
|
|
11580
11894
|
python_module: nn
|
11581
11895
|
dispatch:
|
11582
11896
|
CPU, CUDA: softshrink_backward_out
|
11897
|
+
MPS: softshrink_backward_out_mps
|
11583
11898
|
|
11584
11899
|
- func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
|
11585
11900
|
structured_delegate: softshrink_backward.grad_input
|
@@ -12144,6 +12459,7 @@
|
|
12144
12459
|
dispatch:
|
12145
12460
|
CPU: upsample_linear1d_out_cpu
|
12146
12461
|
CUDA: upsample_linear1d_out_cuda
|
12462
|
+
MPS: upsample_linear1d_out_mps
|
12147
12463
|
|
12148
12464
|
- func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
|
12149
12465
|
python_module: nn
|
@@ -12155,6 +12471,7 @@
|
|
12155
12471
|
dispatch:
|
12156
12472
|
CPU: upsample_linear1d_backward_out_cpu
|
12157
12473
|
CUDA: upsample_linear1d_backward_out_cuda
|
12474
|
+
MPS: upsample_linear1d_backward_out_mps
|
12158
12475
|
|
12159
12476
|
- func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
|
12160
12477
|
python_module: nn
|
@@ -12482,101 +12799,101 @@
|
|
12482
12799
|
# make the operational distinction clear.
|
12483
12800
|
tags: pointwise
|
12484
12801
|
|
12485
|
-
- func: slow_conv_transpose2d.out(Tensor self, Tensor weight,
|
12802
|
+
- func: slow_conv_transpose2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
|
12486
12803
|
python_module: nn
|
12487
12804
|
structured: True
|
12488
12805
|
dispatch:
|
12489
12806
|
CPU: slow_conv_transpose2d_structured_cpu
|
12490
12807
|
CUDA: slow_conv_transpose2d_structured_cuda
|
12491
12808
|
|
12492
|
-
- func: slow_conv_transpose2d(Tensor self, Tensor weight,
|
12809
|
+
- func: slow_conv_transpose2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1) -> Tensor
|
12493
12810
|
python_module: nn
|
12494
12811
|
structured_delegate: slow_conv_transpose2d.out
|
12495
12812
|
|
12496
|
-
- func: slow_conv_transpose3d.out(Tensor self, Tensor weight,
|
12813
|
+
- func: slow_conv_transpose3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
|
12497
12814
|
python_module: nn
|
12498
12815
|
dispatch:
|
12499
12816
|
CPU: slow_conv_transpose3d_out_cpu
|
12500
12817
|
CUDA: slow_conv_transpose3d_out_cuda
|
12501
12818
|
|
12502
|
-
- func: slow_conv_transpose3d(Tensor self, Tensor weight,
|
12819
|
+
- func: slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor
|
12503
12820
|
python_module: nn
|
12504
12821
|
dispatch:
|
12505
12822
|
CPU: slow_conv_transpose3d_cpu
|
12506
12823
|
CUDA: slow_conv_transpose3d_cuda
|
12507
12824
|
|
12508
|
-
- func: thnn_conv2d.out(Tensor self, Tensor weight,
|
12825
|
+
- func: thnn_conv2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
|
12509
12826
|
python_module: nn
|
12510
12827
|
|
12511
|
-
- func: thnn_conv2d(Tensor self, Tensor weight,
|
12828
|
+
- func: thnn_conv2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0) -> Tensor
|
12512
12829
|
python_module: nn
|
12513
12830
|
|
12514
|
-
- func: _slow_conv2d_forward.output(Tensor self, Tensor weight,
|
12831
|
+
- func: _slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
|
12515
12832
|
python_module: nn
|
12516
12833
|
dispatch:
|
12517
12834
|
CPU: slow_conv2d_forward_out_cpu
|
12518
12835
|
CUDA: slow_conv2d_forward_out_cuda
|
12519
12836
|
|
12520
|
-
- func: _slow_conv2d_forward(Tensor self, Tensor weight,
|
12837
|
+
- func: _slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
|
12521
12838
|
python_module: nn
|
12522
12839
|
dispatch:
|
12523
12840
|
CPU: slow_conv2d_forward_cpu
|
12524
12841
|
CUDA: slow_conv2d_forward_cuda
|
12525
12842
|
|
12526
|
-
- func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight,
|
12843
|
+
- func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
|
12527
12844
|
python_module: nn
|
12528
12845
|
dispatch:
|
12529
12846
|
CPU: slow_conv2d_backward_out_cpu
|
12530
12847
|
CUDA: slow_conv2d_backward_out_cuda
|
12531
12848
|
|
12532
|
-
- func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight,
|
12849
|
+
- func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
|
12533
12850
|
python_module: nn
|
12534
12851
|
dispatch:
|
12535
12852
|
CPU: slow_conv2d_backward_cpu
|
12536
12853
|
CUDA: slow_conv2d_backward_cuda
|
12537
12854
|
autogen: _slow_conv2d_backward.output_mask_out
|
12538
12855
|
|
12539
|
-
- func: _conv_depthwise2d.out(Tensor self, Tensor weight,
|
12856
|
+
- func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
|
12540
12857
|
use_const_ref_for_mutable_tensors: True
|
12541
12858
|
python_module: nn
|
12542
12859
|
dispatch:
|
12543
12860
|
CUDA: conv_depthwise2d_cuda_out
|
12544
12861
|
|
12545
|
-
- func: _conv_depthwise2d(Tensor self, Tensor weight,
|
12862
|
+
- func: _conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor
|
12546
12863
|
python_module: nn
|
12547
12864
|
dispatch:
|
12548
12865
|
CUDA: conv_depthwise2d_cuda
|
12549
12866
|
|
12550
|
-
- func: conv_depthwise3d(Tensor self, Tensor weight,
|
12867
|
+
- func: conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor
|
12551
12868
|
python_module: nn
|
12552
12869
|
dispatch:
|
12553
12870
|
CUDA: conv_depthwise3d_cuda
|
12554
12871
|
autogen: conv_depthwise3d.out
|
12555
12872
|
|
12556
|
-
- func: slow_conv3d.out(Tensor self, Tensor weight,
|
12873
|
+
- func: slow_conv3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
|
12557
12874
|
python_module: nn
|
12558
12875
|
|
12559
|
-
- func: slow_conv3d(Tensor self, Tensor weight,
|
12876
|
+
- func: slow_conv3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0) -> Tensor
|
12560
12877
|
python_module: nn
|
12561
12878
|
|
12562
|
-
- func: slow_conv3d_forward.output(Tensor self, Tensor weight,
|
12879
|
+
- func: slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
|
12563
12880
|
python_module: nn
|
12564
12881
|
dispatch:
|
12565
12882
|
CPU: slow_conv3d_forward_out_cpu
|
12566
12883
|
|
12567
|
-
- func: slow_conv3d_forward(Tensor self, Tensor weight,
|
12884
|
+
- func: slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding) -> Tensor
|
12568
12885
|
python_module: nn
|
12569
12886
|
dispatch:
|
12570
12887
|
CPU: slow_conv3d_forward_cpu
|
12571
12888
|
|
12572
|
-
- func: slow_conv_dilated2d(Tensor self, Tensor weight,
|
12889
|
+
- func: slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor
|
12573
12890
|
python_module: nn
|
12574
12891
|
dispatch:
|
12575
12892
|
CPU: slow_conv_dilated2d_cpu
|
12576
12893
|
CUDA: slow_conv_dilated2d_cuda
|
12577
12894
|
autogen: slow_conv_dilated2d.out
|
12578
12895
|
|
12579
|
-
- func: slow_conv_dilated3d(Tensor self, Tensor weight,
|
12896
|
+
- func: slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor
|
12580
12897
|
python_module: nn
|
12581
12898
|
dispatch:
|
12582
12899
|
CPU: slow_conv_dilated3d_cpu
|
@@ -12627,7 +12944,7 @@
|
|
12627
12944
|
SparseMeta: isinf_sparse_meta
|
12628
12945
|
SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
|
12629
12946
|
autogen: isinf.out
|
12630
|
-
tags: core
|
12947
|
+
tags: [core, pointwise]
|
12631
12948
|
|
12632
12949
|
- func: record_stream(Tensor(a!) self, Stream s) -> ()
|
12633
12950
|
variants: method
|
@@ -13553,11 +13870,18 @@
   dispatch:
     CPU, CUDA: linalg_eig_out
 
+- func: _linalg_eigvals(Tensor self) -> Tensor
+  python_module: linalg
+  dispatch:
+    CPU, CUDA: _linalg_eigvals
+
 - func: linalg_eigvals(Tensor self) -> Tensor
   python_module: linalg
 
 - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  dispatch:
+    CPU, CUDA: linalg_eigvals_out
 
 # This function is exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
 # `linalg.eigvalsh` as composite functions that call this one
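The new _linalg_eigvals helper and the explicit dispatch for linalg_eigvals.out back the public torch.linalg.eigvals. A brief sketch in PyTorch's Python API (illustrative only):

import torch

A = torch.randn(4, 4)
w = torch.linalg.eigvals(A)            # complex eigenvalues only, no eigenvectors

out = torch.empty(4, dtype=torch.complex64)
torch.linalg.eigvals(A, out=out)       # out= variant, now with its own CPU/CUDA dispatch entry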
@@ -13861,6 +14185,12 @@
 # It is undocumented and should not be used outside of tests.
 - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor
 
+# Note: for testing COW materialization within `at::parallel_for` loop function
+- func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _test_parallel_materialize
+
 # Note: this function is only for testing.
 - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
   python_module: nn
@@ -14195,6 +14525,7 @@
|
|
14195
14525
|
variants: function
|
14196
14526
|
dispatch:
|
14197
14527
|
CompositeExplicitAutograd: split_with_sizes_copy_out
|
14528
|
+
CUDA: split_with_sizes_copy_out_cuda
|
14198
14529
|
|
14199
14530
|
- func: view_copy(Tensor self, SymInt[] size) -> Tensor
|
14200
14531
|
variants: function
|
@@ -14269,19 +14600,29 @@
   variants: function
   tags: nondeterministic_seeded
 
-- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor
+- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_cpu
     CUDA: _scaled_dot_product_flash_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
   tags: nondeterministic_seeded
 
-- func:
+- func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu
+  tags: nondeterministic_seeded
+
+- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
   device_check: NoCheck
   variants: function
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_backward_cpu
     CUDA: _scaled_dot_product_flash_attention_backward_cuda
+    NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested
+
+- func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu_backward
 
 - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
   dispatch:
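The CPU flash-attention path is split out above into its own _scaled_dot_product_flash_attention_for_cpu schema (with an optional attn_mask). From Python this is reached through the public entry point; a minimal sketch (illustrative only):

import torch
import torch.nn.functional as F

q = torch.randn(2, 8, 128, 64)   # (batch, heads, seq_len, head_dim)
k = torch.randn(2, 8, 128, 64)
v = torch.randn(2, 8, 128, 64)

# On CPU this call can now route to the dedicated
# _scaled_dot_product_flash_attention_for_cpu kernel registered above
# instead of the generic math fallback.
out = F.scaled_dot_product_attention(q, k, v, is_causal=True)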
@@ -14295,26 +14636,31 @@
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
   tags: nondeterministic_seeded
 
-- func:
+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_cuda
+  tags: nondeterministic_seeded
+
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
     CUDA: _flash_attention_forward
   tags: nondeterministic_seeded
 
-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k,
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
     CUDA: _flash_attention_backward
 
-# Returns
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+# Returns output, logsumexp if compute_logsumexp
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
   tags: nondeterministic_seeded
 
-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k,
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
@@ -14422,12 +14768,16 @@
|
|
14422
14768
|
tags: pointwise
|
14423
14769
|
|
14424
14770
|
- func: special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
|
14771
|
+
dispatch:
|
14772
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_t
|
14425
14773
|
device_check: NoCheck
|
14426
14774
|
python_module: special
|
14427
14775
|
variants: function
|
14428
14776
|
tags: pointwise
|
14429
14777
|
|
14430
14778
|
- func: special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
|
14779
|
+
dispatch:
|
14780
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_t
|
14431
14781
|
device_check: NoCheck
|
14432
14782
|
python_module: special
|
14433
14783
|
variants: function
|
@@ -14444,6 +14794,8 @@
|
|
14444
14794
|
tags: pointwise
|
14445
14795
|
|
14446
14796
|
- func: special_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
14797
|
+
dispatch:
|
14798
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_t_out
|
14447
14799
|
device_check: NoCheck
|
14448
14800
|
python_module: special
|
14449
14801
|
variants: function
|
@@ -14465,12 +14817,16 @@
|
|
14465
14817
|
tags: pointwise
|
14466
14818
|
|
14467
14819
|
- func: special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
|
14820
|
+
dispatch:
|
14821
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_u
|
14468
14822
|
device_check: NoCheck
|
14469
14823
|
python_module: special
|
14470
14824
|
variants: function
|
14471
14825
|
tags: pointwise
|
14472
14826
|
|
14473
14827
|
- func: special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
|
14828
|
+
dispatch:
|
14829
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_u
|
14474
14830
|
device_check: NoCheck
|
14475
14831
|
python_module: special
|
14476
14832
|
variants: function
|
@@ -14487,6 +14843,8 @@
|
|
14487
14843
|
tags: pointwise
|
14488
14844
|
|
14489
14845
|
- func: special_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
14846
|
+
dispatch:
|
14847
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_u_out
|
14490
14848
|
device_check: NoCheck
|
14491
14849
|
python_module: special
|
14492
14850
|
variants: function
|
@@ -14508,12 +14866,16 @@
|
|
14508
14866
|
tags: pointwise
|
14509
14867
|
|
14510
14868
|
- func: special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
|
14869
|
+
dispatch:
|
14870
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_v
|
14511
14871
|
device_check: NoCheck
|
14512
14872
|
python_module: special
|
14513
14873
|
variants: function
|
14514
14874
|
tags: pointwise
|
14515
14875
|
|
14516
14876
|
- func: special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
|
14877
|
+
dispatch:
|
14878
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_v
|
14517
14879
|
device_check: NoCheck
|
14518
14880
|
python_module: special
|
14519
14881
|
variants: function
|
@@ -14530,6 +14892,8 @@
|
|
14530
14892
|
tags: pointwise
|
14531
14893
|
|
14532
14894
|
- func: special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
14895
|
+
dispatch:
|
14896
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_v_out
|
14533
14897
|
device_check: NoCheck
|
14534
14898
|
python_module: special
|
14535
14899
|
variants: function
|
@@ -14551,12 +14915,16 @@
|
|
14551
14915
|
tags: pointwise
|
14552
14916
|
|
14553
14917
|
- func: special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
|
14918
|
+
dispatch:
|
14919
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_w
|
14554
14920
|
device_check: NoCheck
|
14555
14921
|
python_module: special
|
14556
14922
|
variants: function
|
14557
14923
|
tags: pointwise
|
14558
14924
|
|
14559
14925
|
- func: special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
|
14926
|
+
dispatch:
|
14927
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_w
|
14560
14928
|
device_check: NoCheck
|
14561
14929
|
python_module: special
|
14562
14930
|
variants: function
|
@@ -14573,6 +14941,8 @@
|
|
14573
14941
|
tags: pointwise
|
14574
14942
|
|
14575
14943
|
- func: special_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
14944
|
+
dispatch:
|
14945
|
+
CompositeExplicitAutograd: special_chebyshev_polynomial_w_out
|
14576
14946
|
device_check: NoCheck
|
14577
14947
|
python_module: special
|
14578
14948
|
variants: function
|
@@ -14594,12 +14964,16 @@
|
|
14594
14964
|
tags: pointwise
|
14595
14965
|
|
14596
14966
|
- func: special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor
|
14967
|
+
dispatch:
|
14968
|
+
CompositeExplicitAutograd: special_hermite_polynomial_h
|
14597
14969
|
device_check: NoCheck
|
14598
14970
|
python_module: special
|
14599
14971
|
variants: function
|
14600
14972
|
tags: pointwise
|
14601
14973
|
|
14602
14974
|
- func: special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor
|
14975
|
+
dispatch:
|
14976
|
+
CompositeExplicitAutograd: special_hermite_polynomial_h
|
14603
14977
|
device_check: NoCheck
|
14604
14978
|
python_module: special
|
14605
14979
|
variants: function
|
@@ -14616,6 +14990,8 @@
|
|
14616
14990
|
tags: pointwise
|
14617
14991
|
|
14618
14992
|
- func: special_hermite_polynomial_h.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
14993
|
+
dispatch:
|
14994
|
+
CompositeExplicitAutograd: special_hermite_polynomial_h_out
|
14619
14995
|
device_check: NoCheck
|
14620
14996
|
python_module: special
|
14621
14997
|
variants: function
|
@@ -14637,12 +15013,16 @@
|
|
14637
15013
|
tags: pointwise
|
14638
15014
|
|
14639
15015
|
- func: special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor
|
15016
|
+
dispatch:
|
15017
|
+
CompositeExplicitAutograd: special_hermite_polynomial_he
|
14640
15018
|
device_check: NoCheck
|
14641
15019
|
python_module: special
|
14642
15020
|
variants: function
|
14643
15021
|
tags: pointwise
|
14644
15022
|
|
14645
15023
|
- func: special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor
|
15024
|
+
dispatch:
|
15025
|
+
CompositeExplicitAutograd: special_hermite_polynomial_he
|
14646
15026
|
device_check: NoCheck
|
14647
15027
|
python_module: special
|
14648
15028
|
variants: function
|
@@ -14659,6 +15039,8 @@
|
|
14659
15039
|
tags: pointwise
|
14660
15040
|
|
14661
15041
|
- func: special_hermite_polynomial_he.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15042
|
+
dispatch:
|
15043
|
+
CompositeExplicitAutograd: special_hermite_polynomial_he_out
|
14662
15044
|
device_check: NoCheck
|
14663
15045
|
python_module: special
|
14664
15046
|
variants: function
|
@@ -14680,12 +15062,16 @@
|
|
14680
15062
|
tags: pointwise
|
14681
15063
|
|
14682
15064
|
- func: special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor
|
15065
|
+
dispatch:
|
15066
|
+
CompositeExplicitAutograd: special_laguerre_polynomial_l
|
14683
15067
|
device_check: NoCheck
|
14684
15068
|
python_module: special
|
14685
15069
|
variants: function
|
14686
15070
|
tags: pointwise
|
14687
15071
|
|
14688
15072
|
- func: special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor
|
15073
|
+
dispatch:
|
15074
|
+
CompositeExplicitAutograd: special_laguerre_polynomial_l
|
14689
15075
|
device_check: NoCheck
|
14690
15076
|
python_module: special
|
14691
15077
|
variants: function
|
@@ -14702,6 +15088,8 @@
|
|
14702
15088
|
tags: pointwise
|
14703
15089
|
|
14704
15090
|
- func: special_laguerre_polynomial_l.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15091
|
+
dispatch:
|
15092
|
+
CompositeExplicitAutograd: special_laguerre_polynomial_l_out
|
14705
15093
|
device_check: NoCheck
|
14706
15094
|
python_module: special
|
14707
15095
|
variants: function
|
@@ -14723,12 +15111,16 @@
|
|
14723
15111
|
tags: pointwise
|
14724
15112
|
|
14725
15113
|
- func: special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor
|
15114
|
+
dispatch:
|
15115
|
+
CompositeExplicitAutograd: special_legendre_polynomial_p
|
14726
15116
|
device_check: NoCheck
|
14727
15117
|
python_module: special
|
14728
15118
|
variants: function
|
14729
15119
|
tags: pointwise
|
14730
15120
|
|
14731
15121
|
- func: special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor
|
15122
|
+
dispatch:
|
15123
|
+
CompositeExplicitAutograd: special_legendre_polynomial_p
|
14732
15124
|
device_check: NoCheck
|
14733
15125
|
python_module: special
|
14734
15126
|
variants: function
|
@@ -14745,6 +15137,8 @@
|
|
14745
15137
|
tags: pointwise
|
14746
15138
|
|
14747
15139
|
- func: special_legendre_polynomial_p.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15140
|
+
dispatch:
|
15141
|
+
CompositeExplicitAutograd: special_legendre_polynomial_p_out
|
14748
15142
|
device_check: NoCheck
|
14749
15143
|
python_module: special
|
14750
15144
|
variants: function
|
@@ -14856,12 +15250,16 @@
|
|
14856
15250
|
tags: pointwise
|
14857
15251
|
|
14858
15252
|
- func: special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
|
15253
|
+
dispatch:
|
15254
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
|
14859
15255
|
device_check: NoCheck
|
14860
15256
|
python_module: special
|
14861
15257
|
variants: function
|
14862
15258
|
tags: pointwise
|
14863
15259
|
|
14864
15260
|
- func: special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
|
15261
|
+
dispatch:
|
15262
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
|
14865
15263
|
device_check: NoCheck
|
14866
15264
|
python_module: special
|
14867
15265
|
variants: function
|
@@ -14878,6 +15276,8 @@
|
|
14878
15276
|
tags: pointwise
|
14879
15277
|
|
14880
15278
|
- func: special_shifted_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15279
|
+
dispatch:
|
15280
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t_out
|
14881
15281
|
device_check: NoCheck
|
14882
15282
|
python_module: special
|
14883
15283
|
variants: function
|
@@ -14899,12 +15299,16 @@
|
|
14899
15299
|
tags: pointwise
|
14900
15300
|
|
14901
15301
|
- func: special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
|
15302
|
+
dispatch:
|
15303
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
|
14902
15304
|
device_check: NoCheck
|
14903
15305
|
python_module: special
|
14904
15306
|
variants: function
|
14905
15307
|
tags: pointwise
|
14906
15308
|
|
14907
15309
|
- func: special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
|
15310
|
+
dispatch:
|
15311
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
|
14908
15312
|
device_check: NoCheck
|
14909
15313
|
python_module: special
|
14910
15314
|
variants: function
|
@@ -14921,6 +15325,8 @@
|
|
14921
15325
|
tags: pointwise
|
14922
15326
|
|
14923
15327
|
- func: special_shifted_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15328
|
+
dispatch:
|
15329
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u_out
|
14924
15330
|
device_check: NoCheck
|
14925
15331
|
python_module: special
|
14926
15332
|
variants: function
|
@@ -14942,12 +15348,16 @@
|
|
14942
15348
|
tags: pointwise
|
14943
15349
|
|
14944
15350
|
- func: special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
|
15351
|
+
dispatch:
|
15352
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
|
14945
15353
|
device_check: NoCheck
|
14946
15354
|
python_module: special
|
14947
15355
|
variants: function
|
14948
15356
|
tags: pointwise
|
14949
15357
|
|
14950
15358
|
- func: special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
|
15359
|
+
dispatch:
|
15360
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
|
14951
15361
|
device_check: NoCheck
|
14952
15362
|
python_module: special
|
14953
15363
|
variants: function
|
@@ -14964,6 +15374,8 @@
|
|
14964
15374
|
tags: pointwise
|
14965
15375
|
|
14966
15376
|
- func: special_shifted_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15377
|
+
dispatch:
|
15378
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v_out
|
14967
15379
|
device_check: NoCheck
|
14968
15380
|
python_module: special
|
14969
15381
|
variants: function
|
@@ -14985,12 +15397,16 @@
|
|
14985
15397
|
tags: pointwise
|
14986
15398
|
|
14987
15399
|
- func: special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
|
15400
|
+
dispatch:
|
15401
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
|
14988
15402
|
device_check: NoCheck
|
14989
15403
|
python_module: special
|
14990
15404
|
variants: function
|
14991
15405
|
tags: pointwise
|
14992
15406
|
|
14993
15407
|
- func: special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
|
15408
|
+
dispatch:
|
15409
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
|
14994
15410
|
device_check: NoCheck
|
14995
15411
|
python_module: special
|
14996
15412
|
variants: function
|
@@ -15007,6 +15423,8 @@
|
|
15007
15423
|
tags: pointwise
|
15008
15424
|
|
15009
15425
|
- func: special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15426
|
+
dispatch:
|
15427
|
+
CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w_out
|
15010
15428
|
device_check: NoCheck
|
15011
15429
|
python_module: special
|
15012
15430
|
variants: function
|
@@ -15075,6 +15493,22 @@
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
 
+- func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  variants: function
+  dispatch:
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd, _fused_sgd.out
+
+- func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  # but still skip the device check as the Tensor LR can be on CPU
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function
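The _fused_sgd_ schemas above (plus the .tensor_lr variant for a device-resident learning rate) are the kernels behind the fused SGD optimizer path. A short sketch of the public surface, assuming a PyTorch build where torch.optim.SGD accepts fused=True and a CUDA device is available (illustrative only):

import torch

model = torch.nn.Linear(16, 4, device="cuda")
# fused=True selects the single fused CUDA kernel (_fused_sgd_) instead of the
# per-parameter for-loop or foreach implementations.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, fused=True)

loss = model(torch.randn(8, 16, device="cuda")).sum()
loss.backward()
optimizer.step()
optimizer.zero_grad()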