torch-rb 0.14.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +4 -6
- data/codegen/native_functions.yaml +552 -118
- data/ext/torch/extconf.rb +3 -0
- data/ext/torch/templates.h +0 -23
- data/ext/torch/tensor.cpp +1 -0
- data/ext/torch/utils.h +1 -1
- data/lib/torch/inspector.rb +8 -3
- data/lib/torch/nn/elu.rb +20 -0
- data/lib/torch/nn/functional.rb +12 -0
- data/lib/torch/nn/gelu.rb +18 -0
- data/lib/torch/nn/leaky_relu.rb +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +2 -0
- metadata +6 -11
- data/ext/torch/fft_functions.h +0 -6
- data/ext/torch/linalg_functions.h +0 -6
- data/ext/torch/nn_functions.h +0 -6
- data/ext/torch/sparse_functions.h +0 -6
- data/ext/torch/special_functions.h +0 -6
- data/ext/torch/tensor_functions.h +0 -6
- data/ext/torch/torch_functions.h +0 -6
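
The largest change is a regenerated `data/codegen/native_functions.yaml` (+552 -118), reproduced as a unified diff for the remainder of this section. The release also adds new activation modules (`data/lib/torch/nn/elu.rb`, `data/lib/torch/nn/gelu.rb`) with matching entries in `data/lib/torch/nn/functional.rb`. A minimal usage sketch, assuming the new files define `Torch::NN::ELU` and `Torch::NN::GELU` following the gem's existing `Torch::NN` module API (the class names and the `alpha:` keyword are inferred from the file names and PyTorch's Python API, not confirmed by this diff):

```ruby
require "torch"

x = Torch.randn(2, 3)

# Module-style usage, mirroring existing layers such as Torch::NN::LeakyReLU.
# Assumes elu.rb defines Torch::NN::ELU (with an alpha option) and
# gelu.rb defines Torch::NN::GELU.
elu = Torch::NN::ELU.new(alpha: 1.0)
gelu = Torch::NN::GELU.new

puts elu.call(x).inspect
puts gelu.call(x).inspect
```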
@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out

 # This function compares the storage numel of self with that of other, where
-# storage numel is
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
@@ -175,17 +175,29 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda

+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu

 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()

+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range

-- func: sym_constrain_range_for_size(Scalar size, *, int? min, int? max) -> ()
+- func: sym_constrain_range_for_size(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range_for_size

@@ -431,6 +443,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sgn_out
+    MPS: sgn_out_mps
     SparseCPU, SparseCUDA: sgn_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
   tags: pointwise
@@ -469,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -563,8 +577,8 @@
   dispatch:
     SparseCPU: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU:
-    SparseCsrCUDA:
+    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -681,15 +695,29 @@
   structured_delegate: all.out
   variants: function, method

+- func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: all.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  dispatch:
+    CompositeExplicitAutograd: all_dims_default
+
 - func: all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: all_out
     MPS: all_out_mps

+- func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: all_dims_out
+    CompositeExplicitAutograd: all_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -709,15 +737,30 @@
   variants: function, method
   tags: core

+- func: any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: any.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  tags: core
+  dispatch:
+    CompositeExplicitAutograd: any_dims_default
+
 - func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: any_out
     MPS: any_out_mps

+- func: any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: any_dims_out
+    CompositeExplicitAutograd: any_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -733,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange

-# This operator should be named `
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1190,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise

+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -1326,6 +1376,7 @@
   dispatch:
     SparseCPU, SparseCUDA: cat_sparse
     QuantizedCPU: cat_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: cat_nested
   tags: core

 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -1590,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps

 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1613,59 +1665,67 @@
   variants: method
   manual_cpp_binding: True

-- func: convolution(Tensor input, Tensor weight, Tensor? bias,
+- func: convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution
   autogen: convolution.out
   tags: core

-- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes,
+- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     CompositeExplicitAutograd, CUDA: convolution_backward
   autogen: convolution_backward.out
   tags: core

-- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias,
+- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution_overrideable
   autogen: convolution_overrideable.out

-- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight,
+- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
   dispatch:
     CompositeExplicitAutograd: convolution_backward_overrideable
   autogen: convolution_backward_overrideable.out

-- func: _convolution(Tensor input, Tensor weight, Tensor? bias,
+- func: _convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _convolution
   autogen: _convolution.out

-- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias,
+- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, int[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor

-- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias,
+- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, str padding, SymInt[] dilation, SymInt groups) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _convolution_mode_symint

-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self,
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)

-- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv1d_symint

-- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv2d_symint

-- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv3d_symint

-- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, str padding="valid", SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv1d_padding_symint

-- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding="valid", SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv2d_padding_symint

-- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, str padding="valid", SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv3d_padding_symint

 - func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
   dispatch:
@@ -1675,15 +1735,15 @@
 - func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)

 # NB: we inherit the goofy argument order from PyTorch torch.nn.functional
-- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, SymInt groups=1, SymInt[1] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose1d_symint

-- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt groups=1, SymInt[2] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose2d_symint

-- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None,
+- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt groups=1, SymInt[3] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose3d_symint

@@ -1691,6 +1751,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutogradNonFunctional: copy
+  tags: core

 - func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
   variants: method
@@ -1720,6 +1781,8 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   structured_delegate: cos.out
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: cos_nested
   tags: [core, pointwise]

 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -1802,32 +1865,35 @@
     CUDA: cudnn_batch_norm_backward
   autogen: cudnn_batch_norm_backward.out

-- func: cudnn_convolution(Tensor self, Tensor weight,
+- func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-  autogen: cudnn_convolution.out

-- func:
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out
+
+- func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_transpose
   autogen: cudnn_convolution_transpose.out

-- func: _mps_convolution_transpose(Tensor self, Tensor weight,
+- func: _mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution_transpose
   autogen: _mps_convolution_transpose.out

-- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight,
+- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask) -> (Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_transpose_backward
   autogen: mps_convolution_transpose_backward.out

-- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias,
+- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_relu
   autogen: cudnn_convolution_relu.out

-- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias,
+- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_add_relu
   autogen: cudnn_convolution_add_relu.out
@@ -1967,6 +2033,7 @@
   dispatch:
     CPU: ctc_loss_cpu
     CUDA: ctc_loss_gpu
+    Meta: ctc_loss_meta
   autogen: _ctc_loss.out
   tags: dynamic_output_shape # the shape of second output is data dependent

@@ -1999,6 +2066,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: diagonal
+  tags: core

 - func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
   python_module: linalg
@@ -2079,7 +2147,7 @@
   structured_delegate: div.out_mode
   dispatch:
     SparseCPU, SparseCUDA: div_sparse
-  tags: pointwise
+  tags: [core, pointwise]

 - func: div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -2120,7 +2188,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: div
-  tags: pointwise
+  tags: [core, pointwise]

 - func: div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)
   variants: method
@@ -2302,7 +2370,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core

@@ -2370,7 +2438,7 @@
   variants: method
   device_check: NoCheck
   device_guard: False
-  tags: inplace_view
+  tags: [core, inplace_view]
   dispatch:
     Meta: resize__symint
     CPU: resize_
@@ -2408,7 +2476,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out

@@ -2517,7 +2585,7 @@
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse
     SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -2684,10 +2752,15 @@
 - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide

 - func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide_
+  autogen: floor_divide.Scalar_out

 - func: frac(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2905,12 +2978,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps

 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out

 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2918,12 +2993,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps

 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out

 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2931,12 +3008,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps

 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out

 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
@@ -2979,7 +3058,7 @@
 - func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: _unsafe_index

 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -3253,14 +3332,22 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm

-- func:
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear

+- func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
+  dispatch:
+    CUDA: _mixed_dtypes_linear
+
 - func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor

 - func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
@@ -3291,12 +3378,42 @@
   dispatch:
     CompositeExplicitAutograd: linspace

+- func: linspace.Tensor_Tensor(Tensor start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Tensor_Scalar(Tensor start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Scalar_Tensor(Scalar start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
 - func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: linspace_out
     CUDA: linspace_cuda_out
     MPS: linspace_out_mps

+- func: linspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
 - func: log(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: log.out
@@ -3322,7 +3439,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log10.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log10_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3346,7 +3463,7 @@
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
     SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3372,7 +3489,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log2.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log2_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3477,11 +3594,41 @@
   dispatch:
     CompositeExplicitAutograd: logspace

+- func: logspace.Tensor_Tensor(Tensor start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Tensor_Scalar(Tensor start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Scalar_Tensor(Scalar start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
 - func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: logspace_out
     CUDA: logspace_cuda_out

+- func: logspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
 # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
 - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
@@ -3847,17 +3994,17 @@
 # TODO: Add this function to MPS dispatch key so that we avoid declaring it in
 # native_functions.yaml
 # https://github.com/pytorch/pytorch/issues/77394
-- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias,
+- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution
   autogen: _mps_convolution.out

-- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight,
+- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_backward
   autogen: mps_convolution_backward.out

-- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding,
+- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: mkldnn_convolution
   autogen: mkldnn_convolution.out
@@ -3883,26 +4030,26 @@
     CUDA: miopen_batch_norm_backward
   autogen: miopen_batch_norm_backward.out

-- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding,
+- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution
   autogen: miopen_convolution.out

-- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding,
+- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution_transpose
   autogen: miopen_convolution_transpose.out

-- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding,
+- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_depthwise_convolution
   autogen: miopen_depthwise_convolution.out

-- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias,
+- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_relu

-- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias,
+- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_add_relu

@@ -3943,6 +4090,20 @@
   dispatch:
     CUDA: _int_mm_out_cuda

+- func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
+  dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
+    CUDA: _convert_weight_to_int4pack_cuda
+
+- func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+  dispatch:
+    CPU: _weight_int4pack_mm_cpu
+    CUDA: _weight_int4pack_mm_cuda
+
+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse

@@ -4087,6 +4248,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: narrow_symint
+    NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint

 - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
   variants: function, method
@@ -4199,7 +4361,7 @@

 - func: _nnpack_available() -> bool

-- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding,
+- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor
   variants: function
   dispatch:
     CompositeExplicitAutograd: _nnpack_spatial_convolution
@@ -4314,23 +4476,24 @@
 - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
   dispatch:
     CPU: pixel_shuffle_cpu
+    MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core

 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
     CPU: pixel_unshuffle_cpu
+    MPS: pixel_unshuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_unshuffle
   autogen: pixel_unshuffle.out

-- func: channel_shuffle(Tensor self,
+- func: channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU, CUDA: channel_shuffle
     QuantizedCPU: channel_shuffle_quantized_cpu
   autogen: channel_shuffle.out

-- func: native_channel_shuffle(Tensor self,
+- func: native_channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU: channel_shuffle_cpu
     CompositeImplicitAutograd: math_channel_shuffle
@@ -4338,7 +4501,7 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    CUDA: is_pinned_cuda
+    NestedTensorCUDA, CUDA: is_pinned_cuda
     MPS: is_pinned_mps
     CompositeExplicitAutograd: is_pinned_default

@@ -4352,6 +4515,7 @@
   dispatch:
     CUDA: _pin_memory_cuda
     MPS: _pin_memory_mps
+    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
   autogen: _pin_memory.out

 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
@@ -4660,7 +4824,7 @@
   autogen: repeat.out
   tags: core

-- func: repeat_interleave.Tensor(Tensor repeats, *,
+- func: repeat_interleave.Tensor(Tensor repeats, *, SymInt? output_size=None) -> Tensor
   variants: function
   dispatch:
     CPU: repeat_interleave_cpu
@@ -4669,10 +4833,12 @@
   tags: dynamic_output_shape
   autogen: repeat_interleave.Tensor_out

-- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *,
+- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
+  dispatch:
+    CompositeImplicitAutograd: repeat_interleave_symint

-- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *,
+- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
   dispatch:
     CompositeImplicitAutograd: repeat_interleave_symint
@@ -4683,7 +4849,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor:
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint

 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4842,6 +5008,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_

 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -4973,12 +5140,14 @@
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
+  tags: pointwise

 - func: silu_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: silu.out
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
+  tags: pointwise

 - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -4987,6 +5156,7 @@
   dispatch:
     CPU, CUDA: silu_out
     MPS: silu_out_mps
+  tags: pointwise

 - func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -4995,6 +5165,7 @@
   dispatch:
     CPU, CUDA: silu_backward_out
     MPS: silu_backward_out_mps
+  tags: pointwise

 - func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: silu_backward.grad_input
@@ -5002,6 +5173,7 @@
   dispatch:
     CompositeImplicitAutograd: math_silu_backward
     NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
+  tags: pointwise

 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
@@ -5017,11 +5189,13 @@
   python_module: nn
   dispatch:
     CPU, CUDA: mish_out
+    MPS: mish_out_mps

 - func: mish_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: mish_backward
+    MPS: mish_backward_mps
     CompositeImplicitAutograd: math_mish_backward

 - func: sigmoid(Tensor self) -> Tensor
@@ -5076,6 +5250,7 @@
   dispatch:
     SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
+    NestedTensorCPU, NestedTensorCUDA: sin_nested
   tags: [core, pointwise]

 - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5221,6 +5396,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out

+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+#   input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+#   easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
@@ -5228,7 +5418,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]

 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
@@ -5427,6 +5617,14 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda

+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
@@ -5618,6 +5816,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out

 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -5873,7 +6072,6 @@
     CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
-  tags: core

 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args

@@ -5956,6 +6154,52 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out

+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
@@ -5971,7 +6215,7 @@
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse
     SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: trunc_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: trunc.out
@@ -6140,6 +6384,7 @@
   variants: function
   dispatch:
     CPU, CUDA: var_mean
+    MPS: var_mean_mps
   autogen: var_mean.correction_out

 - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6160,15 +6405,13 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: where
-    MPS: where_mps
+    CPU, CUDA, MPS: where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: where_self_out
-    MPS: where_self_out_mps
+    CPU, CUDA, MPS: where_self_out

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6196,6 +6439,7 @@
   dispatch:
     CPU: weight_norm_cpu
     CUDA: weight_norm_cuda
+    MPS: weight_norm_mps
   autogen: _weight_norm_interface.out

 - func: _weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6203,6 +6447,7 @@
   dispatch:
     CPU: weight_norm_backward_cpu
     CUDA: weight_norm_backward_cuda
+    MPS: weight_norm_backward_mps
   autogen: _weight_norm_interface_backward.out

 - func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6219,6 +6464,7 @@
   dispatch:
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
+    MPS: _efficientzerotensor_mps
     Meta: _efficientzerotensor_meta
   autogen: _efficientzerotensor.out

@@ -6506,7 +6752,7 @@
     MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
-    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
     NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
@@ -6675,12 +6921,12 @@
   structured_delegate: _addmm_activation.out
   variants: function, method

-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
   variants: function
   dispatch:
     CUDA: _scaled_mm_cuda

-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
   variants: function
   dispatch:
     CUDA: _scaled_mm_out_cuda
@@ -6796,7 +7042,7 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

-- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor

@@ -6813,7 +7059,10 @@
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

-- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6899,7 +7148,7 @@
   dispatch:
     CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6916,7 +7165,7 @@
   dispatch:
     CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6932,7 +7181,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -6995,7 +7244,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
-    SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
@@ -7004,7 +7253,7 @@
|
|
7004
7253
|
- func: crow_indices(Tensor(a) self) -> Tensor(a)
|
7005
7254
|
variants: method
|
7006
7255
|
dispatch:
|
7007
|
-
SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
|
7256
|
+
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
|
7008
7257
|
CompositeExplicitAutograd: crow_indices_default
|
7009
7258
|
device_check: NoCheck
|
7010
7259
|
device_guard: False
|
@@ -7012,7 +7261,7 @@
|
|
7012
7261
|
- func: col_indices(Tensor(a) self) -> Tensor(a)
|
7013
7262
|
variants: method
|
7014
7263
|
dispatch:
|
7015
|
-
SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
|
7264
|
+
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
|
7016
7265
|
CompositeExplicitAutograd: col_indices_default
|
7017
7266
|
device_check: NoCheck
|
7018
7267
|
device_guard: False
|
@@ -7020,7 +7269,7 @@
|
|
7020
7269
|
- func: ccol_indices(Tensor(a) self) -> Tensor(a)
|
7021
7270
|
variants: method
|
7022
7271
|
dispatch:
|
7023
|
-
SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
|
7272
|
+
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
|
7024
7273
|
CompositeExplicitAutograd: ccol_indices_default
|
7025
7274
|
device_check: NoCheck
|
7026
7275
|
device_guard: False
|
@@ -7028,7 +7277,7 @@
|
|
7028
7277
|
- func: row_indices(Tensor(a) self) -> Tensor(a)
|
7029
7278
|
variants: method
|
7030
7279
|
dispatch:
|
7031
|
-
SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
|
7280
|
+
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
|
7032
7281
|
CompositeExplicitAutograd: row_indices_default
|
7033
7282
|
device_check: NoCheck
|
7034
7283
|
device_guard: False
|
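All of the sparse-CSR introspection ops above gain a `SparseCsrMeta` dispatch key, so shape-only "meta" tensors with compressed layouts can answer these queries without real data. A minimal eager sketch of the accessors themselves (Python, upstream surface):

```python
import torch

# The accessors whose dispatch lists gain SparseCsrMeta above.
csr = torch.eye(3).to_sparse_csr()
print(csr.crow_indices())                # tensor([0, 1, 2, 3])
print(csr.col_indices())                 # tensor([0, 1, 2])
print(csr.values())                      # tensor([1., 1., 1.])
print(csr.sparse_dim(), csr.dense_dim()) # 2 0
```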
@@ -7055,7 +7304,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: unbind
-
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind

 - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
   variants: function, method
@@ -7143,14 +7392,14 @@
     CPU: dense_to_mkldnn
   autogen: to_mkldnn.out

-- func: mkldnn_reorder_conv2d_weight(Tensor self,
+- func: mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
   variants: function
   python_module: nn
   dispatch:
     MkldnnCPU: mkldnn_reorder_conv2d_weight
   autogen: mkldnn_reorder_conv2d_weight.out

-- func: mkldnn_reorder_conv3d_weight(Tensor self,
+- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   variants: function
   python_module: nn
   dispatch:
@@ -7537,6 +7786,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_
   autogen: set.source_Storage, set.source_Storage_out
+  tags: inplace_view

 - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7549,6 +7799,7 @@
     MPS: set_storage_mps_
     QuantizedCPU, QuantizedCUDA: set_storage_quantized_
   autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+  tags: inplace_view

 - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7556,6 +7807,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: set__symint
+  tags: inplace_view

 - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
   variants: method
@@ -7564,6 +7816,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_tensor_
   autogen: set.source_Tensor, set.source_Tensor_out
+  tags: inplace_view

 - func: set_(Tensor(a!) self) -> Tensor(a!)
   variants: method
@@ -7573,6 +7826,7 @@
     Meta: set_meta_
     MPS: set_mps_
   autogen: set, set.out
+  tags: inplace_view

 # Not making it CompositeImplicitAutograd because lift
 # should be a primitive w.r.t. functorch
@@ -7656,6 +7910,10 @@
   dispatch:
     CompositeExplicitAutograd: masked_scatter

+- func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: masked_scatter_backward_symint
+
 - func: _masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor
   dispatch:
     CUDA: masked_softmax_cuda
@@ -7938,6 +8196,8 @@
 - func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: bitwise_and_
   tags: pointwise

 - func: bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -7982,6 +8242,8 @@
 - func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
+  dispatch:
+    CompositeExplicitAutograd: bitwise_or
   tags: [core, pointwise]

 - func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
@@ -8001,6 +8263,8 @@
 - func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: bitwise_or_
   tags: pointwise

 - func: bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -8045,6 +8309,8 @@
 - func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
+  dispatch:
+    CompositeExplicitAutograd: bitwise_xor
   tags: [core, pointwise]

 - func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
@@ -8064,6 +8330,8 @@
 - func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: bitwise_xor_
   tags: pointwise

 - func: bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
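The Scalar and in-place bitwise variants above gain explicit `CompositeExplicitAutograd` kernels; user-visible behavior is unchanged. A quick sketch:

```python
import torch

flags = torch.tensor([0b0011, 0b0101])
flags.bitwise_and_(0b0110)   # bitwise_and_.Scalar
print(flags)                 # tensor([2, 4])
flags.bitwise_or_(0b1000)    # bitwise_or_.Scalar
print(flags)                 # tensor([10, 12])
flags.bitwise_xor_(0b0001)   # bitwise_xor_.Scalar
print(flags)                 # tensor([11, 13])
```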
@@ -8504,6 +8772,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: eq_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
   tags: [core, pointwise]

 - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8540,6 +8809,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: ge_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
   tags: [core, pointwise]

 - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8666,6 +8936,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: gt_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
   tags: [core, pointwise]

 - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
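`eq`, `ge`, and `gt` against a scalar gain nested-tensor kernels. A hedged sketch of what that should enable (assumes a build containing this diff; nested tensors remain a prototype feature):

```python
import torch

nt = torch.nested.nested_tensor([torch.tensor([1.0, -2.0]),
                                 torch.tensor([3.0])])
mask = nt > 0            # gt.Scalar via the new gt_scalar_nested kernel
print(mask.unbind())     # (tensor([ True, False]), tensor([True]))
```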
@@ -9106,6 +9377,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: lgamma_out
+    MPS: lgamma_out_mps
   tags: pointwise

 - func: lgamma_(Tensor(a!) self) -> Tensor(a!)
@@ -9126,6 +9398,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: digamma_out
+    MPS: digamma_out_mps
   tags: pointwise

 - func: digamma(Tensor self) -> Tensor
@@ -9140,6 +9413,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: polygamma_out
+    MPS: polygamma_out_mps
   tags: pointwise

 - func: polygamma(int n, Tensor self) -> Tensor
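`lgamma`, `digamma`, and `polygamma` gain MPS kernels, so the gamma-function family runs natively on Apple-silicon GPUs. A sketch that falls back to CPU when MPS is absent:

```python
import torch

device = "mps" if torch.backends.mps.is_available() else "cpu"
x = torch.tensor([0.5, 1.5, 2.5], device=device)
print(torch.lgamma(x))        # log|Gamma(x)|
print(torch.digamma(x))       # d/dx log Gamma(x)
print(torch.polygamma(1, x))  # trigamma
```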
@@ -9263,7 +9537,7 @@
   dispatch:
     CPU, CUDA: atan2_out
     MPS: atan2_out_mps
-  tags: pointwise
+  tags: [core, pointwise]

 - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -9275,7 +9549,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: atan2.out
   variants: method, function
-  tags: pointwise
+  tags: [core, pointwise]
 # arctan2, alias of atan2

 - func: arctan2(Tensor self, Tensor other) -> Tensor
@@ -9464,7 +9738,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: nextafter_out
+    CPU, CUDA, MPS: nextafter_out
   tags: pointwise

 - func: nextafter(Tensor self, Tensor other) -> Tensor
@@ -9811,7 +10085,7 @@
 - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: pow.Scalar_out
-  tags: pointwise
+  tags: [core, pointwise]

 - func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -9954,12 +10228,14 @@
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+    CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+    CPU: _amp_update_scale_cpu_
   autogen: _amp_update_scale, _amp_update_scale.out

 #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
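The two private AMP ops gain CPU kernels, groundwork for gradient scaling on CPU. A hedged sketch of the unscale op, assuming the upstream argument order `(grads, found_inf, inv_scale)`:

```python
import torch

grads = [torch.tensor([1.0, 2.0]), torch.tensor([float("inf")])]
found_inf = torch.tensor(0.0)
inv_scale = torch.tensor(0.5)

# Multiplies every grad by inv_scale in place and flips found_inf to 1.0
# if any element is non-finite; GradScaler calls this internally.
torch._amp_foreach_non_finite_check_and_unscale_(grads, found_inf, inv_scale)
print(grads[0])    # tensor([0.5000, 1.0000])
print(found_inf)   # tensor(1.) -- the inf in grads[1] was detected
```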
@@ -10020,6 +10296,21 @@
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
   autogen: _foreach_add.ScalarList_out

+- func: _foreach_add.Tensor(Tensor[] self, Tensor other, *, Scalar alpha=1) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_add_tensor_kernel_slow
+    CUDA: foreach_tensor_add_tensor_kernel_cuda
+
+- func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_add_tensor_kernel_slow_
+    CUDA: foreach_tensor_add_tensor_kernel_cuda_
+  autogen: _foreach_add.Tensor_out
+
 - func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
@@ -10170,6 +10461,21 @@
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
   autogen: _foreach_div.ScalarList_out

+- func: _foreach_div.Tensor(Tensor[] self, Tensor other) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_div_tensor_kernel_slow
+    CUDA: foreach_tensor_div_tensor_kernel_cuda
+
+- func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_div_tensor_kernel_slow_
+    CUDA: foreach_tensor_div_tensor_kernel_cuda_
+  autogen: _foreach_div.Tensor_out
+
 - func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
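`_foreach_add` and `_foreach_div` gain `Tensor` overloads: one (typically 0-dim) tensor applied across a whole tensor list in a single call, which is what fused optimizers need for tensor-valued learning rates. A hedged sketch via the private ops:

```python
import torch

params = [torch.ones(2), torch.ones(3)]
step = torch.tensor(2.0)            # 0-dim tensor shared by every list element
torch._foreach_add_(params, step)   # _foreach_add_.Tensor overload
print(params[0])                    # tensor([3., 3.])
torch._foreach_div_(params, step)   # _foreach_div_.Tensor overload
print(params[0])                    # tensor([1.5000, 1.5000])
```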
@@ -10990,37 +11296,44 @@
   dispatch:
     CPU: bucketize_cpu
     CUDA: bucketize_cuda
+    MPS: bucketize_mps

 - func: bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU: bucketize_out_cpu
     CUDA: bucketize_out_cuda
+    MPS: bucketize_out_mps

 - func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
   dispatch:
     CPU: bucketize_cpu
     CUDA: bucketize_cuda
+    MPS: bucketize_mps
   autogen: bucketize.Scalar_out

 - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
   dispatch:
     CPU: searchsorted_cpu
     CUDA: searchsorted_cuda
+    MPS: searchsorted_mps

 - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU: searchsorted_out_cpu
     CUDA: searchsorted_out_cuda
+    MPS: searchsorted_out_mps

 - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
   dispatch:
     CPU: searchsorted_cpu
     CUDA: searchsorted_cuda
+    MPS: searchsorted_mps

 - func: searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU: searchsorted_out_cpu
     CUDA: searchsorted_out_cuda
+    MPS: searchsorted_out_mps

 - func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
   structured_delegate: _convert_indices_from_coo_to_csr.out
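`bucketize` and `searchsorted` gain MPS kernels across every overload. Their semantics, for reference (CPU sketch):

```python
import torch

boundaries = torch.tensor([1.0, 3.0, 5.0])
x = torch.tensor([0.5, 2.0, 4.0, 6.0])
print(torch.bucketize(x, boundaries))   # tensor([0, 1, 2, 3])

sorted_seq = torch.tensor([1.0, 2.0, 4.0, 8.0])
print(torch.searchsorted(sorted_seq, torch.tensor([3.0])))   # tensor([2])
```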
@@ -11568,6 +11881,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: softshrink_out
+    MPS: softshrink_out_mps

 - func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
   structured_delegate: softshrink.out
@@ -11580,6 +11894,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: softshrink_backward_out
+    MPS: softshrink_backward_out_mps

 - func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
   structured_delegate: softshrink_backward.grad_input
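`softshrink` gains MPS forward and backward kernels. The op itself: y = x - lambd for x > lambd, y = x + lambd for x < -lambd, else 0.

```python
import torch
import torch.nn.functional as F

x = torch.tensor([-1.0, -0.2, 0.3, 2.0])
print(F.softshrink(x, lambd=0.5))
# tensor([-0.5000,  0.0000,  0.0000,  1.5000])
```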
@@ -12144,6 +12459,7 @@
   dispatch:
     CPU: upsample_linear1d_out_cpu
     CUDA: upsample_linear1d_out_cuda
+    MPS: upsample_linear1d_out_mps

 - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn
@@ -12155,6 +12471,7 @@
   dispatch:
     CPU: upsample_linear1d_backward_out_cpu
     CUDA: upsample_linear1d_backward_out_cuda
+    MPS: upsample_linear1d_backward_out_mps

 - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn
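`upsample_linear1d` gains MPS forward and backward kernels; the public entry point is `F.interpolate` with `mode="linear"`:

```python
import torch
import torch.nn.functional as F

x = torch.tensor([[[0.0, 2.0]]])   # (N, C, W) = (1, 1, 2)
print(F.interpolate(x, size=4, mode="linear", align_corners=True))
# tensor([[[0.0000, 0.6667, 1.3333, 2.0000]]])
```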
@@ -12482,101 +12799,101 @@
 # make the operational distinction clear.
   tags: pointwise

-- func: slow_conv_transpose2d.out(Tensor self, Tensor weight,
+- func: slow_conv_transpose2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   structured: True
   dispatch:
     CPU: slow_conv_transpose2d_structured_cpu
     CUDA: slow_conv_transpose2d_structured_cuda

-- func: slow_conv_transpose2d(Tensor self, Tensor weight,
+- func: slow_conv_transpose2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1) -> Tensor
   python_module: nn
   structured_delegate: slow_conv_transpose2d.out

-- func: slow_conv_transpose3d.out(Tensor self, Tensor weight,
+- func: slow_conv_transpose3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: slow_conv_transpose3d_out_cpu
     CUDA: slow_conv_transpose3d_out_cuda

-- func: slow_conv_transpose3d(Tensor self, Tensor weight,
+- func: slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor
   python_module: nn
   dispatch:
     CPU: slow_conv_transpose3d_cpu
     CUDA: slow_conv_transpose3d_cuda

-- func: thnn_conv2d.out(Tensor self, Tensor weight,
+- func: thnn_conv2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn

-- func: thnn_conv2d(Tensor self, Tensor weight,
+- func: thnn_conv2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0) -> Tensor
   python_module: nn

-- func: _slow_conv2d_forward.output(Tensor self, Tensor weight,
+- func: _slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: slow_conv2d_forward_out_cpu
     CUDA: slow_conv2d_forward_out_cuda

-- func: _slow_conv2d_forward(Tensor self, Tensor weight,
+- func: _slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
   python_module: nn
   dispatch:
     CPU: slow_conv2d_forward_cpu
     CUDA: slow_conv2d_forward_cuda

-- func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight,
+- func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
   python_module: nn
   dispatch:
     CPU: slow_conv2d_backward_out_cpu
     CUDA: slow_conv2d_backward_out_cuda

-- func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight,
+- func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
   python_module: nn
   dispatch:
     CPU: slow_conv2d_backward_cpu
     CUDA: slow_conv2d_backward_cuda
   autogen: _slow_conv2d_backward.output_mask_out

-- func: _conv_depthwise2d.out(Tensor self, Tensor weight,
+- func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   python_module: nn
   dispatch:
     CUDA: conv_depthwise2d_cuda_out

-- func: _conv_depthwise2d(Tensor self, Tensor weight,
+- func: _conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor
   python_module: nn
   dispatch:
     CUDA: conv_depthwise2d_cuda

-- func: conv_depthwise3d(Tensor self, Tensor weight,
+- func: conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor
   python_module: nn
   dispatch:
     CUDA: conv_depthwise3d_cuda
   autogen: conv_depthwise3d.out

-- func: slow_conv3d.out(Tensor self, Tensor weight,
+- func: slow_conv3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn

-- func: slow_conv3d(Tensor self, Tensor weight,
+- func: slow_conv3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0) -> Tensor
   python_module: nn

-- func: slow_conv3d_forward.output(Tensor self, Tensor weight,
+- func: slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: slow_conv3d_forward_out_cpu

-- func: slow_conv3d_forward(Tensor self, Tensor weight,
+- func: slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding) -> Tensor
   python_module: nn
   dispatch:
     CPU: slow_conv3d_forward_cpu

-- func: slow_conv_dilated2d(Tensor self, Tensor weight,
+- func: slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor
   python_module: nn
   dispatch:
     CPU: slow_conv_dilated2d_cpu
     CUDA: slow_conv_dilated2d_cuda
   autogen: slow_conv_dilated2d.out

-- func: slow_conv_dilated3d(Tensor self, Tensor weight,
+- func: slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor
   python_module: nn
   dispatch:
     CPU: slow_conv_dilated3d_cpu
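Every `kernel_size`/`stride`/`padding`/`dilation` argument in this block moves from `int[]` to `SymInt[]`, so the fallback convolution kernels can be traced with symbolic shapes; eager numerics are untouched. A sketch of an eager call that typically lowers to `slow_conv_transpose2d` on CPU:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 2, 5, 5)
w = torch.randn(2, 3, 3, 3)   # (in_channels, out_channels, kH, kW)
y = F.conv_transpose2d(x, w, stride=2, padding=1)
print(y.shape)                # torch.Size([1, 3, 9, 9])
```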
@@ -12627,7 +12944,7 @@
     SparseMeta: isinf_sparse_meta
     SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
   autogen: isinf.out
-  tags: core
+  tags: [core, pointwise]

 - func: record_stream(Tensor(a!) self, Stream s) -> ()
   variants: method
@@ -13553,11 +13870,18 @@
   dispatch:
     CPU, CUDA: linalg_eig_out

+- func: _linalg_eigvals(Tensor self) -> Tensor
+  python_module: linalg
+  dispatch:
+    CPU, CUDA: _linalg_eigvals
+
 - func: linalg_eigvals(Tensor self) -> Tensor
   python_module: linalg

 - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  dispatch:
+    CPU, CUDA: linalg_eigvals_out

 # This function is exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
 # `linalg.eigvalsh` as composite functions that call this one
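`linalg_eigvals.out` now has a real kernel, and the new `_linalg_eigvals` helper serves the non-differentiable path. Sketch:

```python
import torch

A = torch.tensor([[0.0, -1.0], [1.0, 0.0]])   # rotation matrix; eigenvalues are +/- i
print(torch.linalg.eigvals(A))

out = torch.empty(2, dtype=torch.complex64)   # eigvals of a real matrix are complex
torch.linalg.eigvals(A, out=out)              # exercises linalg_eigvals_out
print(out)
```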
@@ -13861,6 +14185,12 @@
 # It is undocumented and should not be used outside of tests.
 - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor

+# Note: for testing COW materialization within `at::parallel_for` loop function
+- func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _test_parallel_materialize
+
 # Note: this function is only for testing.
 - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
   python_module: nn
@@ -14195,6 +14525,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutograd: split_with_sizes_copy_out
+    CUDA: split_with_sizes_copy_out_cuda

 - func: view_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -14269,19 +14600,29 @@
   variants: function
   tags: nondeterministic_seeded

-- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor
+- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_cpu
     CUDA: _scaled_dot_product_flash_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
   tags: nondeterministic_seeded

-- func:
+- func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu
+  tags: nondeterministic_seeded
+
+- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
   device_check: NoCheck
   variants: function
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_backward_cpu
     CUDA: _scaled_dot_product_flash_attention_backward_cuda
+    NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested
+
+- func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu_backward

 - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
   dispatch:
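The CPU flash-attention path is split out of `_scaled_dot_product_flash_attention` into a dedicated `_scaled_dot_product_flash_attention_for_cpu` pair with a simpler signature (it also accepts an explicit `attn_mask`). Backend choice stays internal; user code keeps calling the public op:

```python
import torch
import torch.nn.functional as F

q = torch.randn(1, 4, 8, 16)   # (batch, heads, seq_len, head_dim)
k = torch.randn(1, 4, 8, 16)
v = torch.randn(1, 4, 8, 16)
out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
print(out.shape)               # torch.Size([1, 4, 8, 16])
```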
@@ -14295,26 +14636,31 @@
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
   tags: nondeterministic_seeded

-- func:
+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_cuda
+  tags: nondeterministic_seeded
+
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
     CUDA: _flash_attention_forward
   tags: nondeterministic_seeded

-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k,
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
     CUDA: _flash_attention_backward

-# Returns
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+# Returns output, logsumexp if compute_logsumexp
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
   tags: nondeterministic_seeded

-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k,
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
@@ -14422,12 +14768,16 @@
   tags: pointwise

 - func: special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_t
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_t
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14444,6 +14794,8 @@
   tags: pointwise

 - func: special_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_t_out
   device_check: NoCheck
   python_module: special
   variants: function
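Each Scalar overload of the orthogonal-polynomial family now carries an explicit `CompositeExplicitAutograd` kernel; the same pattern repeats below for every polynomial. What the two overloads compute, using Chebyshev T as the example:

```python
import torch

n = torch.tensor([0.0, 1.0, 2.0])
print(torch.special.chebyshev_polynomial_t(0.5, n))   # x_scalar overload
# T_0(0.5)=1, T_1(0.5)=0.5, T_2(0.5)=-0.5

x = torch.tensor([0.0, 0.5, 1.0])
print(torch.special.chebyshev_polynomial_t(x, 2))     # n_scalar overload
# T_2(x) = 2x^2 - 1 -> tensor([-1.0000, -0.5000,  1.0000])
```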
@@ -14465,12 +14817,16 @@
   tags: pointwise

 - func: special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_u
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_u
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14487,6 +14843,8 @@
   tags: pointwise

 - func: special_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_u_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14508,12 +14866,16 @@
   tags: pointwise

 - func: special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_v
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_v
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14530,6 +14892,8 @@
   tags: pointwise

 - func: special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_v_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14551,12 +14915,16 @@
   tags: pointwise

 - func: special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_w
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_w
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14573,6 +14941,8 @@
   tags: pointwise

 - func: special_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_chebyshev_polynomial_w_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14594,12 +14964,16 @@
   tags: pointwise

 - func: special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_hermite_polynomial_h
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_hermite_polynomial_h
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14616,6 +14990,8 @@
   tags: pointwise

 - func: special_hermite_polynomial_h.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_hermite_polynomial_h_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14637,12 +15013,16 @@
   tags: pointwise

 - func: special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_hermite_polynomial_he
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_hermite_polynomial_he
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14659,6 +15039,8 @@
   tags: pointwise

 - func: special_hermite_polynomial_he.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_hermite_polynomial_he_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14680,12 +15062,16 @@
   tags: pointwise

 - func: special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_laguerre_polynomial_l
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_laguerre_polynomial_l
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14702,6 +15088,8 @@
   tags: pointwise

 - func: special_laguerre_polynomial_l.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_laguerre_polynomial_l_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14723,12 +15111,16 @@
   tags: pointwise

 - func: special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_legendre_polynomial_p
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_legendre_polynomial_p
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14745,6 +15137,8 @@
   tags: pointwise

 - func: special_legendre_polynomial_p.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_legendre_polynomial_p_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14856,12 +15250,16 @@
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14878,6 +15276,8 @@
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14899,12 +15299,16 @@
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14921,6 +15325,8 @@
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14942,12 +15348,16 @@
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14964,6 +15374,8 @@
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -14985,12 +15397,16 @@
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
   device_check: NoCheck
   python_module: special
   variants: function
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
   device_check: NoCheck
   python_module: special
   variants: function
@@ -15007,6 +15423,8 @@
   tags: pointwise

 - func: special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w_out
   device_check: NoCheck
   python_module: special
   variants: function
@@ -15075,6 +15493,22 @@
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

+- func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  variants: function
+  dispatch:
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd, _fused_sgd.out
+
+- func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  # but still skip the device check as the Tensor LR can be on CPU
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function
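`_fused_sgd_` lands as a single-kernel CUDA implementation of SGD with momentum over whole parameter lists, including a `tensor_lr` variant whose learning rate may live on CPU. A hedged sketch driving the private op directly with the schema's keyword arguments (optimizer front-ends wire this up where available):

```python
import torch

if torch.cuda.is_available():
    params = [torch.ones(3, device="cuda")]
    grads = [torch.full((3,), 0.1, device="cuda")]
    momentum_buffers = [torch.zeros(3, device="cuda")]
    torch._fused_sgd_(params, grads, momentum_buffers,
                      weight_decay=0.0, momentum=0.9, lr=0.5,
                      dampening=0.0, nesterov=False, maximize=False,
                      is_first_step=True)
    # First step with momentum: buffer = grad, then param -= lr * buffer
    print(params[0])   # tensor([0.9500, 0.9500, 0.9500], device='cuda:0')
```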