torch-rb 0.14.1 → 0.16.0
This diff shows the published contents of these two package versions as they appear in their public registry (RubyGems, in this case). It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +4 -6
- data/codegen/native_functions.yaml +552 -118
- data/ext/torch/extconf.rb +3 -0
- data/ext/torch/templates.h +0 -23
- data/ext/torch/tensor.cpp +1 -0
- data/ext/torch/utils.h +1 -1
- data/lib/torch/inspector.rb +8 -3
- data/lib/torch/nn/elu.rb +20 -0
- data/lib/torch/nn/functional.rb +12 -0
- data/lib/torch/nn/gelu.rb +18 -0
- data/lib/torch/nn/leaky_relu.rb +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +2 -0
- metadata +6 -11
- data/ext/torch/fft_functions.h +0 -6
- data/ext/torch/linalg_functions.h +0 -6
- data/ext/torch/nn_functions.h +0 -6
- data/ext/torch/sparse_functions.h +0 -6
- data/ext/torch/special_functions.h +0 -6
- data/ext/torch/tensor_functions.h +0 -6
- data/ext/torch/torch_functions.h +0 -6
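
The Ruby-visible highlights in the file list are the new activation modules (data/lib/torch/nn/elu.rb, data/lib/torch/nn/gelu.rb) and matching additions to data/lib/torch/nn/functional.rb. A minimal usage sketch, assuming the Ruby classes mirror PyTorch's module names and defaults (`Torch::NN::ELU`, `Torch::NN::GELU`):

```ruby
require "torch"

x = Torch.randn(2, 3)

# New module classes added by this release (see elu.rb / gelu.rb above);
# constructor options are assumed to mirror PyTorch's defaults.
gelu = Torch::NN::GELU.new
elu  = Torch::NN::ELU.new(alpha: 1.0)

gelu.call(x) # elementwise GELU activation
elu.call(x)  # elementwise ELU activation
```

The bulk of the release, however, is the vendored data/codegen/native_functions.yaml (+552 -118), from which torch-rb generates its native bindings; the relevant hunks follow. (Lines the diff viewer truncated are reproduced as-is.)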
--- a/data/codegen/native_functions.yaml
+++ b/data/codegen/native_functions.yaml
@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out

 # This function compares the storage numel of self with that of other, where
-# storage numel is 
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
@@ -175,17 +175,29 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda

+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu

 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()

+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range

-- func: sym_constrain_range_for_size(Scalar size, *, int? min, int? max) -> ()
+- func: sym_constrain_range_for_size(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range_for_size

@@ -431,6 +443,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sgn_out
+    MPS: sgn_out_mps
     SparseCPU, SparseCUDA: sgn_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
   tags: pointwise
@@ -469,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -563,8 +577,8 @@
   dispatch:
     SparseCPU: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU: 
-    SparseCsrCUDA: 
+    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -681,15 +695,29 @@
   structured_delegate: all.out
   variants: function, method

+- func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck   # TensorIterator
+  structured_delegate: all.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  dispatch:
+    CompositeExplicitAutograd: all_dims_default
+
 - func: all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: all_out
     MPS: all_out_mps

+- func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck   # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: all_dims_out
+    CompositeExplicitAutograd: all_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck   # TensorIterator
   variants: function, method
@@ -709,15 +737,30 @@
   variants: function, method
   tags: core

+- func: any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  device_check: NoCheck   # TensorIterator
+  structured_delegate: any.dims_out
+  variants: function, method
+  cpp_no_default_args: ['dim']
+  tags: core
+  dispatch:
+    CompositeExplicitAutograd: any_dims_default
+
 - func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   structured: True
-  precomputed:
-  - dim -> int dim
   dispatch:
     CPU, CUDA: any_out
     MPS: any_out_mps

+- func: any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck   # TensorIterator
+  structured: True
+  dispatch:
+    CPU, CUDA: any_dims_out
+    CompositeExplicitAutograd: any_dims_out_default
+  cpp_no_default_args: ['dim']
+
 - func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck   # TensorIterator
   variants: function, method
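
The new `all.dims` / `any.dims` overloads above take an optional list of dimensions (`int[]? dim`) where the older overloads accepted a single `int`. torch-rb generates its Ruby methods from this YAML, so a call shape like the following should become available once the bindings are regenerated (a sketch; the exact Ruby signature is an assumption):

```ruby
x = Torch.tensor([[true, false], [true, true]])

x.all         # full reduction            => tensor(false)
x.all(1)      # existing single-dim form  => tensor([false, true])
x.all([0, 1]) # assumed binding of the new all.dims overload
x.any([0])    # likewise for any.dims
```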
@@ -733,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange

-# This operator should be named `
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1190,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise

+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck   # TensorIterator
   variants: function, method
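
`_lazy_clone`'s inline comment describes copy-on-write semantics: no data moves until one side is written. A sketch of the behavior (hypothetical Ruby exposure; private underscore ops may not be surfaced by the gem at all):

```ruby
x = Torch.ones(3)
y = x._lazy_clone # hypothetical binding; no storage is copied yet
y.add!(1)         # first write materializes the copy:
                  # x stays [1, 1, 1], y becomes [2, 2, 2]
```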
@@ -1326,6 +1376,7 @@
   dispatch:
     SparseCPU, SparseCUDA: cat_sparse
     QuantizedCPU: cat_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: cat_nested
   tags: core

 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -1590,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps

 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1613,59 +1665,67 @@
   variants: method
   manual_cpp_binding: True

-- func: convolution(Tensor input, Tensor weight, Tensor? bias, 
+- func: convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution
   autogen: convolution.out
   tags: core

-- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, 
+- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     CompositeExplicitAutograd, CUDA: convolution_backward
   autogen: convolution_backward.out
   tags: core

-- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, 
+- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution_overrideable
   autogen: convolution_overrideable.out

-- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, 
+- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
   dispatch:
     CompositeExplicitAutograd: convolution_backward_overrideable
   autogen: convolution_backward_overrideable.out

-- func: _convolution(Tensor input, Tensor weight, Tensor? bias, 
+- func: _convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _convolution
   autogen: _convolution.out

-- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, 
+- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, int[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor

-- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, 
+- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, str padding, SymInt[] dilation, SymInt groups) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _convolution_mode_symint

-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, 
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)

-- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv1d_symint

-- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv2d_symint

-- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv3d_symint

-- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, str padding="valid", SymInt[1] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv1d_padding_symint

-- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding="valid", SymInt[2] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv2d_padding_symint

-- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, str padding="valid", SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
+  dispatch:
+    CompositeImplicitAutograd: conv3d_padding_symint

 - func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
   dispatch:
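
The convolution hunks above migrate the schemas from plain `int[]` / `int` to `SymInt[]` / `SymInt` (symbolic integers used by the compiler stack). At the binding level this is transparent: ordinary integers are still accepted. A sketch, assuming torch-rb's `Torch::NN::Functional.conv2d` keyword form:

```ruby
input  = Torch.randn(1, 3, 8, 8)
weight = Torch.randn(6, 3, 3, 3)

# SymInt[] parameters still take plain Ruby integers or arrays.
out = Torch::NN::Functional.conv2d(input, weight, stride: 1, padding: 1)
out.shape # => [1, 6, 8, 8]
```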
@@ -1675,15 +1735,15 @@
 - func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)

 # NB: we inherit the goofy argument order from PyTorch torch.nn.functional
-- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, SymInt groups=1, SymInt[1] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose1d_symint

-- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt groups=1, SymInt[2] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose2d_symint

-- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, 
+- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt groups=1, SymInt[3] dilation=1) -> Tensor
   dispatch:
     CompositeImplicitAutograd: conv_transpose3d_symint

@@ -1691,6 +1751,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutogradNonFunctional: copy
+  tags: core

 - func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
   variants: method
@@ -1720,6 +1781,8 @@
   device_check: NoCheck   # TensorIterator
   variants: function, method
   structured_delegate: cos.out
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: cos_nested
   tags: [core, pointwise]

 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -1802,32 +1865,35 @@
     CUDA: cudnn_batch_norm_backward
   autogen: cudnn_batch_norm_backward.out

-- func: cudnn_convolution(Tensor self, Tensor weight, 
+- func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-  autogen: cudnn_convolution.out

-- func: 
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out
+
+- func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_transpose
   autogen: cudnn_convolution_transpose.out

-- func: _mps_convolution_transpose(Tensor self, Tensor weight, 
+- func: _mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution_transpose
   autogen: _mps_convolution_transpose.out

-- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, 
+- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask) -> (Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_transpose_backward
   autogen: mps_convolution_transpose_backward.out

-- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, 
+- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_relu
   autogen: cudnn_convolution_relu.out

-- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, 
+- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_add_relu
   autogen: cudnn_convolution_add_relu.out
@@ -1967,6 +2033,7 @@
   dispatch:
     CPU: ctc_loss_cpu
     CUDA: ctc_loss_gpu
+    Meta: ctc_loss_meta
   autogen: _ctc_loss.out
   tags: dynamic_output_shape  # the shape of second output is data dependent

@@ -1999,6 +2066,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: diagonal
+  tags: core

 - func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
   python_module: linalg
@@ -2079,7 +2147,7 @@
   structured_delegate: div.out_mode
   dispatch:
     SparseCPU, SparseCUDA: div_sparse
-  tags: pointwise
+  tags: [core, pointwise]

 - func: div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -2120,7 +2188,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: div
-  tags: pointwise
+  tags: [core, pointwise]

 - func: div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)
   variants: method
@@ -2302,7 +2370,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core

@@ -2370,7 +2438,7 @@
   variants: method
   device_check: NoCheck
   device_guard: False
-  tags: inplace_view
+  tags: [core, inplace_view]
   dispatch:
     Meta: resize__symint
     CPU: resize_
@@ -2408,7 +2476,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out

@@ -2517,7 +2585,7 @@
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse
     SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -2684,10 +2752,15 @@
 - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck   # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide

 - func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: floor_divide_
+  autogen: floor_divide.Scalar_out

 - func: frac(Tensor self) -> Tensor
   device_check: NoCheck   # TensorIterator
@@ -2905,12 +2978,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps

 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out

 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2918,12 +2993,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps

 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out

 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2931,12 +3008,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps

 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out

 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
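
The three FFT primitives (`_fft_r2c`, `_fft_c2r`, `_fft_c2c`) each gain an MPS dispatch entry, so FFTs can execute on Apple-silicon GPUs. A sketch, assuming an MPS-enabled build (the `Torch::FFT` module is generated from this same YAML; the availability check is an assumption about the Ruby API):

```ruby
if Torch::Backends::MPS.available?
  x = Torch.randn(1024, device: "mps")
  Torch::FFT.rfft(x) # real-to-complex FFT, now dispatched to _fft_r2c_mps
end
```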
@@ -2979,7 +3058,7 @@
 - func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
   variants: function
   dispatch:
-    
+    CompositeExplicitAutograd: _unsafe_index

 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -3253,14 +3332,22 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm

-- func: 
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear

+- func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
+  dispatch:
+    CUDA: _mixed_dtypes_linear
+
 - func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor

 - func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
@@ -3291,12 +3378,42 @@
   dispatch:
     CompositeExplicitAutograd: linspace

+- func: linspace.Tensor_Tensor(Tensor start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Tensor_Scalar(Tensor start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
+- func: linspace.Scalar_Tensor(Scalar start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace
+
 - func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: linspace_out
     CUDA: linspace_cuda_out
     MPS: linspace_out_mps

+- func: linspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: linspace_out
+
 - func: log(Tensor self) -> Tensor
   device_check: NoCheck   # TensorIterator
   structured_delegate: log.out
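
`linspace` (and `logspace` below) gain `Tensor_Tensor`, `Tensor_Scalar`, and `Scalar_Tensor` overloads, so the endpoints may be 0-d tensors instead of scalars. A call-shape sketch, assuming the new overloads are exposed through the regenerated Ruby bindings:

```ruby
Torch.linspace(0, 1, 5) # existing Scalar/Scalar form

# New Tensor_Tensor overload: endpoints can come from other computations.
a = Torch.tensor(0.0)
b = Torch.tensor(1.0)
Torch.linspace(a, b, 5) # assumed binding of linspace.Tensor_Tensor
```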
@@ -3322,7 +3439,7 @@
   device_check: NoCheck   # TensorIterator
   structured_delegate: log10.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log10_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -3346,7 +3463,7 @@
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
     SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -3372,7 +3489,7 @@
   device_check: NoCheck   # TensorIterator
   structured_delegate: log2.out
   variants: function, method
-  tags: pointwise
+  tags: [core, pointwise]

 - func: log2_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -3477,11 +3594,41 @@
   dispatch:
     CompositeExplicitAutograd: logspace

+- func: logspace.Tensor_Tensor(Tensor start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Tensor_Scalar(Tensor start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
+- func: logspace.Scalar_Tensor(Scalar start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace
+
 - func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: logspace_out
     CUDA: logspace_cuda_out

+- func: logspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+  category_override: factory
+  dispatch:
+    CompositeExplicitAutograd: logspace_out
+
 # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
 - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
@@ -3847,17 +3994,17 @@
 # TODO: Add this function to MPS dispatch key so that we avoid declaring it in
 # native_functions.yaml
 # https://github.com/pytorch/pytorch/issues/77394
-- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, 
+- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     MPS: _mps_convolution
   autogen: _mps_convolution.out

-- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, 
+- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     MPS: mps_convolution_backward
   autogen: mps_convolution_backward.out

-- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, 
+- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: mkldnn_convolution
   autogen: mkldnn_convolution.out
@@ -3883,26 +4030,26 @@
     CUDA: miopen_batch_norm_backward
   autogen: miopen_batch_norm_backward.out

-- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, 
+- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution
   autogen: miopen_convolution.out

-- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, 
+- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution_transpose
   autogen: miopen_convolution_transpose.out

-- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, 
+- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_depthwise_convolution
   autogen: miopen_depthwise_convolution.out

-- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, 
+- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_relu

-- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, 
+- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
   dispatch:
     CUDA: miopen_convolution_add_relu

@@ -3943,6 +4090,20 @@
   dispatch:
     CUDA: _int_mm_out_cuda
 
+- func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
+  dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
+    CUDA: _convert_weight_to_int4pack_cuda
+
+- func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+  dispatch:
+    CPU: _weight_int4pack_mm_cpu
+    CUDA: _weight_int4pack_mm_cuda
+
+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
 
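The three ops added above back weight-only quantized matmul. A minimal sketch of the int8 path, calling through torch.ops.aten with the argument list declared in the schema; the shapes, dtypes, and per-output-channel scale semantics are assumptions, since the schema alone does not pin them down:

    import torch

    a = torch.randn(8, 64, dtype=torch.bfloat16)              # activations [M, K] (assumed dtype)
    w = torch.randint(-128, 127, (32, 64), dtype=torch.int8)  # quantized weight [N, K]
    scales = torch.rand(32, dtype=torch.bfloat16)             # per-output-channel scales (assumed)

    # Assumed semantics: out[m, n] = sum_k a[m, k] * w[n, k] * scales[n]
    out = torch.ops.aten._weight_int8pack_mm(a, w, scales)
    print(out.shape)  # torch.Size([8, 32])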
@@ -4087,6 +4248,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: narrow_symint
+    NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint
 
 - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
   variants: function, method
@@ -4199,7 +4361,7 @@
 
 - func: _nnpack_available() -> bool
 
-- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, 
+- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor
   variants: function
   dispatch:
     CompositeExplicitAutograd: _nnpack_spatial_convolution
@@ -4314,23 +4476,24 @@
 - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
   dispatch:
     CPU: pixel_shuffle_cpu
+    MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core
 
 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
     CPU: pixel_unshuffle_cpu
+    MPS: pixel_unshuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_unshuffle
   autogen: pixel_unshuffle.out
 
-- func: channel_shuffle(Tensor self, 
+- func: channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU, CUDA: channel_shuffle
     QuantizedCPU: channel_shuffle_quantized_cpu
   autogen: channel_shuffle.out
 
-- func: native_channel_shuffle(Tensor self, 
+- func: native_channel_shuffle(Tensor self, SymInt groups) -> Tensor
   dispatch:
     CPU: channel_shuffle_cpu
     CompositeImplicitAutograd: math_channel_shuffle
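channel_shuffle's groups argument becomes a SymInt here, and pixel_shuffle/pixel_unshuffle gain MPS kernels. A quick illustration of what the two ops compute, using the Python bindings (assuming the usual top-level exposure; the op behavior itself is unchanged by this diff):

    import torch

    x = torch.arange(16, dtype=torch.float32).reshape(1, 4, 2, 2)
    # channel_shuffle regroups channels: with groups=2, [a0, a1, b0, b1] -> [a0, b0, a1, b1]
    y = torch.channel_shuffle(x, groups=2)
    # pixel_shuffle trades channels for resolution: (C*r^2, H, W) -> (C, H*r, W*r)
    z = torch.pixel_shuffle(x, upscale_factor=2)
    print(y.shape, z.shape)  # torch.Size([1, 4, 2, 2]) torch.Size([1, 1, 4, 4])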
@@ -4338,7 +4501,7 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    CUDA: is_pinned_cuda
+    NestedTensorCUDA, CUDA: is_pinned_cuda
     MPS: is_pinned_mps
     CompositeExplicitAutograd: is_pinned_default
 
@@ -4352,6 +4515,7 @@
   dispatch:
     CUDA: _pin_memory_cuda
     MPS: _pin_memory_mps
+    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
   autogen: _pin_memory.out
 
 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
@@ -4660,7 +4824,7 @@
   autogen: repeat.out
   tags: core
 
-- func: repeat_interleave.Tensor(Tensor repeats, *, 
+- func: repeat_interleave.Tensor(Tensor repeats, *, SymInt? output_size=None) -> Tensor
   variants: function
   dispatch:
     CPU: repeat_interleave_cpu
@@ -4669,10 +4833,12 @@
   tags: dynamic_output_shape
   autogen: repeat_interleave.Tensor_out
 
-- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, 
+- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
+  dispatch:
+    CompositeImplicitAutograd: repeat_interleave_symint
 
-- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, 
+- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
   variants: function, method
   dispatch:
     CompositeImplicitAutograd: repeat_interleave_symint
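All three repeat_interleave overloads now take output_size as SymInt?, so a traced graph can carry the result length symbolically instead of syncing to compute it. The overloads in action (outputs follow from the documented semantics):

    import torch

    x = torch.tensor([1, 2, 3])
    print(torch.repeat_interleave(x, 2))                        # tensor([1, 1, 2, 2, 3, 3])
    print(torch.repeat_interleave(x, torch.tensor([1, 0, 2])))  # tensor([1, 3, 3])
    # Tensor overload: repeats only; output_size pre-declares the result length
    print(torch.repeat_interleave(torch.tensor([2, 3]), output_size=5))  # tensor([0, 0, 1, 1, 1])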
@@ -4683,7 +4849,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor: 
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint
 
 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4842,6 +5008,7 @@
   device_check: NoCheck   # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
 
 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
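The in-place gelu_ gains a QuantizedCPU kernel here. For reference, the op's approximate flag selects between the exact CDF form and the tanh estimate:

    import torch
    import torch.nn.functional as F

    x = torch.randn(4)
    y_exact = F.gelu(x)                     # approximate='none': x * Phi(x)
    y_tanh = F.gelu(x, approximate='tanh')  # cheaper tanh-based approximation
    print((y_exact - y_tanh).abs().max())   # the two agree closely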
@@ -4973,12 +5140,14 @@
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
+  tags: pointwise
 
 - func: silu_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: silu.out
   python_module: nn
   dispatch:
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
+  tags: pointwise
 
 - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -4987,6 +5156,7 @@
   dispatch:
     CPU, CUDA: silu_out
     MPS: silu_out_mps
+  tags: pointwise
 
 - func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -4995,6 +5165,7 @@
   dispatch:
     CPU, CUDA: silu_backward_out
     MPS: silu_backward_out_mps
+  tags: pointwise
 
 - func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: silu_backward.grad_input
@@ -5002,6 +5173,7 @@
   dispatch:
     CompositeImplicitAutograd: math_silu_backward
     NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
+  tags: pointwise
 
 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
@@ -5017,11 +5189,13 @@
   python_module: nn
   dispatch:
     CPU, CUDA: mish_out
+    MPS: mish_out_mps
 
 - func: mish_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: mish_backward
+    MPS: mish_backward_mps
     CompositeImplicitAutograd: math_mish_backward
 
 - func: sigmoid(Tensor self) -> Tensor
@@ -5076,6 +5250,7 @@
   dispatch:
     SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
+    NestedTensorCPU, NestedTensorCUDA: sin_nested
   tags: [core, pointwise]
 
 - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5221,6 +5396,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out
 
+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et. al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+#   input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+#   easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
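A hedged sketch of what the new op does, inferred from the comment above and the kernel name slice_inverse_symint: it reinterprets a sliced view back through the layout of its base, metadata-only (the as_strided-equivalence is an assumption, not stated by the schema):

    import torch

    base = torch.arange(6.)
    sl = torch.ops.aten.slice.Tensor(base, 0, 1, 5, 2)   # view holding elements 1 and 3
    # Reverse view_func: view `sl` with src's sizes/strides/offset again (assumed semantics)
    back = torch.ops.aten.slice_inverse(sl, base, 0, 1, 5, 2)
    print(back.shape)               # torch.Size([6])
    print(torch.equal(back, base))  # True -- same storage, original layout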
@@ -5228,7 +5418,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]
 
 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
@@ -5427,6 +5617,14 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda
 
+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
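_chunk_cat is new and only schema-level here, so rather than guessing kernel constraints, here is a reference approximation with public ops matching the assumed semantics (split each input into num_chunks pieces, then concatenate the pieces group-wise); the padding behavior for non-divisible sizes is not specified by the schema:

    import torch

    def chunk_cat_reference(tensors, dim, num_chunks):
        # Assumed semantics of aten::_chunk_cat, expressed with public ops.
        chunked = [t.chunk(num_chunks, dim=dim) for t in tensors]
        return torch.stack([torch.cat([c[k] for c in chunked], dim=dim)
                            for k in range(num_chunks)])

    a, b = torch.arange(4.), torch.arange(8.)
    print(chunk_cat_reference([a, b], dim=0, num_chunks=2))
    # tensor([[0., 1., 0., 1., 2., 3.],
    #         [2., 3., 4., 5., 6., 7.]])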
@@ -5618,6 +5816,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out
 
 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
| @@ -5873,7 +6072,6 @@ | |
| 5873 6072 | 
             
                CPU, MPS: roll
         | 
| 5874 6073 | 
             
                CUDA: roll_cuda
         | 
| 5875 6074 | 
             
              autogen: roll.out
         | 
| 5876 | 
            -
              tags: core
         | 
| 5877 6075 |  | 
| 5878 6076 | 
             
            # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args
         | 
| 5879 6077 |  | 
@@ -5956,6 +6154,52 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out
 
+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
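These accessors expose the internals of jagged-layout nested tensors: a packed values buffer plus offsets into its ragged dimension. A sketch, assuming the torch.nested entry point for constructing a jagged nested tensor in recent PyTorch:

    import torch

    nt = torch.nested.nested_tensor(
        [torch.randn(2, 4), torch.randn(3, 4)], layout=torch.jagged)
    values = torch.ops.aten._nested_get_values(nt)    # packed [5, 4] buffer (view)
    offsets = torch.ops.aten._nested_get_offsets(nt)  # tensor([0, 2, 5])
    print(values.shape, offsets)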
@@ -5971,7 +6215,7 @@
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse
     SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: trunc_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: trunc.out
@@ -6140,6 +6384,7 @@
   variants: function
   dispatch:
     CPU, CUDA: var_mean
+    MPS: var_mean_mps
   autogen: var_mean.correction_out
 
 - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6160,15 +6405,13 @@
   device_check: NoCheck   # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: where
-    MPS: where_mps
+    CPU, CUDA, MPS: where
   tags: [core, pointwise]
 
 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   dispatch:
-    CPU, CUDA: where_self_out
-    MPS: where_self_out_mps
+    CPU, CUDA, MPS: where_self_out
 
 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
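The MPS kernels for where are folded into the common CPU/CUDA entry here; behavior is unchanged. For reference, where is a broadcasting elementwise select:

    import torch

    cond = torch.tensor([[True, False], [False, True]])
    a = torch.ones(2, 2)
    b = torch.zeros(2, 2)
    print(torch.where(cond, a, b))
    # tensor([[1., 0.],
    #         [0., 1.]])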
@@ -6196,6 +6439,7 @@
   dispatch:
     CPU: weight_norm_cpu
     CUDA: weight_norm_cuda
+    MPS: weight_norm_mps
   autogen: _weight_norm_interface.out
 
 - func: _weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6203,6 +6447,7 @@
   dispatch:
     CPU: weight_norm_backward_cpu
     CUDA: weight_norm_backward_cuda
+    MPS: weight_norm_backward_mps
   autogen: _weight_norm_interface_backward.out
 
 - func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
@@ -6219,6 +6464,7 @@
   dispatch:
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
+    MPS: _efficientzerotensor_mps
     Meta: _efficientzerotensor_meta
   autogen: _efficientzerotensor.out
 
@@ -6506,7 +6752,7 @@
     MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
-    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
     NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
@@ -6675,12 +6921,12 @@
   structured_delegate: _addmm_activation.out
   variants: function, method
 
-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
   variants: function
   dispatch:
     CUDA: _scaled_mm_cuda
 
-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
   variants: function
   dispatch:
     CUDA: _scaled_mm_out_cuda
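_scaled_mm grows a use_fast_accum flag, trading accumulation precision for speed on fp8 matmuls. A CUDA-only sketch; the fp8 dtypes, the column-major requirement on mat2, and the two-tensor return (result plus amax, per the schema above) are assumptions beyond what the schema alone guarantees on any given build:

    import torch

    if torch.cuda.is_available():
        a = torch.randn(16, 16, device="cuda").to(torch.float8_e4m3fn)
        b = torch.randn(16, 16, device="cuda").to(torch.float8_e4m3fn).t()
        out, amax = torch._scaled_mm(a, b, out_dtype=torch.float16,
                                     use_fast_accum=True)  # the new flag
        print(out.shape, amax)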
@@ -6796,7 +7042,7 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.
 
-- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, 
+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor
 
@@ -6813,7 +7059,10 @@
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 
-- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, 
+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6899,7 +7148,7 @@
   dispatch:
     CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
   device_check: NoCheck
   device_guard: False
 
@@ -6916,7 +7165,7 @@
   dispatch:
     CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
   device_check: NoCheck
   device_guard: False
 
@@ -6932,7 +7181,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False
 
@@ -6995,7 +7244,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
-    SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
@@ -7004,7 +7253,7 @@
 - func: crow_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
     CompositeExplicitAutograd: crow_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7012,7 +7261,7 @@
 - func: col_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
     CompositeExplicitAutograd: col_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7020,7 +7269,7 @@
 - func: ccol_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
     CompositeExplicitAutograd: ccol_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7028,7 +7277,7 @@
 - func: row_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
     CompositeExplicitAutograd: row_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7055,7 +7304,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: unbind
-
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
 
 - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
   variants: function, method
@@ -7143,14 +7392,14 @@
     CPU: dense_to_mkldnn
   autogen: to_mkldnn.out
 
-- func: mkldnn_reorder_conv2d_weight(Tensor self, 
+- func: mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
   variants: function
   python_module: nn
   dispatch:
     MkldnnCPU: mkldnn_reorder_conv2d_weight
   autogen: mkldnn_reorder_conv2d_weight.out
 
-- func: mkldnn_reorder_conv3d_weight(Tensor self, 
+- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
   variants: function
   python_module: nn
   dispatch:
@@ -7537,6 +7786,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_
   autogen: set.source_Storage, set.source_Storage_out
+  tags: inplace_view
 
 - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7549,6 +7799,7 @@
     MPS: set_storage_mps_
     QuantizedCPU, QuantizedCUDA: set_storage_quantized_
   autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+  tags: inplace_view
 
 - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7556,6 +7807,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: set__symint
+  tags: inplace_view
 
 - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
   variants: method
@@ -7564,6 +7816,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_tensor_
   autogen: set.source_Tensor, set.source_Tensor_out
+  tags: inplace_view
 
 - func: set_(Tensor(a!) self) -> Tensor(a!)
   variants: method
@@ -7573,6 +7826,7 @@
     Meta: set_meta_
     MPS: set_mps_
   autogen: set, set.out
+  tags: inplace_view
 
 # Not making it CompositeImplicitAutograd because lift
 # should be a primitive w.r.t. functorch
@@ -7656,6 +7910,10 @@
   dispatch:
     CompositeExplicitAutograd: masked_scatter
 
+- func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: masked_scatter_backward_symint
+
 - func: _masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor
   dispatch:
     CUDA: masked_softmax_cuda
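masked_scatter_backward gives the autograd formula its own op. For orientation, the forward fills masked positions with source values taken in order, so the backward has to route gradients back to exactly those source slots:

    import torch

    x = torch.zeros(2, 3)
    mask = torch.tensor([[True, False, True], [False, True, False]])
    src = torch.tensor([1., 2., 3.])
    print(x.masked_scatter(mask, src))
    # tensor([[1., 0., 2.],
    #         [0., 3., 0.]])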
@@ -7938,6 +8196,8 @@
 - func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: bitwise_and_
   tags: pointwise
 
 - func: bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -7982,6 +8242,8 @@
 - func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck   # TensorIterator
   variants: method, function
+  dispatch:
+    CompositeExplicitAutograd: bitwise_or
   tags: [core, pointwise]
 
 - func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
@@ -8001,6 +8263,8 @@
 - func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: bitwise_or_
   tags: pointwise
 
 - func: bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -8045,6 +8309,8 @@
 - func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck   # TensorIterator
   variants: method, function
+  dispatch:
+    CompositeExplicitAutograd: bitwise_xor
   tags: [core, pointwise]
 
 - func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
@@ -8064,6 +8330,8 @@
 - func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: bitwise_xor_
   tags: pointwise
 
 - func: bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
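The Scalar overloads of the in-place bitwise ops pick up explicit CompositeExplicitAutograd dispatch entries above; semantics are the usual integer bit operations:

    import torch

    a = torch.tensor([0b1100, 0b1010])
    a.bitwise_and_(0b1001)
    print(a)  # tensor([8, 8])

    b = torch.tensor([1, 2])
    b.bitwise_or_(4)
    print(b)  # tensor([5, 6])

    c = torch.tensor([1, 3])
    c.bitwise_xor_(1)
    print(c)  # tensor([0, 2])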
@@ -8504,6 +8772,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: eq_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
   tags: [core, pointwise]
 
 - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8540,6 +8809,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: ge_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
   tags: [core, pointwise]
 
 - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8666,6 +8936,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: gt_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
   tags: [core, pointwise]
 
 - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
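eq, ge, and gt against a scalar now dispatch to dedicated nested-tensor kernels, so comparisons work per-component on ragged inputs. A sketch (the nested-tensor repr varies by version):

    import torch

    nt = torch.nested.nested_tensor([torch.tensor([1., -2.]), torch.tensor([3.])])
    print(nt > 0)  # component-wise: [[True, False], [True]]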
@@ -9106,6 +9377,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: lgamma_out
+    MPS: lgamma_out_mps
   tags: pointwise
 
 - func: lgamma_(Tensor(a!) self) -> Tensor(a!)
@@ -9126,6 +9398,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: digamma_out
+    MPS: digamma_out_mps
   tags: pointwise
 
 - func: digamma(Tensor self) -> Tensor
@@ -9140,6 +9413,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: polygamma_out
+    MPS: polygamma_out_mps
   tags: pointwise
 
 - func: polygamma(int n, Tensor self) -> Tensor
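lgamma, digamma, and polygamma each gain an MPS kernel above. The three are tightly related: digamma is the derivative of log-gamma and equals polygamma(0, x):

    import torch

    x = torch.tensor([0.5, 1.0, 2.0])
    print(torch.lgamma(x))        # log|Gamma(x)|: [log(sqrt(pi)), 0, 0]
    print(torch.digamma(x))       # == torch.polygamma(0, x)
    print(torch.polygamma(1, x))  # trigamma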
@@ -9263,7 +9537,7 @@
   dispatch:
     CPU, CUDA: atan2_out
     MPS: atan2_out_mps
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -9275,7 +9549,7 @@
   device_check: NoCheck   # TensorIterator
   structured_delegate: atan2.out
   variants: method, function
-  tags: pointwise
+  tags: [core, pointwise]
 # arctan2, alias of atan2
 
 - func: arctan2(Tensor self, Tensor other) -> Tensor
@@ -9464,7 +9738,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: nextafter_out
+    CPU, CUDA, MPS: nextafter_out
   tags: pointwise
 
 - func: nextafter(Tensor self, Tensor other) -> Tensor
@@ -9811,7 +10085,7 @@
 - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
   device_check: NoCheck   # TensorIterator
   structured_delegate: pow.Scalar_out
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck   # TensorIterator
@@ -9954,12 +10228,14 @@
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+    CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out
 
 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+    CPU: _amp_update_scale_cpu_
   autogen: _amp_update_scale, _amp_update_scale.out
 
    #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
| @@ -10020,6 +10296,21 @@ | |
| 10020 10296 | 
             
                CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
         | 
| 10021 10297 | 
             
              autogen: _foreach_add.ScalarList_out
         | 
| 10022 10298 |  | 
| 10299 | 
            +
            - func: _foreach_add.Tensor(Tensor[] self, Tensor other, *, Scalar alpha=1) -> Tensor[]
         | 
| 10300 | 
            +
              device_check: NoCheck   # foreach kernels fall back to slow path when tensor are on different devices
         | 
| 10301 | 
            +
              variants: function
         | 
| 10302 | 
            +
              dispatch:
         | 
| 10303 | 
            +
                CPU: foreach_tensor_add_tensor_kernel_slow
         | 
| 10304 | 
            +
                CUDA: foreach_tensor_add_tensor_kernel_cuda
         | 
| 10305 | 
            +
             | 
| 10306 | 
            +
            - func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
         | 
| 10307 | 
            +
  device_check: NoCheck   # foreach kernels fall back to slow path when tensors are on different devices
         | 
| 10308 | 
            +
              variants: function
         | 
| 10309 | 
            +
              dispatch:
         | 
| 10310 | 
            +
                CPU: foreach_tensor_add_tensor_kernel_slow_
         | 
| 10311 | 
            +
                CUDA: foreach_tensor_add_tensor_kernel_cuda_
         | 
| 10312 | 
            +
              autogen: _foreach_add.Tensor_out
         | 
| 10313 | 
            +
             | 
| 10023 10314 | 
             
            - func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
         | 
| 10024 10315 | 
             
  device_check: NoCheck   # foreach kernels fall back to slow path when tensors are on different devices
         | 
| 10025 10316 | 
             
              variants: function
         | 
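`_foreach_add` gains a `.Tensor` overload (and `_foreach_div` gets the same treatment below): a single `other` tensor is applied against every tensor in the list, with fast CUDA kernels and a slow CPU path. Semantically, and assuming the private `torch._foreach_add` binding follows this schema:

```python
import torch

tensors = [torch.ones(2), torch.ones(3)]
other = torch.tensor(5.0)

out = torch._foreach_add(tensors, other, alpha=2)   # new Tensor overload
ref = [t.add(other, alpha=2) for t in tensors]      # equivalent slow path
```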
| @@ -10170,6 +10461,21 @@ | |
| 10170 10461 | 
             
                CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
         | 
| 10171 10462 | 
             
              autogen: _foreach_div.ScalarList_out
         | 
| 10172 10463 |  | 
| 10464 | 
            +
            - func: _foreach_div.Tensor(Tensor[] self, Tensor other) -> Tensor[]
         | 
| 10465 | 
            +
  device_check: NoCheck   # foreach kernels fall back to slow path when tensors are on different devices
         | 
| 10466 | 
            +
              variants: function
         | 
| 10467 | 
            +
              dispatch:
         | 
| 10468 | 
            +
                CPU: foreach_tensor_div_tensor_kernel_slow
         | 
| 10469 | 
            +
                CUDA: foreach_tensor_div_tensor_kernel_cuda
         | 
| 10470 | 
            +
             | 
| 10471 | 
            +
            - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
         | 
| 10472 | 
            +
  device_check: NoCheck   # foreach kernels fall back to slow path when tensors are on different devices
         | 
| 10473 | 
            +
              variants: function
         | 
| 10474 | 
            +
              dispatch:
         | 
| 10475 | 
            +
                CPU: foreach_tensor_div_tensor_kernel_slow_
         | 
| 10476 | 
            +
                CUDA: foreach_tensor_div_tensor_kernel_cuda_
         | 
| 10477 | 
            +
              autogen: _foreach_div.Tensor_out
         | 
| 10478 | 
            +
             | 
| 10173 10479 | 
             
            - func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
         | 
| 10174 10480 | 
             
  device_check: NoCheck   # foreach kernels fall back to slow path when tensors are on different devices
         | 
| 10175 10481 | 
             
              variants: function
         | 
| @@ -10990,37 +11296,44 @@ | |
| 10990 11296 | 
             
              dispatch:
         | 
| 10991 11297 | 
             
                CPU: bucketize_cpu
         | 
| 10992 11298 | 
             
                CUDA: bucketize_cuda
         | 
| 11299 | 
            +
                MPS: bucketize_mps
         | 
| 10993 11300 |  | 
| 10994 11301 | 
             
            - func: bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
         | 
| 10995 11302 | 
             
              dispatch:
         | 
| 10996 11303 | 
             
                CPU: bucketize_out_cpu
         | 
| 10997 11304 | 
             
                CUDA: bucketize_out_cuda
         | 
| 11305 | 
            +
                MPS: bucketize_out_mps
         | 
| 10998 11306 |  | 
| 10999 11307 | 
             
            - func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
         | 
| 11000 11308 | 
             
              dispatch:
         | 
| 11001 11309 | 
             
                CPU: bucketize_cpu
         | 
| 11002 11310 | 
             
                CUDA: bucketize_cuda
         | 
| 11311 | 
            +
                MPS: bucketize_mps
         | 
| 11003 11312 | 
             
              autogen: bucketize.Scalar_out
         | 
| 11004 11313 |  | 
| 11005 11314 | 
             
            - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
         | 
| 11006 11315 | 
             
              dispatch:
         | 
| 11007 11316 | 
             
                CPU: searchsorted_cpu
         | 
| 11008 11317 | 
             
                CUDA: searchsorted_cuda
         | 
| 11318 | 
            +
                MPS: searchsorted_mps
         | 
| 11009 11319 |  | 
| 11010 11320 | 
             
            - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
         | 
| 11011 11321 | 
             
              dispatch:
         | 
| 11012 11322 | 
             
                CPU: searchsorted_out_cpu
         | 
| 11013 11323 | 
             
                CUDA: searchsorted_out_cuda
         | 
| 11324 | 
            +
                MPS: searchsorted_out_mps
         | 
| 11014 11325 |  | 
| 11015 11326 | 
             
            - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
         | 
| 11016 11327 | 
             
              dispatch:
         | 
| 11017 11328 | 
             
                CPU: searchsorted_cpu
         | 
| 11018 11329 | 
             
                CUDA: searchsorted_cuda
         | 
| 11330 | 
            +
                MPS: searchsorted_mps
         | 
| 11019 11331 |  | 
| 11020 11332 | 
             
            - func: searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
         | 
| 11021 11333 | 
             
              dispatch:
         | 
| 11022 11334 | 
             
                CPU: searchsorted_out_cpu
         | 
| 11023 11335 | 
             
                CUDA: searchsorted_out_cuda
         | 
| 11336 | 
            +
                MPS: searchsorted_out_mps
         | 
| 11024 11337 |  | 
| 11025 11338 | 
             
            - func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
         | 
| 11026 11339 | 
             
              structured_delegate: _convert_indices_from_coo_to_csr.out
         | 
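`bucketize` and `searchsorted` gain MPS kernels across every variant in this block. The call sites are unchanged; note the flipped argument order between the two (values first for `bucketize`, sorted sequence first for `searchsorted`):

```python
import torch

boundaries = torch.tensor([1.0, 3.0, 5.0])
values = torch.tensor([0.5, 2.0, 4.0, 6.0])

print(torch.bucketize(values, boundaries))     # tensor([0, 1, 2, 3])
print(torch.searchsorted(boundaries, values))  # tensor([0, 1, 2, 3])
```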
| @@ -11568,6 +11881,7 @@ | |
| 11568 11881 | 
             
              python_module: nn
         | 
| 11569 11882 | 
             
              dispatch:
         | 
| 11570 11883 | 
             
                CPU, CUDA: softshrink_out
         | 
| 11884 | 
            +
                MPS: softshrink_out_mps
         | 
| 11571 11885 |  | 
| 11572 11886 | 
             
            - func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
         | 
| 11573 11887 | 
             
              structured_delegate: softshrink.out
         | 
| @@ -11580,6 +11894,7 @@ | |
| 11580 11894 | 
             
              python_module: nn
         | 
| 11581 11895 | 
             
              dispatch:
         | 
| 11582 11896 | 
             
                CPU, CUDA: softshrink_backward_out
         | 
| 11897 | 
            +
                MPS: softshrink_backward_out_mps
         | 
| 11583 11898 |  | 
| 11584 11899 | 
             
            - func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
         | 
| 11585 11900 | 
             
              structured_delegate: softshrink_backward.grad_input
         | 
| @@ -12144,6 +12459,7 @@ | |
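`softshrink` and its backward gain MPS kernels. Softshrink is y = x - lambd for x > lambd, y = x + lambd for x < -lambd, and 0 otherwise:

```python
import torch
import torch.nn.functional as F

x = torch.tensor([-1.0, -0.2, 0.0, 0.2, 1.0])
print(F.softshrink(x, lambd=0.5))
# tensor([-0.5000, 0.0000, 0.0000, 0.0000, 0.5000])
```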
| 12144 12459 | 
             
              dispatch:
         | 
| 12145 12460 | 
             
                CPU: upsample_linear1d_out_cpu
         | 
| 12146 12461 | 
             
                CUDA: upsample_linear1d_out_cuda
         | 
| 12462 | 
            +
                MPS: upsample_linear1d_out_mps
         | 
| 12147 12463 |  | 
| 12148 12464 | 
             
            - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
         | 
| 12149 12465 | 
             
              python_module: nn
         | 
| @@ -12155,6 +12471,7 @@ | |
| 12155 12471 | 
             
              dispatch:
         | 
| 12156 12472 | 
             
                CPU: upsample_linear1d_backward_out_cpu
         | 
| 12157 12473 | 
             
                CUDA: upsample_linear1d_backward_out_cuda
         | 
| 12474 | 
            +
                MPS: upsample_linear1d_backward_out_mps
         | 
| 12158 12475 |  | 
| 12159 12476 | 
             
            - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
         | 
| 12160 12477 | 
             
              python_module: nn
         | 
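1-D linear upsampling (forward and backward) gains MPS kernels. These ops are normally reached through `interpolate` on a 3-D (batch, channels, width) input:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 4, 8)
y = F.interpolate(x, size=16, mode="linear", align_corners=False)
print(y.shape)  # torch.Size([1, 4, 16])
```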
| @@ -12482,101 +12799,101 @@ | |
| 12482 12799 | 
             
            # make the operational distinction clear.
         | 
| 12483 12800 | 
             
              tags: pointwise
         | 
| 12484 12801 |  | 
| 12485 | 
            -
            - func: slow_conv_transpose2d.out(Tensor self, Tensor weight,  | 
| 12802 | 
            +
            - func: slow_conv_transpose2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 12486 12803 | 
             
              python_module: nn
         | 
| 12487 12804 | 
             
              structured: True
         | 
| 12488 12805 | 
             
              dispatch:
         | 
| 12489 12806 | 
             
                CPU: slow_conv_transpose2d_structured_cpu
         | 
| 12490 12807 | 
             
                CUDA: slow_conv_transpose2d_structured_cuda
         | 
| 12491 12808 |  | 
| 12492 | 
            -
            - func: slow_conv_transpose2d(Tensor self, Tensor weight,  | 
| 12809 | 
            +
            - func: slow_conv_transpose2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1) -> Tensor
         | 
| 12493 12810 | 
             
              python_module: nn
         | 
| 12494 12811 | 
             
              structured_delegate: slow_conv_transpose2d.out
         | 
| 12495 12812 |  | 
| 12496 | 
            -
            - func: slow_conv_transpose3d.out(Tensor self, Tensor weight,  | 
| 12813 | 
            +
            - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 12497 12814 | 
             
              python_module: nn
         | 
| 12498 12815 | 
             
              dispatch:
         | 
| 12499 12816 | 
             
                CPU: slow_conv_transpose3d_out_cpu
         | 
| 12500 12817 | 
             
                CUDA: slow_conv_transpose3d_out_cuda
         | 
| 12501 12818 |  | 
| 12502 | 
            -
            - func: slow_conv_transpose3d(Tensor self, Tensor weight,  | 
| 12819 | 
            +
            - func: slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor
         | 
| 12503 12820 | 
             
              python_module: nn
         | 
| 12504 12821 | 
             
              dispatch:
         | 
| 12505 12822 | 
             
                CPU: slow_conv_transpose3d_cpu
         | 
| 12506 12823 | 
             
                CUDA: slow_conv_transpose3d_cuda
         | 
| 12507 12824 |  | 
| 12508 | 
            -
            - func: thnn_conv2d.out(Tensor self, Tensor weight,  | 
| 12825 | 
            +
            - func: thnn_conv2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 12509 12826 | 
             
              python_module: nn
         | 
| 12510 12827 |  | 
| 12511 | 
            -
            - func: thnn_conv2d(Tensor self, Tensor weight,  | 
| 12828 | 
            +
            - func: thnn_conv2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0) -> Tensor
         | 
| 12512 12829 | 
             
              python_module: nn
         | 
| 12513 12830 |  | 
| 12514 | 
            -
            - func: _slow_conv2d_forward.output(Tensor self, Tensor weight,  | 
| 12831 | 
            +
            - func: _slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
         | 
| 12515 12832 | 
             
              python_module: nn
         | 
| 12516 12833 | 
             
              dispatch:
         | 
| 12517 12834 | 
             
                CPU: slow_conv2d_forward_out_cpu
         | 
| 12518 12835 | 
             
                CUDA: slow_conv2d_forward_out_cuda
         | 
| 12519 12836 |  | 
| 12520 | 
            -
            - func: _slow_conv2d_forward(Tensor self, Tensor weight,  | 
| 12837 | 
            +
            - func: _slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
         | 
| 12521 12838 | 
             
              python_module: nn
         | 
| 12522 12839 | 
             
              dispatch:
         | 
| 12523 12840 | 
             
                CPU: slow_conv2d_forward_cpu
         | 
| 12524 12841 | 
             
                CUDA: slow_conv2d_forward_cuda
         | 
| 12525 12842 |  | 
| 12526 | 
            -
            - func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight,  | 
| 12843 | 
            +
            - func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
         | 
| 12527 12844 | 
             
              python_module: nn
         | 
| 12528 12845 | 
             
              dispatch:
         | 
| 12529 12846 | 
             
                CPU: slow_conv2d_backward_out_cpu
         | 
| 12530 12847 | 
             
                CUDA: slow_conv2d_backward_out_cuda
         | 
| 12531 12848 |  | 
| 12532 | 
            -
            - func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight,  | 
| 12849 | 
            +
            - func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
         | 
| 12533 12850 | 
             
              python_module: nn
         | 
| 12534 12851 | 
             
              dispatch:
         | 
| 12535 12852 | 
             
                CPU: slow_conv2d_backward_cpu
         | 
| 12536 12853 | 
             
                CUDA: slow_conv2d_backward_cuda
         | 
| 12537 12854 | 
             
              autogen: _slow_conv2d_backward.output_mask_out
         | 
| 12538 12855 |  | 
| 12539 | 
            -
            - func: _conv_depthwise2d.out(Tensor self, Tensor weight,  | 
| 12856 | 
            +
            - func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 12540 12857 | 
             
              use_const_ref_for_mutable_tensors: True
         | 
| 12541 12858 | 
             
              python_module: nn
         | 
| 12542 12859 | 
             
              dispatch:
         | 
| 12543 12860 | 
             
                CUDA: conv_depthwise2d_cuda_out
         | 
| 12544 12861 |  | 
| 12545 | 
            -
            - func: _conv_depthwise2d(Tensor self, Tensor weight,  | 
| 12862 | 
            +
            - func: _conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor
         | 
| 12546 12863 | 
             
              python_module: nn
         | 
| 12547 12864 | 
             
              dispatch:
         | 
| 12548 12865 | 
             
                CUDA: conv_depthwise2d_cuda
         | 
| 12549 12866 |  | 
| 12550 | 
            -
            - func: conv_depthwise3d(Tensor self, Tensor weight,  | 
| 12867 | 
            +
            - func: conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor
         | 
| 12551 12868 | 
             
              python_module: nn
         | 
| 12552 12869 | 
             
              dispatch:
         | 
| 12553 12870 | 
             
                CUDA: conv_depthwise3d_cuda
         | 
| 12554 12871 | 
             
              autogen: conv_depthwise3d.out
         | 
| 12555 12872 |  | 
| 12556 | 
            -
            - func: slow_conv3d.out(Tensor self, Tensor weight,  | 
| 12873 | 
            +
            - func: slow_conv3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 12557 12874 | 
             
              python_module: nn
         | 
| 12558 12875 |  | 
| 12559 | 
            -
            - func: slow_conv3d(Tensor self, Tensor weight,  | 
| 12876 | 
            +
            - func: slow_conv3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0) -> Tensor
         | 
| 12560 12877 | 
             
              python_module: nn
         | 
| 12561 12878 |  | 
| 12562 | 
            -
            - func: slow_conv3d_forward.output(Tensor self, Tensor weight,  | 
| 12879 | 
            +
            - func: slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
         | 
| 12563 12880 | 
             
              python_module: nn
         | 
| 12564 12881 | 
             
              dispatch:
         | 
| 12565 12882 | 
             
                CPU: slow_conv3d_forward_out_cpu
         | 
| 12566 12883 |  | 
| 12567 | 
            -
            - func: slow_conv3d_forward(Tensor self, Tensor weight,  | 
| 12884 | 
            +
            - func: slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding) -> Tensor
         | 
| 12568 12885 | 
             
              python_module: nn
         | 
| 12569 12886 | 
             
              dispatch:
         | 
| 12570 12887 | 
             
                CPU: slow_conv3d_forward_cpu
         | 
| 12571 12888 |  | 
| 12572 | 
            -
            - func: slow_conv_dilated2d(Tensor self, Tensor weight,  | 
| 12889 | 
            +
            - func: slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor
         | 
| 12573 12890 | 
             
              python_module: nn
         | 
| 12574 12891 | 
             
              dispatch:
         | 
| 12575 12892 | 
             
                CPU: slow_conv_dilated2d_cpu
         | 
| 12576 12893 | 
             
                CUDA: slow_conv_dilated2d_cuda
         | 
| 12577 12894 | 
             
              autogen: slow_conv_dilated2d.out
         | 
| 12578 12895 |  | 
| 12579 | 
            -
            - func: slow_conv_dilated3d(Tensor self, Tensor weight,  | 
| 12896 | 
            +
            - func: slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor
         | 
| 12580 12897 | 
             
              python_module: nn
         | 
| 12581 12898 | 
             
              dispatch:
         | 
| 12582 12899 | 
             
                CPU: slow_conv_dilated3d_cpu
         | 
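The block above is a mechanical signature change: `kernel_size`, `stride`, `padding`, `output_padding`, and `dilation` on the slow/depthwise convolution ops become `SymInt[]` instead of plain `int[]`, so they can be traced symbolically under dynamic shapes (`torch.compile`/`torch.export`) rather than forcing specialization. User-facing calls are unchanged:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 8, 8)
w = torch.randn(3, 4, 2, 2)  # (in_channels, out_channels, kH, kW) for transpose
y = F.conv_transpose2d(x, w, stride=2)
print(y.shape)  # torch.Size([1, 4, 16, 16])
```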
| @@ -12627,7 +12944,7 @@ | |
| 12627 12944 | 
             
                SparseMeta: isinf_sparse_meta
         | 
| 12628 12945 | 
             
                SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
         | 
| 12629 12946 | 
             
              autogen: isinf.out
         | 
| 12630 | 
            -
              tags: core
         | 
| 12947 | 
            +
              tags: [core, pointwise]
         | 
| 12631 12948 |  | 
| 12632 12949 | 
             
            - func: record_stream(Tensor(a!) self, Stream s) -> ()
         | 
| 12633 12950 | 
             
              variants: method
         | 
| @@ -13553,11 +13870,18 @@ | |
| 13553 13870 | 
             
              dispatch:
         | 
| 13554 13871 | 
             
                CPU, CUDA: linalg_eig_out
         | 
| 13555 13872 |  | 
| 13873 | 
            +
            - func: _linalg_eigvals(Tensor self) -> Tensor
         | 
| 13874 | 
            +
              python_module: linalg
         | 
| 13875 | 
            +
              dispatch:
         | 
| 13876 | 
            +
                CPU, CUDA: _linalg_eigvals
         | 
| 13877 | 
            +
             | 
| 13556 13878 | 
             
            - func: linalg_eigvals(Tensor self) -> Tensor
         | 
| 13557 13879 | 
             
              python_module: linalg
         | 
| 13558 13880 |  | 
| 13559 13881 | 
             
            - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 13560 13882 | 
             
              python_module: linalg
         | 
| 13883 | 
            +
              dispatch:
         | 
| 13884 | 
            +
                CPU, CUDA: linalg_eigvals_out
         | 
| 13561 13885 |  | 
| 13562 13886 | 
             
# This function exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
         | 
| 13563 13887 | 
             
            # `linalg.eigvalsh` as composite functions that call this one
         | 
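A private `_linalg_eigvals` entry appears and `linalg_eigvals.out` gets its own CPU/CUDA dispatch, so the out-variant no longer needs to be assembled from `linalg.eig` composites (a reading of the dispatch change, not stated in the YAML). Usage from Python:

```python
import torch

A = torch.randn(3, 3)
vals = torch.linalg.eigvals(A)            # complex eigenvalues of a real matrix
out = torch.empty(3, dtype=torch.complex64)
torch.linalg.eigvals(A, out=out)          # out= variant touched by this hunk
```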
| @@ -13861,6 +14185,12 @@ | |
| 13861 14185 | 
             
            # It is undocumented and should not be used outside of tests.
         | 
| 13862 14186 | 
             
            - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor
         | 
| 13863 14187 |  | 
| 14188 | 
            +
# Note: for testing COW materialization within the `at::parallel_for` loop function
         | 
| 14189 | 
            +
            - func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
         | 
| 14190 | 
            +
              variants: function
         | 
| 14191 | 
            +
              dispatch:
         | 
| 14192 | 
            +
                CompositeExplicitAutograd: _test_parallel_materialize
         | 
| 14193 | 
            +
             | 
| 13864 14194 | 
             
            # Note: this function is only for testing.
         | 
| 13865 14195 | 
             
            - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
         | 
| 13866 14196 | 
             
              python_module: nn
         | 
| @@ -14195,6 +14525,7 @@ | |
| 14195 14525 | 
             
              variants: function
         | 
| 14196 14526 | 
             
              dispatch:
         | 
| 14197 14527 | 
             
                CompositeExplicitAutograd: split_with_sizes_copy_out
         | 
| 14528 | 
            +
                CUDA: split_with_sizes_copy_out_cuda
         | 
| 14198 14529 |  | 
| 14199 14530 | 
             
            - func: view_copy(Tensor self, SymInt[] size) -> Tensor
         | 
| 14200 14531 | 
             
              variants: function
         | 
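`split_with_sizes_copy.out` gains a dedicated CUDA kernel alongside the generic `CompositeExplicitAutograd` path: a targeted fast path for copying splits into preallocated outputs. Assuming the Python binding accepts a sequence for `out=`, as the schema suggests:

```python
import torch

x = torch.arange(6)
outs = [torch.empty(2, dtype=x.dtype), torch.empty(4, dtype=x.dtype)]
torch.split_with_sizes_copy(x, [2, 4], out=outs)
print(outs[0], outs[1])  # tensor([0, 1]) tensor([2, 3, 4, 5])
```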
| @@ -14269,19 +14600,29 @@ | |
| 14269 14600 | 
             
              variants: function
         | 
| 14270 14601 | 
             
              tags: nondeterministic_seeded
         | 
| 14271 14602 |  | 
| 14272 | 
            -
            - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor  | 
| 14603 | 
            +
            - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
         | 
| 14273 14604 | 
             
              dispatch:
         | 
| 14274 | 
            -
                CPU: _scaled_dot_product_flash_attention_cpu
         | 
| 14275 14605 | 
             
                CUDA: _scaled_dot_product_flash_attention_cuda
         | 
| 14276 14606 | 
             
                NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
         | 
| 14277 14607 | 
             
              tags: nondeterministic_seeded
         | 
| 14278 14608 |  | 
| 14279 | 
            -
            - func:  | 
| 14609 | 
            +
            - func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
         | 
| 14610 | 
            +
              dispatch:
         | 
| 14611 | 
            +
                CPU: _scaled_dot_product_flash_attention_cpu
         | 
| 14612 | 
            +
              tags: nondeterministic_seeded
         | 
| 14613 | 
            +
             | 
| 14614 | 
            +
            - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
         | 
| 14280 14615 | 
             
              device_check: NoCheck
         | 
| 14281 14616 | 
             
              variants: function
         | 
| 14282 14617 | 
             
              dispatch:
         | 
| 14283 | 
            -
                CPU: _scaled_dot_product_flash_attention_backward_cpu
         | 
| 14284 14618 | 
             
                CUDA: _scaled_dot_product_flash_attention_backward_cuda
         | 
| 14619 | 
            +
                NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested
         | 
| 14620 | 
            +
             | 
| 14621 | 
            +
            - func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
         | 
| 14622 | 
            +
              device_check: NoCheck
         | 
| 14623 | 
            +
              variants: function
         | 
| 14624 | 
            +
              dispatch:
         | 
| 14625 | 
            +
                CPU: _scaled_dot_product_flash_attention_cpu_backward
         | 
| 14285 14626 |  | 
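The flash-attention entries are reorganized: the nine-output signature stays CUDA-only (now with `SymInt` max lengths), while CPU moves to a slimmer `_scaled_dot_product_flash_attention_for_cpu` forward/backward pair that also takes an explicit `attn_mask`. All of this remains behind the same public entry point, which picks a backend internally:

```python
import torch
import torch.nn.functional as F

q = torch.randn(1, 2, 4, 8)  # (batch, heads, seq_len, head_dim)
k = torch.randn(1, 2, 4, 8)
v = torch.randn(1, 2, 4, 8)
out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
print(out.shape)  # torch.Size([1, 2, 4, 8])
```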
| 14286 14627 | 
             
            - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
         | 
| 14287 14628 | 
             
              dispatch:
         | 
| @@ -14295,26 +14636,31 @@ | |
| 14295 14636 | 
             
                CUDA: _scaled_dot_product_efficient_attention_backward_cuda
         | 
| 14296 14637 | 
             
              tags: nondeterministic_seeded
         | 
| 14297 14638 |  | 
| 14298 | 
            -
            - func:  | 
| 14639 | 
            +
            - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
         | 
| 14640 | 
            +
              dispatch:
         | 
| 14641 | 
            +
                CUDA: _scaled_dot_product_cudnn_attention_cuda
         | 
| 14642 | 
            +
              tags: nondeterministic_seeded
         | 
| 14643 | 
            +
             | 
| 14644 | 
            +
            - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
         | 
| 14299 14645 | 
             
              variants: function
         | 
| 14300 14646 | 
             
              dispatch:
         | 
| 14301 14647 | 
             
                CUDA: _flash_attention_forward
         | 
| 14302 14648 | 
             
              tags: nondeterministic_seeded
         | 
| 14303 14649 |  | 
| 14304 | 
            -
            - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k,  | 
| 14650 | 
            +
            - func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
         | 
| 14305 14651 | 
             
              device_check: NoCheck
         | 
| 14306 14652 | 
             
              variants: function
         | 
| 14307 14653 | 
             
              dispatch:
         | 
| 14308 14654 | 
             
                CUDA: _flash_attention_backward
         | 
| 14309 14655 |  | 
| 14310 | 
            -
            # Returns  | 
| 14311 | 
            -
            - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
         | 
| 14656 | 
            +
            # Returns output, logsumexp if compute_logsumexp
         | 
| 14657 | 
            +
            - func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
         | 
| 14312 14658 | 
             
              variants: function
         | 
| 14313 14659 | 
             
              dispatch:
         | 
| 14314 14660 | 
             
                CUDA: _efficient_attention_forward
         | 
| 14315 14661 | 
             
              tags: nondeterministic_seeded
         | 
| 14316 14662 |  | 
| 14317 | 
            -
            - func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k,  | 
| 14663 | 
            +
            - func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
         | 
| 14318 14664 | 
             
              device_check: NoCheck
         | 
| 14319 14665 | 
             
              variants: function
         | 
| 14320 14666 | 
             
              dispatch:
         | 
| @@ -14422,12 +14768,16 @@ | |
| 14422 14768 | 
             
              tags: pointwise
         | 
| 14423 14769 |  | 
| 14424 14770 | 
             
            - func: special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 14771 | 
            +
              dispatch:
         | 
| 14772 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_t
         | 
| 14425 14773 | 
             
              device_check: NoCheck
         | 
| 14426 14774 | 
             
              python_module: special
         | 
| 14427 14775 | 
             
              variants: function
         | 
| 14428 14776 | 
             
              tags: pointwise
         | 
| 14429 14777 |  | 
| 14430 14778 | 
             
            - func: special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 14779 | 
            +
              dispatch:
         | 
| 14780 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_t
         | 
| 14431 14781 | 
             
              device_check: NoCheck
         | 
| 14432 14782 | 
             
              python_module: special
         | 
| 14433 14783 | 
             
              variants: function
         | 
| @@ -14444,6 +14794,8 @@ | |
| 14444 14794 | 
             
              tags: pointwise
         | 
| 14445 14795 |  | 
| 14446 14796 | 
             
            - func: special_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 14797 | 
            +
              dispatch:
         | 
| 14798 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_t_out
         | 
| 14447 14799 | 
             
              device_check: NoCheck
         | 
| 14448 14800 | 
             
              python_module: special
         | 
| 14449 14801 | 
             
              variants: function
         | 
| @@ -14465,12 +14817,16 @@ | |
| 14465 14817 | 
             
              tags: pointwise
         | 
| 14466 14818 |  | 
| 14467 14819 | 
             
            - func: special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 14820 | 
            +
              dispatch:
         | 
| 14821 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_u
         | 
| 14468 14822 | 
             
              device_check: NoCheck
         | 
| 14469 14823 | 
             
              python_module: special
         | 
| 14470 14824 | 
             
              variants: function
         | 
| 14471 14825 | 
             
              tags: pointwise
         | 
| 14472 14826 |  | 
| 14473 14827 | 
             
            - func: special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 14828 | 
            +
              dispatch:
         | 
| 14829 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_u
         | 
| 14474 14830 | 
             
              device_check: NoCheck
         | 
| 14475 14831 | 
             
              python_module: special
         | 
| 14476 14832 | 
             
              variants: function
         | 
| @@ -14487,6 +14843,8 @@ | |
| 14487 14843 | 
             
              tags: pointwise
         | 
| 14488 14844 |  | 
| 14489 14845 | 
             
            - func: special_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 14846 | 
            +
              dispatch:
         | 
| 14847 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_u_out
         | 
| 14490 14848 | 
             
              device_check: NoCheck
         | 
| 14491 14849 | 
             
              python_module: special
         | 
| 14492 14850 | 
             
              variants: function
         | 
| @@ -14508,12 +14866,16 @@ | |
| 14508 14866 | 
             
              tags: pointwise
         | 
| 14509 14867 |  | 
| 14510 14868 | 
             
            - func: special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 14869 | 
            +
              dispatch:
         | 
| 14870 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_v
         | 
| 14511 14871 | 
             
              device_check: NoCheck
         | 
| 14512 14872 | 
             
              python_module: special
         | 
| 14513 14873 | 
             
              variants: function
         | 
| 14514 14874 | 
             
              tags: pointwise
         | 
| 14515 14875 |  | 
| 14516 14876 | 
             
            - func: special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 14877 | 
            +
              dispatch:
         | 
| 14878 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_v
         | 
| 14517 14879 | 
             
              device_check: NoCheck
         | 
| 14518 14880 | 
             
              python_module: special
         | 
| 14519 14881 | 
             
              variants: function
         | 
| @@ -14530,6 +14892,8 @@ | |
| 14530 14892 | 
             
              tags: pointwise
         | 
| 14531 14893 |  | 
| 14532 14894 | 
             
            - func: special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 14895 | 
            +
              dispatch:
         | 
| 14896 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_v_out
         | 
| 14533 14897 | 
             
              device_check: NoCheck
         | 
| 14534 14898 | 
             
              python_module: special
         | 
| 14535 14899 | 
             
              variants: function
         | 
| @@ -14551,12 +14915,16 @@ | |
| 14551 14915 | 
             
              tags: pointwise
         | 
| 14552 14916 |  | 
| 14553 14917 | 
             
            - func: special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 14918 | 
            +
              dispatch:
         | 
| 14919 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_w
         | 
| 14554 14920 | 
             
              device_check: NoCheck
         | 
| 14555 14921 | 
             
              python_module: special
         | 
| 14556 14922 | 
             
              variants: function
         | 
| 14557 14923 | 
             
              tags: pointwise
         | 
| 14558 14924 |  | 
| 14559 14925 | 
             
            - func: special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 14926 | 
            +
              dispatch:
         | 
| 14927 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_w
         | 
| 14560 14928 | 
             
              device_check: NoCheck
         | 
| 14561 14929 | 
             
              python_module: special
         | 
| 14562 14930 | 
             
              variants: function
         | 
| @@ -14573,6 +14941,8 @@ | |
| 14573 14941 | 
             
              tags: pointwise
         | 
| 14574 14942 |  | 
| 14575 14943 | 
             
            - func: special_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 14944 | 
            +
              dispatch:
         | 
| 14945 | 
            +
                CompositeExplicitAutograd: special_chebyshev_polynomial_w_out
         | 
| 14576 14946 | 
             
              device_check: NoCheck
         | 
| 14577 14947 | 
             
              python_module: special
         | 
| 14578 14948 | 
             
              variants: function
         | 
| @@ -14594,12 +14964,16 @@ | |
| 14594 14964 | 
             
              tags: pointwise
         | 
| 14595 14965 |  | 
| 14596 14966 | 
             
            - func: special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 14967 | 
            +
              dispatch:
         | 
| 14968 | 
            +
                CompositeExplicitAutograd: special_hermite_polynomial_h
         | 
| 14597 14969 | 
             
              device_check: NoCheck
         | 
| 14598 14970 | 
             
              python_module: special
         | 
| 14599 14971 | 
             
              variants: function
         | 
| 14600 14972 | 
             
              tags: pointwise
         | 
| 14601 14973 |  | 
| 14602 14974 | 
             
            - func: special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 14975 | 
            +
              dispatch:
         | 
| 14976 | 
            +
                CompositeExplicitAutograd: special_hermite_polynomial_h
         | 
| 14603 14977 | 
             
              device_check: NoCheck
         | 
| 14604 14978 | 
             
              python_module: special
         | 
| 14605 14979 | 
             
              variants: function
         | 
| @@ -14616,6 +14990,8 @@ | |
| 14616 14990 | 
             
              tags: pointwise
         | 
| 14617 14991 |  | 
| 14618 14992 | 
             
            - func: special_hermite_polynomial_h.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 14993 | 
            +
              dispatch:
         | 
| 14994 | 
            +
                CompositeExplicitAutograd: special_hermite_polynomial_h_out
         | 
| 14619 14995 | 
             
              device_check: NoCheck
         | 
| 14620 14996 | 
             
              python_module: special
         | 
| 14621 14997 | 
             
              variants: function
         | 
| @@ -14637,12 +15013,16 @@ | |
| 14637 15013 | 
             
              tags: pointwise
         | 
| 14638 15014 |  | 
| 14639 15015 | 
             
            - func: special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 15016 | 
            +
              dispatch:
         | 
| 15017 | 
            +
                CompositeExplicitAutograd: special_hermite_polynomial_he
         | 
| 14640 15018 | 
             
              device_check: NoCheck
         | 
| 14641 15019 | 
             
              python_module: special
         | 
| 14642 15020 | 
             
              variants: function
         | 
| 14643 15021 | 
             
              tags: pointwise
         | 
| 14644 15022 |  | 
| 14645 15023 | 
             
            - func: special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 15024 | 
            +
              dispatch:
         | 
| 15025 | 
            +
                CompositeExplicitAutograd: special_hermite_polynomial_he
         | 
| 14646 15026 | 
             
              device_check: NoCheck
         | 
| 14647 15027 | 
             
              python_module: special
         | 
| 14648 15028 | 
             
              variants: function
         | 
| @@ -14659,6 +15039,8 @@ | |
| 14659 15039 | 
             
              tags: pointwise
         | 
| 14660 15040 |  | 
| 14661 15041 | 
             
            - func: special_hermite_polynomial_he.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 15042 | 
            +
              dispatch:
         | 
| 15043 | 
            +
                CompositeExplicitAutograd: special_hermite_polynomial_he_out
         | 
| 14662 15044 | 
             
              device_check: NoCheck
         | 
| 14663 15045 | 
             
              python_module: special
         | 
| 14664 15046 | 
             
              variants: function
         | 
| @@ -14680,12 +15062,16 @@ | |
| 14680 15062 | 
             
              tags: pointwise
         | 
| 14681 15063 |  | 
| 14682 15064 | 
             
            - func: special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 15065 | 
            +
              dispatch:
         | 
| 15066 | 
            +
                CompositeExplicitAutograd: special_laguerre_polynomial_l
         | 
| 14683 15067 | 
             
              device_check: NoCheck
         | 
| 14684 15068 | 
             
              python_module: special
         | 
| 14685 15069 | 
             
              variants: function
         | 
| 14686 15070 | 
             
              tags: pointwise
         | 
| 14687 15071 |  | 
| 14688 15072 | 
             
            - func: special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 15073 | 
            +
              dispatch:
         | 
| 15074 | 
            +
                CompositeExplicitAutograd: special_laguerre_polynomial_l
         | 
| 14689 15075 | 
             
              device_check: NoCheck
         | 
| 14690 15076 | 
             
              python_module: special
         | 
| 14691 15077 | 
             
              variants: function
         | 
| @@ -14702,6 +15088,8 @@ | |
| 14702 15088 | 
             
              tags: pointwise
         | 
| 14703 15089 |  | 
| 14704 15090 | 
             
            - func: special_laguerre_polynomial_l.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 15091 | 
            +
              dispatch:
         | 
| 15092 | 
            +
                CompositeExplicitAutograd: special_laguerre_polynomial_l_out
         | 
| 14705 15093 | 
             
              device_check: NoCheck
         | 
| 14706 15094 | 
             
              python_module: special
         | 
| 14707 15095 | 
             
              variants: function
         | 
| @@ -14723,12 +15111,16 @@ | |
| 14723 15111 | 
             
              tags: pointwise
         | 
| 14724 15112 |  | 
| 14725 15113 | 
             
            - func: special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 15114 | 
            +
              dispatch:
         | 
| 15115 | 
            +
                CompositeExplicitAutograd: special_legendre_polynomial_p
         | 
| 14726 15116 | 
             
              device_check: NoCheck
         | 
| 14727 15117 | 
             
              python_module: special
         | 
| 14728 15118 | 
             
              variants: function
         | 
| 14729 15119 | 
             
              tags: pointwise
         | 
| 14730 15120 |  | 
| 14731 15121 | 
             
            - func: special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 15122 | 
            +
              dispatch:
         | 
| 15123 | 
            +
                CompositeExplicitAutograd: special_legendre_polynomial_p
         | 
| 14732 15124 | 
             
              device_check: NoCheck
         | 
| 14733 15125 | 
             
              python_module: special
         | 
| 14734 15126 | 
             
              variants: function
         | 
| @@ -14745,6 +15137,8 @@ | |
| 14745 15137 | 
             
              tags: pointwise
         | 
| 14746 15138 |  | 
| 14747 15139 | 
             
            - func: special_legendre_polynomial_p.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 15140 | 
            +
              dispatch:
         | 
| 15141 | 
            +
                CompositeExplicitAutograd: special_legendre_polynomial_p_out
         | 
| 14748 15142 | 
             
              device_check: NoCheck
         | 
| 14749 15143 | 
             
              python_module: special
         | 
| 14750 15144 | 
             
              variants: function
         | 
| @@ -14856,12 +15250,16 @@ | |
| 14856 15250 | 
             
              tags: pointwise
         | 
| 14857 15251 |  | 
| 14858 15252 | 
             
            - func: special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 15253 | 
            +
              dispatch:
         | 
| 15254 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
         | 
| 14859 15255 | 
             
              device_check: NoCheck
         | 
| 14860 15256 | 
             
              python_module: special
         | 
| 14861 15257 | 
             
              variants: function
         | 
| 14862 15258 | 
             
              tags: pointwise
         | 
| 14863 15259 |  | 
| 14864 15260 | 
             
            - func: special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 15261 | 
            +
              dispatch:
         | 
| 15262 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
         | 
| 14865 15263 | 
             
              device_check: NoCheck
         | 
| 14866 15264 | 
             
              python_module: special
         | 
| 14867 15265 | 
             
              variants: function
         | 
| @@ -14878,6 +15276,8 @@ | |
| 14878 15276 | 
             
              tags: pointwise
         | 
| 14879 15277 |  | 
| 14880 15278 | 
             
            - func: special_shifted_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 15279 | 
            +
              dispatch:
         | 
| 15280 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t_out
         | 
| 14881 15281 | 
             
              device_check: NoCheck
         | 
| 14882 15282 | 
             
              python_module: special
         | 
| 14883 15283 | 
             
              variants: function
         | 
| @@ -14899,12 +15299,16 @@ | |
| 14899 15299 | 
             
              tags: pointwise
         | 
| 14900 15300 |  | 
| 14901 15301 | 
             
            - func: special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 15302 | 
            +
              dispatch:
         | 
| 15303 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
         | 
| 14902 15304 | 
             
              device_check: NoCheck
         | 
| 14903 15305 | 
             
              python_module: special
         | 
| 14904 15306 | 
             
              variants: function
         | 
| 14905 15307 | 
             
              tags: pointwise
         | 
| 14906 15308 |  | 
| 14907 15309 | 
             
            - func: special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 15310 | 
            +
              dispatch:
         | 
| 15311 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
         | 
| 14908 15312 | 
             
              device_check: NoCheck
         | 
| 14909 15313 | 
             
              python_module: special
         | 
| 14910 15314 | 
             
              variants: function
         | 
| @@ -14921,6 +15325,8 @@ | |
| 14921 15325 | 
             
              tags: pointwise
         | 
| 14922 15326 |  | 
| 14923 15327 | 
             
            - func: special_shifted_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 15328 | 
            +
              dispatch:
         | 
| 15329 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u_out
         | 
| 14924 15330 | 
             
              device_check: NoCheck
         | 
| 14925 15331 | 
             
              python_module: special
         | 
| 14926 15332 | 
             
              variants: function
         | 
| @@ -14942,12 +15348,16 @@ | |
| 14942 15348 | 
             
              tags: pointwise
         | 
| 14943 15349 |  | 
| 14944 15350 | 
             
            - func: special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
         | 
| 15351 | 
            +
              dispatch:
         | 
| 15352 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
         | 
| 14945 15353 | 
             
              device_check: NoCheck
         | 
| 14946 15354 | 
             
              python_module: special
         | 
| 14947 15355 | 
             
              variants: function
         | 
| 14948 15356 | 
             
              tags: pointwise
         | 
| 14949 15357 |  | 
| 14950 15358 | 
             
            - func: special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
         | 
| 15359 | 
            +
              dispatch:
         | 
| 15360 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
         | 
| 14951 15361 | 
             
              device_check: NoCheck
         | 
| 14952 15362 | 
             
              python_module: special
         | 
| 14953 15363 | 
             
              variants: function
         | 
| @@ -14964,6 +15374,8 @@ | |
| 14964 15374 | 
             
              tags: pointwise
         | 
| 14965 15375 |  | 
| 14966 15376 | 
             
            - func: special_shifted_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
         | 
| 15377 | 
            +
              dispatch:
         | 
| 15378 | 
            +
                CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v_out
         | 
| 14967 15379 | 
             
              device_check: NoCheck
         | 
| 14968 15380 | 
             
              python_module: special
         | 
| 14969 15381 | 
             
              variants: function
         | 
| @@ -14985,12 +15397,16 @@ |
| 14985 15397 |   tags: pointwise
| 14986 15398 | 
| 14987 15399 | - func: special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
|       15400 | +   dispatch:
|       15401 | +     CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
| 14988 15402 |   device_check: NoCheck
| 14989 15403 |   python_module: special
| 14990 15404 |   variants: function
| 14991 15405 |   tags: pointwise
| 14992 15406 | 
| 14993 15407 | - func: special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
|       15408 | +   dispatch:
|       15409 | +     CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
| 14994 15410 |   device_check: NoCheck
| 14995 15411 |   python_module: special
| 14996 15412 |   variants: function
| @@ -15007,6 +15423,8 @@ |
| 15007 15423 |   tags: pointwise
| 15008 15424 | 
| 15009 15425 | - func: special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|       15426 | +   dispatch:
|       15427 | +     CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w_out
| 15010 15428 |   device_check: NoCheck
| 15011 15429 |   python_module: special
| 15012 15430 |   variants: function
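The three `.x_scalar_out` hunks above are the out= flavors of the same change. In the schema, `Tensor(a!) out` marks `out` as mutated and aliased to the return value: the kernel writes into caller-provided storage instead of allocating a fresh tensor. A hedged sketch of that contract, reusing the illustrative helper from above and assuming torch-rb's usual `!` naming for in-place ops:

```ruby
x   = Torch.tensor([0.1, 0.5, 0.9])
out = Torch.empty_like(x)

# A real out= kernel writes directly into `out`; this copy! stands in for
# that behavior, minus the temporary allocation the out= variant avoids.
out.copy!(shifted_chebyshev_u(x, 3))
```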
| @@ -15075,6 +15493,22 @@ |
| 15075 15493 |     CUDA: _fused_adamw_kernel_cuda_
| 15076 15494 |   autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
| 15077 15495 | 
|       15496 | + - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
|       15497 | +   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
|       15498 | +   variants: function
|       15499 | +   dispatch:
|       15500 | +     CUDA: _fused_sgd_kernel_cuda_
|       15501 | +   autogen: _fused_sgd, _fused_sgd.out
|       15502 | + 
|       15503 | + - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
|       15504 | +   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now),
|       15505 | +   # but still skip the device check as the Tensor LR can be on CPU.
|       15506 | +   device_check: NoCheck
|       15507 | +   variants: function
|       15508 | +   dispatch:
|       15509 | +     CUDA: _fused_sgd_kernel_cuda_
|       15510 | +   autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
|       15511 | + 
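The other substantive addition in this hunk registers `_fused_sgd_`, the multi-tensor CUDA kernel behind PyTorch's fused SGD optimizer path, plus a `.tensor_lr` overload whose learning rate is a tensor that may live on CPU while the parameters sit on CUDA, which is why that variant sets `device_check: NoCheck`. Below is a minimal, unfused sketch of the per-parameter update the kernel applies across the whole list in one launch; it is plain torch-rb, `sgd_step!` is an illustrative name, `grad_scale`/`found_inf` handling is omitted, and the in-place `!` methods are assumed from torch-rb's usual naming.

```ruby
# One SGD step for a single (param, grad, momentum buffer) triple; the
# fused op runs this logic over whole TensorLists in a single CUDA kernel.
def sgd_step!(param, grad, buf, weight_decay:, momentum:, lr:,
              dampening:, nesterov:, maximize:, is_first_step:)
  d_p = maximize ? -grad : grad                 # maximize flips the step direction
  d_p = d_p + param * weight_decay if weight_decay != 0
  if momentum != 0
    if is_first_step
      buf.copy!(d_p)                            # seed the momentum buffer
    else
      buf.mul!(momentum).add!(d_p * (1 - dampening))
    end
    d_p = nesterov ? d_p + buf * momentum : buf
  end
  param.sub!(d_p * lr)                          # mutates param, matching Tensor(a!)[]
end
```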
| 15078 15512 | # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
| 15079 15513 | - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
| 15080 15514 |   variants: function