torch-rb 0.2.4 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -66,6 +66,7 @@ module Torch
  end
 
  next if t == "Generator?"
+ next if t == "MemoryFormat"
  next if t == "MemoryFormat?"
  args << {name: k, type: t, default: d, pos: pos, has_default: has_default}
  end
@@ -18,7 +18,7 @@ module Torch
  functions = functions()
 
  # skip functions
- skip_args = ["bool[3]", "Dimname", "MemoryFormat", "Layout", "Storage", "ConstQuantizerPtr"]
+ skip_args = ["bool[3]", "Dimname", "Layout", "Storage", "ConstQuantizerPtr"]
 
  # remove functions
  functions.reject! do |f|
@@ -31,7 +31,7 @@ module Torch
  todo_functions, functions =
  functions.partition do |f|
  f.args.any? do |a|
- a[:type].include?("?") && !["Tensor?", "Generator?", "int?", "ScalarType?"].include?(a[:type]) ||
+ a[:type].include?("?") && !["Tensor?", "Generator?", "int?", "ScalarType?", "Tensor?[]"].include?(a[:type]) ||
  skip_args.any? { |sa| a[:type].include?(sa) } ||
  # native_functions.yaml is missing size argument for normal
  # https://pytorch.org/cppdocs/api/function_namespacetorch_1a80253fe5a3ded4716ec929a348adb4b9.html
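
The three hunks above adjust the binding generator's filtering: `MemoryFormat` is dropped from the blanket skip_args list (the generator now skips it per-argument instead), and `Tensor?[]` joins the optional types that can be bound. A minimal, self-contained Ruby sketch of the resulting partition logic — the `Function` struct and the sample data are hypothetical, for illustration only, not torch-rb internals:

# Hypothetical, simplified re-creation of the partition above.
Function = Struct.new(:name, :args)

SUPPORTED_OPTIONAL = ["Tensor?", "Generator?", "int?", "ScalarType?", "Tensor?[]"]
SKIP_ARGS = ["bool[3]", "Dimname", "Layout", "Storage", "ConstQuantizerPtr"]

def todo?(f)
  f.args.any? do |a|
    (a[:type].include?("?") && !SUPPORTED_OPTIONAL.include?(a[:type])) ||
      SKIP_ARGS.any? { |sa| a[:type].include?(sa) }
  end
end

fns = [
  Function.new("add", [{type: "Tensor"}, {type: "Scalar"}]),
  Function.new("rename", [{type: "Dimname[]?"}]),
  Function.new("index", [{type: "Tensor?[]"}])
]
todo, supported = fns.partition { |f| todo?(f) }
# todo      => rename   (Dimname[]? is an optional type outside the whitelist, and matches "Dimname")
# supported => add, index (Tensor?[] is newly whitelisted in this release)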
@@ -112,6 +112,9 @@ void add_%{type}_functions(Module m) {
  "OptionalScalarType"
  when "Tensor[]"
  "TensorList"
+ when "Tensor?[]"
+ # TODO make optional
+ "TensorList"
  when "int"
  "int64_t"
  when "float"
@@ -1,44 +1,52 @@
  # See README.md in this directory for more guidance
 
+ # *********NB: _cast_* operators are DEPRECATED and will be removed
+ # eventually. These were previously used before TorchScript IR supported
+ # representing ScalarType's. They are now superseded by usage of
+ # `aten::to()`. The ops remain here for backward compatibility purposes.
 
- # Temporary type cast operators. These are needed to trace type-casts now since
- # Type's are not supported in the IR. Instead, we call down to these
- # specialized operators for each datatype.
- # TODO: remove when we have Type support in the IR
+ # DEPRECATED. DO NOT USE
  - func: _cast_Byte(Tensor self, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
  variants: function
 
+ # DEPRECATED. DO NOT USE
  - func: _cast_Char(Tensor self, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
  variants: function
 
+ # DEPRECATED. DO NOT USE
  - func: _cast_Double(Tensor self, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
  variants: function
 
+ # DEPRECATED. DO NOT USE
  - func: _cast_Float(Tensor self, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
  variants: function
 
+ # DEPRECATED. DO NOT USE
  - func: _cast_Int(Tensor self, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
  variants: function
 
+ # DEPRECATED. DO NOT USE
  - func: _cast_Long(Tensor self, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
  variants: function
 
+ # DEPRECATED. DO NOT USE
  - func: _cast_Short(Tensor self, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
  variants: function
 
+ # DEPRECATED. DO NOT USE
  - func: _cast_Half(Tensor self, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
  variants: function
 
  # Computes the gradient of current tensor w.r.t. graph leaves.
- - func: backward(Tensor self, Tensor? gradient=None, bool keep_graph=False, bool create_graph=False) -> ()
+ - func: backward(Tensor self, Tensor? gradient=None, bool? retain_graph=None, bool create_graph=False) -> ()
  manual_kernel_registration: True
  variants: method
 
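Two behavioral notes from this hunk: the `_cast_*` operators are kept only for backward compatibility and are superseded by `aten::to()`, and `backward`'s `keep_graph` flag became an optional `retain_graph`. In Ruby terms the preferred dtype conversion looks roughly like this — the method spelling is an assumption mirroring PyTorch's API, not verified against torch-rb 0.3.1:

x = Torch.tensor([1.5, 2.5])
y = x.to(:uint8) # assumed torch-rb spelling of aten::to, rather than the deprecated _cast_Byte
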
@@ -79,14 +87,13 @@
  use_c10_dispatcher: full
  manual_kernel_registration: True
  variants: method
- supports_named_tensor: True
 
  - func: _version(Tensor self) -> int
  use_c10_dispatcher: full
  manual_kernel_registration: True
  variants: method
 
- - func: requires_grad_(Tensor(a!) self, bool _requires_grad=True) -> Tensor(a!)
+ - func: requires_grad_(Tensor(a!) self, bool requires_grad=True) -> Tensor(a!)
  manual_kernel_registration: True
  variants: method
 
@@ -98,45 +105,39 @@
 
  - func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: rename(Tensor(a) self, Dimname[]? names) -> Tensor(a)
  variants: method
- supports_named_tensor: True
 
  - func: align_to(Tensor(a) self, Dimname[] names) -> Tensor(a)
  variants: method
- supports_named_tensor: True
 
  - func: align_to.ellipsis_idx(Tensor(a) self, Dimname[] order, int ellipsis_idx) -> Tensor(a)
  variants: method
- supports_named_tensor: True
 
  - func: align_as(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
  variants: method
- supports_named_tensor: True
 
  - func: align_tensors(Tensor[] tensors) -> Tensor[]
- supports_named_tensor: True
+ use_c10_dispatcher: full
 
  - func: refine_names(Tensor(a) self, Dimname[] names) -> Tensor(a)
  variants: method
- supports_named_tensor: True
 
  - func: unflatten.Dimname(Tensor self, Dimname dim, int[] sizes, Dimname[] names) -> Tensor
  variants: method
- supports_named_tensor: True
 
  - func: unflatten.int(Tensor self, int dim, int[] sizes, Dimname[] names) -> Tensor
  variants: method
- supports_named_tensor: True
-
 
  - func: _use_cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank) -> bool
+ use_c10_dispatcher: full
  dispatch:
  CUDA: _use_cudnn_ctc_loss
 
  - func: _cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: _cudnn_ctc_loss
 
@@ -144,6 +145,7 @@
  use_c10_dispatcher: full
 
  - func: _cudnn_rnn_flatten_weight(Tensor[] weight_arr, int weight_stride0, int input_size, int mode, int hidden_size, int num_layers, bool batch_first, bool bidirectional) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: _cudnn_rnn_flatten_weight
 
@@ -167,7 +169,6 @@
  variants: function
  dispatch:
  CUDA: fused_dropout_cuda
- supports_named_tensor: True
 
  - func: _masked_scale(Tensor self, Tensor mask, float scale) -> Tensor
  use_c10_dispatcher: full
@@ -179,13 +180,10 @@
 
  - func: _sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)
 
-
  - func: _sobol_engine_scramble_(Tensor(a!) self, Tensor ltm, int dimension) -> Tensor(a!)
 
-
  - func: _sobol_engine_initialize_state_(Tensor(a!) self, int dimension) -> Tensor(a!)
 
-
  - func: _reshape_from_tensor(Tensor self, Tensor shape) -> Tensor
  use_c10_dispatcher: full
 
@@ -194,10 +192,8 @@
 
  - func: dropout(Tensor input, float p, bool train) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
 
  - func: dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: feature_dropout(Tensor input, float p, bool train) -> Tensor
  use_c10_dispatcher: full
@@ -209,69 +205,84 @@
 
  - func: alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
 
-
  - func: feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor
  use_c10_dispatcher: full
 
  - func: feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
 
-
  - func: abs(Tensor self) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: abs_(Tensor(a!) self) -> Tensor(a!)
  variants: function, method
- supports_named_tensor: True
 
  - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
+
+ - func: absolute(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ dispatch:
+ CPU: abs
+ CUDA: abs
+
+ - func: absolute_(Tensor(a!) self) -> Tensor(a!)
+ variants: function, method
+ dispatch:
+ CPU: abs_
+ CUDA: abs_
+
+ - func: absolute.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU: abs_out
+ CUDA: abs_out
 
  - func: angle(Tensor self) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
- - func: real(Tensor self) -> Tensor
+ - func: view_as_real(Tensor(a) self) -> Tensor(a)
  use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
 
- - func: imag(Tensor self) -> Tensor
+ - func: view_as_complex(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
+ variants: function
+
+ - func: real(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
+ variants: function
+
+ - func: imag(Tensor(a) self) -> Tensor(a)
  use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
 
  - func: conj(Tensor self) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: conj.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: acos(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: acos_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: acos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
+ use_c10_dispatcher: full
 
  - func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
+ use_c10_dispatcher: full
 
  # Return: (Tensor output, Tensor indices)
  - func: adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
@@ -282,7 +293,7 @@
  SparseCPU: add_sparse
  SparseCUDA: add_sparse
  MkldnnCPU: mkldnn_add
- supports_named_tensor: True
+ Vulkan: vulkan_add
 
  - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
  variants: method
@@ -292,7 +303,6 @@
  SparseCPU: add_sparse_
  SparseCUDA: add_sparse_
  MkldnnCPU: mkldnn_add_
- supports_named_tensor: True
 
  - func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -301,38 +311,28 @@
  SparseCPU: add_out_sparse_cpu
  SparseCUDA: add_out_sparse_cuda
  MkldnnCPU: mkldnn_add_out
- supports_named_tensor: True
 
  # For C++ only, until we have conversion from C++ numbers to Tensor
  - func: add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: add_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- dispatch:
- CPU: legacy::cpu::_th_addmv
- CUDA: legacy::cuda::_th_addmv
- supports_named_tensor: True
 
  - func: addmv_(Tensor(a!) self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
  variants: function, method
- dispatch:
- CPU: legacy::cpu::_th_addmv_
- CUDA: legacy::cuda::_th_addmv_
- supports_named_tensor: True
 
  - func: addmv.out(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+
+ - func: _addmv_impl_(Tensor(a!) self, Tensor self2, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
  dispatch:
- CPU: legacy::cpu::_th_addmv_out
- CUDA: legacy::cuda::_th_addmv_out
- supports_named_tensor: True
+ CPU: addmv_impl_cpu
+ CUDA: addmv_impl_cuda
 
  - func: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
@@ -344,9 +344,11 @@
  - func: addr.out(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
 
  - func: affine_grid_generator(Tensor theta, int[] size, bool align_corners) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: affine_grid_generator_backward(Tensor grad, int[] size, bool align_corners) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
@@ -397,25 +399,64 @@
  use_c10_dispatcher: full
 
  - func: argmax(Tensor self, int? dim=None, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  CPU: argmax
  CUDA: argmax
 
  - func: argmin(Tensor self, int? dim=None, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  CPU: argmin
  CUDA: argmin
 
+ - func: acosh(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ supports_named_tensor: True
+ variants: function, method
+
+ - func: acosh_(Tensor(a!) self) -> Tensor(a!)
+ supports_named_tensor: True
+ variants: function, method
+
+ - func: acosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ supports_named_tensor: True
+
+ - func: asinh(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ supports_named_tensor: True
+ variants: function, method
+
+ - func: asinh_(Tensor(a!) self) -> Tensor(a!)
+ supports_named_tensor: True
+ variants: function, method
+
+ - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ supports_named_tensor: True
+
+ - func: atanh(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ supports_named_tensor: True
+ variants: function, method
+
+ - func: atanh_(Tensor(a!) self) -> Tensor(a!)
+ supports_named_tensor: True
+ variants: function, method
+
+ - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ supports_named_tensor: True
+
  - func: as_strided(Tensor(a) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  CPU: as_strided_tensorimpl
  CUDA: as_strided_tensorimpl
  QuantizedCPU: as_strided_qtensorimpl
+ QuantizedCUDA: as_strided_qtensorimpl
  device_guard: False
- supports_named_tensor: True
 
  - func: as_strided_(Tensor(a!) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a!)
  variants: function, method
423
464
 
424
465
  - func: asin(Tensor self) -> Tensor
425
466
  use_c10_dispatcher: full
426
- supports_named_tensor: True
427
467
  variants: function, method
428
468
 
429
469
  - func: asin_(Tensor(a!) self) -> Tensor(a!)
430
- supports_named_tensor: True
431
470
  variants: function, method
432
471
 
433
472
  - func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
434
- supports_named_tensor: True
435
473
 
436
474
  - func: atan(Tensor self) -> Tensor
437
475
  use_c10_dispatcher: full
438
- supports_named_tensor: True
439
476
  variants: function, method
440
477
 
441
478
  - func: atan_(Tensor(a!) self) -> Tensor(a!)
442
- supports_named_tensor: True
443
479
  variants: function, method
444
- dispatch:
445
- CPU: _atan__cpu
446
- CUDA: _atan__cuda
447
480
 
448
481
  - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
449
- supports_named_tensor: True
450
- dispatch:
451
- CPU: _atan_out_cpu
452
- CUDA: _atan_out_cuda
453
482
 
454
483
  - func: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
455
484
  use_c10_dispatcher: full
@@ -491,25 +520,15 @@
491
520
  # Sample bernoulli with values in `self` as probability.
492
521
  - func: bernoulli(Tensor self, *, Generator? generator=None) -> Tensor
493
522
  variants: function, method
494
- supports_named_tensor: True
495
523
 
496
524
  - func: bernoulli.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
497
525
  variants: function
498
- supports_named_tensor: True
499
526
 
500
527
  - func: bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)
501
528
  variants: method
502
- dispatch:
503
- CPU: bernoulli_tensor_cpu_
504
- CUDA: bernoulli_tensor_cuda_
505
- supports_named_tensor: True
506
529
 
507
530
  - func: bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)
508
531
  variants: method
509
- dispatch:
510
- CPU: bernoulli_scalar_cpu_
511
- CUDA: bernoulli_scalar_cuda_
512
- supports_named_tensor: True
513
532
 
514
533
  # This out-of-place version isn't used explicitly, but needed by jit.
515
534
  # There is no default valid on `p` here because it would introduce ambiguity
@@ -561,74 +580,63 @@
561
580
 
562
581
  - func: bitwise_not(Tensor self) -> Tensor
563
582
  use_c10_dispatcher: full
564
- supports_named_tensor: True
565
583
  variants: function, method
566
584
 
567
585
  - func: bitwise_not_(Tensor(a!) self) -> Tensor(a!)
568
- supports_named_tensor: True
569
586
  variants: method
570
587
 
571
588
  - func: bitwise_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
572
- supports_named_tensor: True
573
589
  dispatch:
574
590
  CPU: bitwise_not_out
575
591
  CUDA: bitwise_not_out
576
592
 
577
593
  - func: logical_not(Tensor self) -> Tensor
578
- supports_named_tensor: True
594
+ use_c10_dispatcher: full
579
595
  variants: function, method
580
596
 
581
597
  - func: logical_not_(Tensor(a!) self) -> Tensor(a!)
582
- supports_named_tensor: True
583
598
  variants: method
584
599
 
585
600
  - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
586
- supports_named_tensor: True
587
601
  dispatch:
588
602
  CPU: logical_not_out
589
603
  CUDA: logical_not_out
590
604
 
591
605
  - func: logical_xor(Tensor self, Tensor other) -> Tensor
606
+ use_c10_dispatcher: full
592
607
  variants: function, method
593
- supports_named_tensor: True
594
608
 
595
609
  - func: logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)
596
610
  variants: method
597
- supports_named_tensor: True
598
611
 
599
612
  - func: logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
600
613
  dispatch:
601
614
  CPU: logical_xor_out
602
615
  CUDA: logical_xor_out
603
- supports_named_tensor: True
604
616
 
605
617
  - func: logical_and(Tensor self, Tensor other) -> Tensor
618
+ use_c10_dispatcher: full
606
619
  variants: function, method
607
- supports_named_tensor: True
608
620
 
609
621
  - func: logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)
610
622
  variants: method
611
- supports_named_tensor: True
612
623
 
613
624
  - func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
614
625
  dispatch:
615
626
  CPU: logical_and_out
616
627
  CUDA: logical_and_out
617
- supports_named_tensor: True
618
628
 
619
629
  - func: logical_or(Tensor self, Tensor other) -> Tensor
630
+ use_c10_dispatcher: full
620
631
  variants: function, method
621
- supports_named_tensor: True
622
632
 
623
633
  - func: logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!)
624
634
  variants: method
625
- supports_named_tensor: True
626
635
 
627
636
  - func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
628
637
  dispatch:
629
638
  CPU: logical_or_out
630
639
  CUDA: logical_or_out
631
- supports_named_tensor: True
632
640
 
633
641
  - func: blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
634
642
 
@@ -640,127 +648,115 @@
  dispatch:
  CPU: bmm_cpu
  CUDA: bmm_cuda
- supports_named_tensor: True
+ SparseCPU: bmm_sparse_cpu
+ SparseCUDA: bmm_sparse_cuda
+
+ - func: _bmm(Tensor self, Tensor mat2, *, bool deterministic=False) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ dispatch:
+ SparseCUDA: _bmm_sparse_cuda
 
  - func: bmm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
  CPU: bmm_out_cpu
  CUDA: bmm_out_cuda
- supports_named_tensor: True
+ SparseCPU: bmm_out_sparse_cpu
+ SparseCUDA: bmm_out_sparse_cuda
+
+ - func: _bmm.out(Tensor self, Tensor mat2, *, bool deterministic=False, Tensor(a!) out) -> Tensor(a!)
+ variants: function
+ dispatch:
+ SparseCUDA: _bmm_out_sparse_cuda
 
  - func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
+ use_c10_dispatcher: full
  device_guard: False
 
  - func: cat(Tensor[] tensors, int dim=0) -> Tensor
- supports_named_tensor: True
+ use_c10_dispatcher: full
 
  - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: cat.names(Tensor[] tensors, Dimname dim) -> Tensor
- supports_named_tensor: True
 
  - func: cat.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
+
+ - func: block_diag(Tensor[] tensors) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
 
  - func: ceil(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: ceil_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: ceil_out
  CUDA: ceil_out
 
  - func: chain_matmul(Tensor[] matrices) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: chunk(Tensor(a) self, int chunks, int dim=0) -> Tensor(a)[]
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
  dispatch:
  CPU: clamp
  CUDA: clamp
  QuantizedCPU: quantized_clamp
+ Vulkan: vulkan_clamp
 
  - func: clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _clamp__cpu
- CUDA: _clamp__cuda
 
  - func: clamp.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _clamp_out_cpu
- CUDA: _clamp_out_cuda
 
  - func: clamp_max(Tensor self, Scalar max) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: clamp_max_(Tensor(a!) self, Scalar max) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _clamp_max__cpu
- CUDA: _clamp_max__cuda
 
  - func: clamp_max.out(Tensor self, Scalar max, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _clamp_max_out_cpu
- CUDA: _clamp_max_out_cuda
 
  - func: clamp_min(Tensor self, Scalar min) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _clamp_min__cpu
- CUDA: _clamp_min__cuda
 
  - func: clamp_min.out(Tensor self, Scalar min, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _clamp_min_out_cpu
- CUDA: _clamp_min_out_cuda
 
  - func: cudnn_is_acceptable(Tensor self) -> bool
  use_c10_dispatcher: full
  device_guard: False
 
  - func: constant_pad_nd(Tensor self, int[] pad, Scalar value=0) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: contiguous(Tensor self, *, MemoryFormat memory_format=contiguous_format) -> Tensor
  variants: method
- supports_named_tensor: True
 
  - func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
 
  - func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
 
  - func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+ use_c10_dispatcher: full
 
  - func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
 
@@ -778,6 +774,7 @@
  use_c10_dispatcher: full
 
  - func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
 
  # NB: we inherit the goofy argument order from PyTorch torch.nn.functional
  - func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor
@@ -790,7 +787,6 @@
  manual_kernel_registration: True
  variants: method
  device_guard: False
- supports_named_tensor: True
 
  - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
  use_c10_dispatcher: full
@@ -798,39 +794,21 @@
 
  - func: cos(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: cos_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _cos__cpu
- CUDA: _cos__cuda
 
  - func: cos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _cos_out_cpu
- CUDA: _cos_out_cuda
 
  - func: cosh(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: cosh_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _cosh__cpu
- CUDA: _cosh__cuda
 
  - func: cosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _cosh_out_cpu
- CUDA: _cosh_out_cuda
 
  - func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
  use_c10_dispatcher: full
@@ -860,18 +838,22 @@
  CUDA: cudnn_convolution_deprecated
 
  - func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_convolution
 
  - func: cudnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_convolution_backward_input
 
  - func: cudnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[2] output_mask) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_convolution_backward
 
  - func: cudnn_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_convolution_backward_weight
 
@@ -880,20 +862,24 @@
  CUDA: cudnn_convolution_transpose_deprecated
 
  - func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_convolution_transpose
 
  # NB: output_padding not strictly needed here, but it's helpful for the float
  # backwards
  - func: cudnn_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[2] output_mask) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_convolution_transpose_backward
 
  - func: cudnn_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_convolution_transpose_backward_input
 
  - func: cudnn_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_convolution_transpose_backward_weight
 
@@ -904,22 +890,20 @@
  CUDA: cudnn_grid_sampler_forward
 
  - func: cudnn_grid_sampler_backward(Tensor self, Tensor grid, Tensor grad_output) -> (Tensor grad_self, Tensor grad_grid)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: cudnn_grid_sampler_backward
 
  - func: cummax(Tensor self, int dim) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: cummax.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: cummax.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
  variants: function, method
 
  - func: cummax.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: _cummax_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> ()
  variants: function
@@ -928,18 +912,15 @@
  CUDA: cummax_helper_cuda
 
  - func: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: cummin.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: cummin.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
  variants: function, method
 
  - func: cummin.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: _cummin_helper(Tensor self, Tensor(a!) values, Tensor(b!) indices, int dim) -> ()
  variants: function
@@ -948,45 +929,40 @@
  CUDA: cummin_helper_cuda
 
  - func: cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
- supports_named_tensor: True
  variants: function, method
 
  - func: cumprod.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
- supports_named_tensor: True
  variants: function, method
 
  - func: cumprod.dimname_out(Tensor self, Dimname dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
- supports_named_tensor: True
  variants: function, method
 
  - func: cumsum.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: cumsum.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
- supports_named_tensor: True
  variants: function, method
 
  - func: cumsum.dimname_out(Tensor self, Dimname dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: ctc_loss.IntList(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor
+ use_c10_dispatcher: full
 
  # convenience function that converts to intlists for you
  - func: ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor
  use_c10_dispatcher: full
 
  - func: _ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CPU: ctc_loss_cpu
  CUDA: ctc_loss_gpu
 
  - func: _ctc_loss_backward(Tensor grad, Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CPU: ctc_loss_backward_cpu
  CUDA: ctc_loss_backward_gpu
@@ -1004,12 +980,11 @@
  variants: function, method
 
  - func: diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)
  variants: function, method
- supports_named_tensor: True
 
  - func: fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!)
  variants: method
@@ -1022,7 +997,6 @@
  CUDA: div
  SparseCPU: div_sparse
  SparseCUDA: div_sparse
- supports_named_tensor: True
 
  - func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  variants: method
@@ -1031,7 +1005,6 @@
  CUDA: div_
  SparseCPU: div_sparse_
  SparseCUDA: div_sparse_
- supports_named_tensor: True
 
  - func: div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -1039,17 +1012,14 @@
  CUDA: div_out
  SparseCPU: div_out_sparse_zerodim
  SparseCUDA: div_out_sparse_zerodim
- supports_named_tensor: True
 
  # For C++ only, until we have conversion from C++ numbers to Tensor
  - func: div.Scalar(Tensor self, Scalar other) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: dot(Tensor self, Tensor tensor) -> Tensor
  use_c10_dispatcher: full
@@ -1057,12 +1027,11 @@
  dispatch:
  CPU: legacy::cpu::_th_dot
  CUDA: legacy::cuda::_th_dot
- supports_named_tensor: True
 
  - func: dot.out(Tensor self, Tensor tensor, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: einsum(str equation, Tensor[] tensors) -> Tensor
+ use_c10_dispatcher: full
 
  - func: embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
  use_c10_dispatcher: full
@@ -1115,6 +1084,8 @@
  CPU: _embedding_bag_per_sample_weights_backward_cpu
  CUDA: _embedding_bag_per_sample_weights_backward_cuda
 
+ - func: empty_meta(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+
  - func: empty.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
  device_guard: False
 
@@ -1125,6 +1096,7 @@
  MkldnnCPU: empty_mkldnn
  SparseCPU: empty_sparse
  SparseCUDA: empty_sparse
+ Vulkan: empty_vulkan
 
  - func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  variants: method
@@ -1139,7 +1111,8 @@
  - func: _empty_affine_quantized(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, float scale=1, int zero_point=0, MemoryFormat? memory_format=contiguous_format) -> Tensor
  dispatch:
  CPU: empty_affine_quantized_other_backends_stub
- QuantizedCPU: empty_affine_quantized_cpu
+ QuantizedCPU: empty_affine_quantized
+ QuantizedCUDA: empty_affine_quantized
 
  # it's a factory function receiving a tensor argument, thus overriding explicitly
  # other overrides are to provide a more helpful error message that dtype is required
@@ -1151,95 +1124,70 @@
 
  - func: resize_(Tensor(a!) self, int[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!)
  manual_kernel_registration: True
- supports_named_tensor: True
  variants: method
  device_guard: False
 
+ - func: empty_quantized(int[] size, Tensor qtensor) -> Tensor
+ variants: function
+ dispatch:
+ QuantizedCPU: empty_quantized
+ QuantizedCUDA: empty_quantized
+
  - func: empty.out(int[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)
  device_guard: False
 
  - func: empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
  device_guard: False
- supports_named_tensor: True
 
  - func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  dispatch:
  CPU: empty_strided_cpu
  CUDA: empty_strided_cuda
+ Vulkan: empty_strided_vulkan
 
  - func: erf(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: erf_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _erf__cpu
- CUDA: _erf__cuda
 
  - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _erf_out_cpu
- CUDA: _erf_out_cuda
 
  - func: erfc(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: erfc_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _erfc__cpu
- CUDA: _erfc__cuda
 
  - func: erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _erfc_out_cpu
- CUDA: _erfc_out_cuda
 
  - func: exp(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: exp_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _exp__cpu
- CUDA: _exp__cuda
 
  - func: exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _exp_out_cpu
- CUDA: _exp_out_cuda
 
  - func: expm1(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: expm1_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: expm1_out
  CUDA: expm1_out
 
  - func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
  device_guard: False
- supports_named_tensor: True
 
  - func: expand_as(Tensor self, Tensor other) -> Tensor
  use_c10_dispatcher: full
@@ -1263,51 +1211,42 @@
  - func: flatten.using_ints(Tensor self, int start_dim=0, int end_dim=-1) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: flatten.named_out_dim(Tensor self, int start_dim, int end_dim, Dimname out_dim) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: flatten.using_names(Tensor self, Dimname start_dim, Dimname end_dim, Dimname out_dim) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: flatten.DimnameList(Tensor self, Dimname[] dims, Dimname out_dim) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: floor(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: floor_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: floor_out
  CUDA: floor_out
 
  - func: floor_divide(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  CPU: floor_divide
  CUDA: floor_divide
  SparseCPU: floor_divide_sparse
  SparseCUDA: floor_divide_sparse
- supports_named_tensor: True
 
  - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  variants: method
@@ -1316,7 +1255,6 @@
  CUDA: floor_divide_
  SparseCPU: floor_divide_sparse_
  SparseCUDA: floor_divide_sparse_
- supports_named_tensor: True
 
  - func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -1324,27 +1262,22 @@
  CUDA: floor_divide_out
  SparseCPU: floor_divide_out_sparse_zerodim
  SparseCUDA: floor_divide_out_sparse_zerodim
- supports_named_tensor: True
 
  - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: frac(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: frac_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  device_guard: False
@@ -1354,7 +1287,6 @@
  - func: full.out(int[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)
 
  - func: full_like(Tensor self, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
- supports_named_tensor: True
 
  - func: from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  dispatch:
@@ -1381,6 +1313,7 @@
  CUDA: grid_sampler_2d_cuda
 
  - func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CPU: grid_sampler_2d_backward_cpu
  CUDA: grid_sampler_2d_backward_cuda
@@ -1392,6 +1325,7 @@
  CUDA: grid_sampler_3d_cuda
 
  - func: grid_sampler_3d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CPU: grid_sampler_3d_backward_cpu
  CUDA: grid_sampler_3d_backward_cuda
@@ -1419,6 +1353,16 @@
 
  - func: group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enabled=True) -> Tensor
 
+ - func: native_group_norm(Tensor input, Tensor? weight, Tensor? bias, int N, int C, int HxW, int group, float eps) -> (Tensor, Tensor, Tensor)
+ dispatch:
+ CPU: native_group_norm
+ CUDA: native_group_norm
+
+ - func: native_group_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, int N, int C, int HxW, int group, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ dispatch:
+ CPU: native_group_norm_backward
+ CUDA: native_group_norm_backward
+
  # FFT
 
  - func: fft(Tensor self, int signal_ndim, bool normalized=False) -> Tensor
@@ -1434,9 +1378,11 @@
  variants: function, method
 
  - func: irfft(Tensor self, int signal_ndim, bool normalized=False, bool onesided=True, int[] signal_sizes=[]) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: _fft_with_size(Tensor self, int signal_ndim, bool complex_input, bool complex_output, bool inverse, int[] checked_signal_sizes, bool normalized, bool onesided, int[] output_sizes) -> Tensor
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _fft_mkl
@@ -1449,10 +1395,10 @@
  use_c10_dispatcher: full
 
  - func: _cufft_set_plan_cache_max_size(int device_index, int max_size) -> ()
- use_c10_dispatcher: unboxed_only
+ use_c10_dispatcher: full
 
  - func: _cufft_clear_plan_cache(int device_index) -> ()
- use_c10_dispatcher: unboxed_only
+ use_c10_dispatcher: full
 
  - func: index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
  variants: function, method
@@ -1510,9 +1456,8 @@
 
  - func: isnan(Tensor self) -> Tensor
  use_c10_dispatcher: full
- variants: function
+ variants: function, method
  device_guard: False
- supports_named_tensor: True
  dispatch:
  CPU: isnan
  CUDA: isnan
@@ -1528,57 +1473,49 @@
  use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: is_complex(Tensor self) -> bool
  use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: is_nonzero(Tensor self) -> bool
  use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: is_same_size(Tensor self, Tensor other) -> bool
  use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: is_signed(Tensor self) -> bool
  use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
- - func: kl_div(Tensor self, Tensor target, int reduction=Mean) -> Tensor
+ - func: kl_div(Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor
  use_c10_dispatcher: full
 
- - func: kl_div_backward(Tensor grad_output, Tensor self, Tensor target, int reduction=Mean) -> Tensor
+ - func: kl_div_backward(Tensor grad_output, Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor
  use_c10_dispatcher: full
  dispatch:
  CPU: kl_div_backward_cpu
  CUDA: kl_div_backward_cuda
 
  - func: kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
  dispatch:
  CPU: kthvalue_out_cpu
  CUDA: kthvalue_out_cuda
 
  - func: kthvalue.dimname(Tensor self, int k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
  variants: function, method
 
  - func: kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: layer_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor
 
@@ -1601,16 +1538,19 @@
  MkldnnCPU: mkldnn_linear
 
  - func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
+ use_c10_dispatcher: full
 
  - func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
  use_c10_dispatcher: full
 
  - func: fbgemm_linear_quantize_weight(Tensor input) -> (Tensor, Tensor, float, int)
+ use_c10_dispatcher: full
 
  - func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor
  use_c10_dispatcher: full
 
  - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
+ use_c10_dispatcher: full
 
  - func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
  use_c10_dispatcher: full
@@ -1630,41 +1570,33 @@
 
  - func: log(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: log_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: log_out
  CUDA: log_out
 
  - func: log10(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: log10_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: log10.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: log10_out
  CUDA: log10_out
 
  - func: log1p(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: log1p_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
  dispatch:
  CPU: log1p_
@@ -1673,7 +1605,6 @@
  SparseCUDA: log1p_sparse_
 
  - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: log1p_out
  CUDA: log1p_out
@@ -1682,19 +1613,28 @@
1682
1613
 
1683
1614
  - func: log2(Tensor self) -> Tensor
1684
1615
  use_c10_dispatcher: full
1685
- supports_named_tensor: True
1686
1616
  variants: function, method
1687
1617
 
1688
1618
  - func: log2_(Tensor(a!) self) -> Tensor(a!)
1689
- supports_named_tensor: True
1690
1619
  variants: function, method
1691
1620
 
1692
1621
  - func: log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
1693
- supports_named_tensor: True
1694
1622
  dispatch:
1695
1623
  CPU: log2_out
1696
1624
  CUDA: log2_out
1697
1625
 
1626
+ - func: logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
1627
+
1628
+ - func: logaddexp(Tensor self, Tensor other) -> Tensor
1629
+ use_c10_dispatcher: full
1630
+ variants: method, function
1631
+
1632
+ - func: logaddexp2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
1633
+
1634
+ - func: logaddexp2(Tensor self, Tensor other) -> Tensor
1635
+ use_c10_dispatcher: full
1636
+ variants: method, function
1637
+
1698
1638
  - func: logdet(Tensor self) -> Tensor
1699
1639
  use_c10_dispatcher: full
1700
1640
  variants: function, method
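logaddexp and logaddexp2 are new in this file: numerically stable log(exp(a) + exp(b)) and its base-2 counterpart. A sketch under the assumption that torch-rb exposes them as Torch.logaddexp and Torch.logaddexp2 (and as tensor methods, per variants: method, function):

    require "torch"

    a = Torch.tensor([-1000.0, -2.0])
    b = Torch.tensor([-1000.0, -3.0])

    # Naive log(exp(a) + exp(b)) underflows to log(0) = -Infinity for the
    # first pair; logaddexp works in log space and stays finite.
    Torch.logaddexp(a, b)   # ~ [-999.3069, -1.6867]
    Torch.logaddexp2(a, b)  # log2(2**a + 2**b)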
@@ -1709,11 +1649,9 @@
  # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
  - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: _log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
  use_c10_dispatcher: full
@@ -1727,19 +1665,37 @@
  CPU: log_softmax_backward_cpu
  CUDA: log_softmax_backward_cuda
 
+ - func: _logcumsumexp(Tensor self, int dim) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
+ CPU: _logcumsumexp_cpu
+ CUDA: _logcumsumexp_cuda
+
+ - func: _logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU: _logcumsumexp_out_cpu
+ CUDA: _logcumsumexp_out_cuda
+
+ - func: logcumsumexp(Tensor self, int dim) -> Tensor
+ variants: function, method
+
+ - func: logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
+
+ - func: logcumsumexp.dimname(Tensor self, Dimname dim) -> Tensor
+ variants: function, method
+
+ - func: logcumsumexp.dimname_out(Tensor self, Dimname dim, *, Tensor(a!) out) -> Tensor(a!)
+
  - func: logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
- supports_named_tensor: True
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
- supports_named_tensor: True
  variants: function, method
 
  - func: logsumexp.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: margin_ranking_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
  use_c10_dispatcher: full
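logcumsumexp (with a private _logcumsumexp kernel pair) is the cumulative version of logsumexp: out[i] = log(sum(exp(x[0..i]))) along a dimension. A sketch, assuming a generated Torch.logcumsumexp binding:

    require "torch"

    x = Torch.tensor([0.5, -1.0, 2.0])

    # Running partition function along dim 0, computed in log space so the
    # intermediate exponential sums cannot overflow.
    Torch.logcumsumexp(x, 0)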
@@ -1747,10 +1703,8 @@
  - func: matmul(Tensor self, Tensor other) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: matrix_rank.tol(Tensor self, float tol, bool symmetric=False) -> Tensor
  use_c10_dispatcher: full
@@ -1763,53 +1717,52 @@
  variants: function, method
 
  - func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+ use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: max_values(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
  variants: function, method
- supports_named_tensor: True
 
  - func: max.names_dim_max(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: max_values.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
  variants: function, method
 
  # Return: (Tensor output, Tensor indices)
  - func: max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
- supports_named_tensor: True
+ use_c10_dispatcher: full
 
  - func: max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> Tensor
- supports_named_tensor: True
+ use_c10_dispatcher: full
 
  - func: max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
- supports_named_tensor: True
+ use_c10_dispatcher: full
 
  - func: mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
+ use_c10_dispatcher: full
  requires_tensor: True
  dispatch:
  MkldnnCPU: mkldnn_max_pool2d
 
  - func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
+ use_c10_dispatcher: full
  requires_tensor: True
  dispatch:
  QuantizedCPU: quantized_max_pool2d
 
  - func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
- supports_named_tensor: True
+ use_c10_dispatcher: full
 
  # The CPU and GPU dispatch variants are named weirdly here because otherwise there
  # are namespacing issues in C++
  - func: mean(Tensor self, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
  dispatch:
  CPU: mean_cpu_gpu
  CUDA: mean_cpu_gpu
@@ -1817,14 +1770,13 @@
 
  - func: mean.dim(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
  dispatch:
  CPU: mean_cpu_gpu
  CUDA: mean_cpu_gpu
  QuantizedCPU: quantized_mean_cpu
+ Vulkan: mean_vulkan
 
  - func: mean.out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: mean_out_cpu_gpu
  CUDA: mean_out_cpu_gpu
@@ -1832,41 +1784,34 @@
 
  - func: mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: mean.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: median.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: median.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: median.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
  variants: function, method
 
  - func: median.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+ use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: min_values(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
  variants: function, method
- supports_named_tensor: True
 
  - func: min.names_dim_min(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: min_values.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
  variants: function, method
@@ -1874,10 +1819,13 @@
  - func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
 
  - func: mkldnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> Tensor
+ use_c10_dispatcher: full
 
  - func: mkldnn_convolution_backward_weights(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: mkldnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
  dispatch:
@@ -1892,10 +1840,12 @@
  CUDA: miopen_convolution
 
  - func: miopen_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_convolution_backward_input
 
  - func: miopen_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_convolution_backward
 
@@ -1905,6 +1855,7 @@
  CUDA: miopen_convolution_backward_bias
 
  - func: miopen_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_convolution_backward_weight
 
@@ -1915,14 +1866,17 @@
  # NB: output_padding not strictly needed here, but it's helpful for the float
  # backwards
  - func: miopen_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_convolution_transpose_backward
 
  - func: miopen_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_convolution_transpose_backward_input
 
  - func: miopen_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_convolution_transpose_backward_weight
 
@@ -1931,14 +1885,17 @@
  CUDA: miopen_depthwise_convolution
 
  - func: miopen_depthwise_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_depthwise_convolution_backward_input
 
  - func: miopen_depthwise_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_depthwise_convolution_backward
 
  - func: miopen_depthwise_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CUDA: miopen_depthwise_convolution_backward_weight
 
@@ -1955,35 +1912,30 @@
  variants: function, method
  dispatch:
  CPU: mm_cpu
- CUDA: legacy::cuda::_th_mm
+ CUDA: mm_cuda
  SparseCPU: _sparse_mm
  SparseCUDA: _sparse_mm
- supports_named_tensor: True
 
  - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: mm_cpu_out
- CUDA: legacy::cuda::_th_mm_out
+ CUDA: mm_out_cuda
  SparseCPU: _sparse_mm_out
  SparseCUDA: _sparse_mm_out
- supports_named_tensor: True
 
  - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
  use_c10_dispatcher: full
 
  - func: mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
- supports_named_tensor: True
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: mode.values(Tensor self, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: mode.dimname(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
  variants: function, method
- supports_named_tensor: True
 
  - func: mode.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- supports_named_tensor: True
 
  - func: mul.Tensor(Tensor self, Tensor other) -> Tensor
  use_c10_dispatcher: full
@@ -1994,7 +1946,6 @@
  SparseCPU: mul_sparse
  SparseCUDA: mul_sparse
  MkldnnCPU: mkldnn_mul
- supports_named_tensor: True
 
  - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  variants: method
@@ -2004,7 +1955,6 @@
  SparseCPU: mul_sparse_
  SparseCUDA: mul_sparse_
  MkldnnCPU: mkldnn_mul_
- supports_named_tensor: True
 
  - func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -2013,7 +1963,6 @@
  SparseCPU: mul_out_sparse_cpu
  SparseCUDA: mul_out_sparse_cuda
  MkldnnCPU: mkldnn_mul_out
- supports_named_tensor: True
 
  # For C++ only, until we have conversion from C++ numbers to Tensor
  - func: mul.Scalar(Tensor self, Scalar other) -> Tensor
@@ -2027,15 +1976,12 @@
  use_c10_dispatcher: full
  variants: function, method
  dispatch:
- CPU: mv_cpu
- CUDA: legacy::cuda::_th_mv
- supports_named_tensor: True
+ CPU: mv
+ CUDA: mv
+ SparseCPU: mv_sparse
+ SparseCUDA: mv_sparse
 
- - func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU: mv_cpu_out
- CUDA: legacy::cuda::_th_mv_out
- supports_named_tensor: True
+ - func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
 
  - func: mvlgamma(Tensor self, int p) -> Tensor
  use_c10_dispatcher: full
@@ -2054,14 +2000,14 @@
  SparseCUDA: narrow_copy_sparse
 
  - func: narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: native_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)
  dispatch:
@@ -2074,6 +2020,7 @@
  CUDA: batch_norm_cuda_out
 
  - func: batch_norm_stats(Tensor input, float eps) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: batch_norm_stats_cuda
 
@@ -2090,7 +2037,7 @@
  dispatch:
  CUDA: batch_norm_gather_stats_cuda
 
- - func: batch_norm_gather_stats_with_counts(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int[] counts) -> (Tensor, Tensor)
+ - func: batch_norm_gather_stats_with_counts(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, Tensor counts) -> (Tensor, Tensor)
  dispatch:
  CUDA: batch_norm_gather_stats_with_counts_cuda
 
@@ -2112,6 +2059,9 @@
  CPU: batch_norm_update_stats_cpu
  CUDA: batch_norm_update_stats_cuda
 
+ - func: is_vulkan_available() -> bool
+ use_c10_dispatcher: full
+
  - func: _nnpack_available() -> bool
  use_c10_dispatcher: full
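is_vulkan_available follows the _nnpack_available pattern: a zero-argument runtime capability probe, added alongside the Vulkan dispatch key on mean.dim above. A hypothetical Ruby-side guard; the exact method name torch-rb would generate for this op is an assumption:

    require "torch"

    # Probe the backend at runtime rather than assuming Vulkan support was
    # compiled in; fall back to CPU otherwise. Binding name is hypothetical.
    vulkan_ok = Torch.is_vulkan_available rescue false
    device = vulkan_ok ? "vulkan" : "cpu"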
 
@@ -2119,12 +2069,15 @@
  variants: function
 
  - func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
 
  - func: _nnpack_spatial_convolution_backward_input(Tensor input, Tensor grad_output, Tensor weight, int[2] padding) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: _nnpack_spatial_convolution_backward_weight(Tensor input, int[] weightsize, Tensor grad_output, int[2] padding) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -2135,17 +2088,18 @@
  - func: ones.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
 
  - func: ones_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
- supports_named_tensor: True
 
  - func: pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor
  use_c10_dispatcher: full
 
  - func: cdist(Tensor x1, Tensor x2, float p=2, int? compute_mode=None) -> Tensor
- supports_named_tensor: True
+ use_c10_dispatcher: full
+
+ - func: _euclidean_dist(Tensor x1, Tensor x2) -> Tensor
+ use_c10_dispatcher: full
 
  - func: _cdist_forward(Tensor x1, Tensor x2, float p, int? compute_mode) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
 
  - func: _cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor
  use_c10_dispatcher: full
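cdist now routes through the c10 dispatcher, and a private _euclidean_dist helper (apparently the specialized p=2 path) is split out. Usage from Ruby, assuming a generated Torch.cdist binding:

    require "torch"

    x1 = Torch.randn(3, 2)
    x2 = Torch.randn(4, 2)

    # Pairwise p-norm distances between rows of x1 and x2 (p defaults to
    # 2): the result is a 3x4 distance matrix.
    Torch.cdist(x1, x2)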
@@ -2164,6 +2118,7 @@
  variants: function
 
  - func: permute(Tensor(a) self, int[] dims) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
 
  # Only exposed from C++ -- in Python,
@@ -2174,15 +2129,21 @@
  # behavior on Windows, for reasons I don't understand
  # (maybe related to capital letter collation somehow...)
  - func: numpy_T(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method
 
  - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
  use_c10_dispatcher: full
 
+ - func: channel_shuffle(Tensor self, int groups) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
+ CPU: channel_shuffle
+ QuantizedCPU: quantized_channel_shuffle
+
  - func: is_pinned(Tensor self) -> bool
  use_c10_dispatcher: full
  variants: method
- supports_named_tensor: True
 
  - func: pin_memory(Tensor self) -> Tensor
  use_c10_dispatcher: full
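channel_shuffle joins pixel_shuffle, with dense and quantized CPU kernels; it permutes channels across groups as in ShuffleNet-style architectures. A sketch, assuming a generated Torch.channel_shuffle binding:

    require "torch"

    # NCHW input with 4 channels; groups = 2 interleaves the two channel
    # groups so grouped convolutions can mix information across groups.
    x = Torch.arange(4).reshape(1, 4, 1, 1)
    Torch.channel_shuffle(x, 2)  # channel order becomes 0, 2, 1, 3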
@@ -2196,6 +2157,30 @@
  use_c10_dispatcher: full
  variants: function
 
+ - func: rad2deg(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ supports_named_tensor: True
+
+ - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
+ variants: function, method
+ supports_named_tensor: True
+
+ - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ supports_named_tensor: True
+
+ - func: deg2rad(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ supports_named_tensor: True
+
+ - func: deg2rad_(Tensor(a!) self) -> Tensor(a!)
+ variants: function, method
+ supports_named_tensor: True
+
+ - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ supports_named_tensor: True
+
  - func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
  - func: rand.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
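rad2deg and deg2rad are simple new elementwise angle conversions (note they still carry supports_named_tensor, unlike the entries being cleaned up elsewhere in this diff). Assuming generated Torch.rad2deg / Torch.deg2rad bindings:

    require "torch"

    # Radians to degrees and back; in-place (rad2deg_) and out variants
    # exist per the schemas above.
    Torch.rad2deg(Torch.tensor([Math::PI, Math::PI / 2]))  # => [180.0, 90.0]
    Torch.deg2rad(Torch.tensor([180.0, 90.0]))             # back to radians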
@@ -2213,7 +2198,6 @@
  - func: rand.generator_out(int[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
 
  - func: rand_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
- supports_named_tensor: True
 
  - func: randint(int high, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
@@ -2250,7 +2234,6 @@
  - func: randn.generator_out(int[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
 
  - func: randn_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
- supports_named_tensor: True
 
  - func: randperm(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
@@ -2274,32 +2257,27 @@
 
  - func: reciprocal(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: reciprocal_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: reciprocal.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: neg(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: neg_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: neg_out
  CUDA: neg_out
 
  - func: repeat(Tensor self, int[] repeats) -> Tensor
+ use_c10_dispatcher: full
  variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
 
  - func: repeat_interleave.Tensor(Tensor repeats) -> Tensor
@@ -2318,11 +2296,12 @@
  variants: function, method
 
  - func: reshape(Tensor self, int[] shape) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
+ use_c10_dispatcher: full
  device_guard: False
  requires_tensor: True
  dispatch:
@@ -2335,15 +2314,12 @@
 
  - func: round(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: round_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: round_out
  CUDA: round_out
@@ -2360,10 +2336,8 @@
  CUDA: relu
  MkldnnCPU: mkldnn_relu
  QuantizedCPU: quantized_relu
- supports_named_tensor: True
 
  - func: relu_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
  dispatch:
  CPU: relu_
@@ -2379,6 +2353,7 @@
  CUDA: prelu_cuda
 
  - func: prelu_backward(Tensor grad_output, Tensor self, Tensor weight) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  CPU: prelu_backward_cpu
@@ -2408,15 +2383,12 @@
 
  - func: rsqrt(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: rsqrt_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: rsqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: rsqrt_out
  CUDA: rsqrt_out
@@ -2424,12 +2396,11 @@
  - func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: select.int(Tensor(a) self, int dim, int index) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: selu(Tensor self) -> Tensor
  use_c10_dispatcher: full
@@ -2441,10 +2412,8 @@
 
  - func: celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)
 
-
  - func: sigmoid(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
  dispatch:
  CPU: sigmoid
@@ -2453,7 +2422,6 @@
  MkldnnCPU: mkldnn_sigmoid
 
  - func: sigmoid_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
  dispatch:
  CPU: sigmoid_
@@ -2461,34 +2429,27 @@
  MkldnnCPU: mkldnn_sigmoid_
 
  - func: sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: sin(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: sin_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: sin_out
  CUDA: sin_out
 
  - func: sinh(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: sinh_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  # Returns a copy of this `Variable` that is detached from its autograd graph.
  # This method is OK to call if the `Variable` is a view.
@@ -2504,7 +2465,6 @@
  - func: detach(Tensor self) -> Tensor
  use_c10_dispatcher: full
  manual_kernel_registration: True
- supports_named_tensor: True
  variants: function, method
 
  # Like `detach()`, but modifies this `Variable` in-place. This method may
@@ -2512,26 +2472,24 @@
  # this. If this `Variable` is a view, throws an `std::runtime_error()`.
  - func: detach_(Tensor(a!) self) -> Tensor(a!)
  manual_kernel_registration: True
- supports_named_tensor: True
  variants: function, method
 
  - func: size.int(Tensor self, int dim) -> int
  use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: size.Dimname(Tensor self, Dimname dim) -> int
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: slice.Tensor(Tensor(a) self, int dim=0, int start=0, int end=9223372036854775807, int step=1) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: smm(Tensor self, Tensor mat2) -> Tensor
@@ -2541,11 +2499,9 @@
  # softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
  - func: softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: _softmax(Tensor self, int dim, bool half_to_float) -> Tensor
  use_c10_dispatcher: full
@@ -2561,27 +2517,26 @@
  CUDA: softmax_backward_cuda
 
  - func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[]
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: split_with_sizes(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: squeeze(Tensor(a) self) -> Tensor(a)
- supports_named_tensor: True
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
 
  - func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
- supports_named_tensor: True
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
 
  - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
- supports_named_tensor: True
  variants: function, method
  device_guard: False
 
@@ -2609,6 +2564,7 @@
  SparseCUDA: _sspaddmm_out_cuda
 
  - func: stack(Tensor[] tensors, int dim=0) -> Tensor
+ use_c10_dispatcher: full
 
  - func: stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
 
@@ -2619,114 +2575,95 @@
  - func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool onesided=True) -> Tensor
  variants: function, method
 
+ - func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool onesided=True, int? length=None) -> Tensor
+ variants: function, method
+
  - func: stride.int(Tensor self, int dim) -> int
  use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: stride.Dimname(Tensor self, Dimname dim) -> int
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: sum(Tensor self, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: sum.dim_IntList(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: sum.IntList_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: sum.DimnameList_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: sum_to_size(Tensor self, int[] size) -> Tensor
+ use_c10_dispatcher: full
  variants: method
  device_guard: False
 
  - func: sqrt(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: sqrt_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: square(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: square_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: std(Tensor self, bool unbiased=True) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: std.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: std_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
 
  - func: std_mean.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
 
  - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
  variants: function
- supports_named_tensor: True
 
  - func: std.out(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: std.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: std.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: prod(Tensor self, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: prod.int_out(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: prod.Dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
-
 
  - func: t(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
  device_guard: False
  variants: function, method
- supports_named_tensor: True
 
  - func: t_(Tensor(a!) self) -> Tensor(a!)
  device_guard: False
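The istft entry added above inverts stft, reconstructing a time-domain signal by overlap-add; the optional length pins the exact output size. A sketch, assuming torch-rb exposes both ops under these names and accepts length: as a keyword (unverified):

    require "torch"

    signal = Torch.randn(4096)
    spec = Torch.stft(signal, 256)

    # Round-trip: istft must be called with the same n_fft (and window,
    # if one was used); length trims the overlap-add result to 4096 samples.
    recovered = Torch.istft(spec, 256, length: 4096)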
@@ -2734,25 +2671,15 @@
 
  - func: tan(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: tan_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _tan__cpu
- CUDA: _tan__cuda
 
  - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _tan_out_cpu
- CUDA: _tan_out_cuda
 
  - func: tanh(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
  dispatch:
  CPU: tanh
@@ -2760,39 +2687,30 @@
  QuantizedCPU: quantized_tanh
 
  - func: tanh_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
- dispatch:
- CPU: _tanh__cpu
- CUDA: _tanh__cuda
 
  - func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- dispatch:
- CPU: _tanh_out_cpu
- CUDA: _tanh_out_cuda
 
  - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  # TODO: namespace threshold in 'nn'
  - func: threshold(Tensor self, Scalar threshold, Scalar value) -> Tensor
  use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
  dispatch:
  CPU: threshold
  CUDA: threshold_cuda
+ QuantizedCPU: quantized_threshold
 
  - func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)
  variants: function
- supports_named_tensor: True
  dispatch:
  CPU: threshold_
  CUDA: threshold__cuda
 
  - func: threshold.out(Tensor self, Scalar threshold, Scalar value, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: threshold_out
  CUDA: threshold_out_cuda
@@ -2805,14 +2723,13 @@
  CUDA: threshold_backward_cuda
 
  - func: transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a)
  variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: _mkldnn_transpose(Tensor self, int dim0, int dim1) -> Tensor
  use_c10_dispatcher: full
@@ -2837,12 +2754,22 @@
  variants: function
 
  - func: flip(Tensor self, int[] dims) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  CPU: flip_cpu
  CUDA: flip_cuda
 
+ - func: fliplr(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+ - func: flipud(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
  - func: roll(Tensor self, int[1] shifts, int[1] dims=[]) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  CPU: roll_cpu
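fliplr and flipud are NumPy-style conveniences over the general flip. Assuming torch-rb generates tensor-method bindings per variants: function, method:

    require "torch"

    m = Torch.tensor([[1, 2], [3, 4]])

    m.fliplr  # reverse column order: [[2, 1], [4, 3]]
    m.flipud  # reverse row order:    [[3, 4], [1, 2]]
    # Equivalent to the general form: Torch.flip(m, [1]) / Torch.flip(m, [0])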
@@ -2851,6 +2778,7 @@
  # default int[] value [0,1] should not add space after comma, since native_parse.py uses ', ' to split args
 
  - func: rot90(Tensor self, int k=1, int[] dims=[0,1]) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: trapz.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor
@@ -2860,6 +2788,7 @@
  use_c10_dispatcher: full
 
  - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
+ use_c10_dispatcher: full
 
  - func: triplet_margin_loss(Tensor anchor, Tensor positive, Tensor negative, float margin=1.0, float p=2, float eps=1e-06, bool swap=False, int reduction=Mean) -> Tensor
  use_c10_dispatcher: full
@@ -2872,7 +2801,6 @@
  CUDA: true_divide
  SparseCPU: true_divide_sparse
  SparseCUDA: true_divide_sparse
- supports_named_tensor: True
 
  - func: true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  variants: method
@@ -2881,7 +2809,6 @@
  CUDA: true_divide_
  SparseCPU: true_divide_sparse_
  SparseCUDA: true_divide_sparse_
- supports_named_tensor: True
 
  - func: true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -2889,28 +2816,22 @@
  CUDA: true_divide_out
  SparseCPU: true_divide_out_sparse_zerodim
  SparseCUDA: true_divide_out_sparse_zerodim
- supports_named_tensor: True
 
  - func: true_divide.Scalar(Tensor self, Scalar other) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: true_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: trunc(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: trunc_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: function, method
 
  - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: trunc_out
  CUDA: trunc_out
@@ -2924,24 +2845,28 @@
  variants: function
 
  - func: _unique(Tensor self, bool sorted=True, bool return_inverse=False) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _unique_cpu
  CUDA: _unique_cuda
 
  - func: unique_dim(Tensor self, int dim, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: unique_dim_cpu
  CUDA: unique_dim_cuda
 
  - func: unique_consecutive(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: unique_consecutive_cpu
  CUDA: unique_consecutive_cuda
 
  - func: unique_dim_consecutive(Tensor self, int dim, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: unique_dim_consecutive_cpu
@@ -2952,14 +2877,17 @@
  # Please don't rely on these two operators, they will be removed soon
 
  - func: _unique2(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _unique2_cpu
  CUDA: _unique2_cuda
 
  - func: _unsafe_view(Tensor self, int[] size) -> Tensor
+ use_c10_dispatcher: full
 
  - func: unsqueeze(Tensor(a) self, int dim) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: function, method
  device_guard: False
 
@@ -2967,36 +2895,34 @@
  variants: method
  device_guard: False
 
+ - func: vander(Tensor x, int? N=None, bool increasing=False) -> Tensor
+ use_c10_dispatcher: full
+
  - func: var(Tensor self, bool unbiased=True) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: var.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: var.out(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: var.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: var.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: var_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
 
  - func: var_mean.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
 
  - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
  variants: function
- supports_named_tensor: True
 
  - func: view_as(Tensor self, Tensor other) -> Tensor
  use_c10_dispatcher: full
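vander builds a Vandermonde matrix from a 1-D tensor, mirroring numpy.vander. A sketch, assuming a generated Torch.vander binding (whether the int? N maps to a positional or keyword argument in Ruby is an assumption):

    require "torch"

    x = Torch.tensor([1.0, 2.0, 3.0])

    # Default layout is decreasing powers: row i is [x[i]**2, x[i], 1];
    # pass increasing: true for ascending powers.
    Torch.vander(x)
    # => [[1, 1, 1],
    #     [4, 2, 1],
    #     [9, 3, 1]]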
@@ -3011,6 +2937,7 @@
  variants: function, method
 
  - func: where(Tensor condition) -> Tensor[]
+ use_c10_dispatcher: full
  variants: function
 
  - func: _s_where(Tensor condition, Tensor self, Tensor other) -> Tensor
@@ -3018,6 +2945,7 @@
  variants: function
 
  - func: norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  # VariableType::_weight_norm does not want to be given a gap in the autograd graph,
@@ -3027,16 +2955,19 @@
  variants: function
 
  - func: _weight_norm_cuda_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CUDA: weight_norm_cuda
 
  - func: _weight_norm_cuda_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CUDA: weight_norm_cuda_backward
 
  - func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
 
  - func: zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -3047,7 +2978,6 @@
  - func: zeros.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
 
  - func: zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
- supports_named_tensor: True
 
  - func: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor
  use_c10_dispatcher: full
@@ -3079,6 +3009,11 @@
  CPU: _s_poisson_cpu
  CUDA: _s_poisson_cuda
 
+ - func: binomial(Tensor count, Tensor prob, Generator? generator=None) -> Tensor
+ dispatch:
+ CPU: _s_binomial_cpu
+ CUDA: _s_binomial_cuda
+
  # When more variants get ported to native, this dispatch will get more
  # complicated
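binomial adds native binomial sampling next to poisson, with per-element counts and probabilities. A sketch, assuming a Torch.binomial binding is generated for this schema (names unverified):

    require "torch"

    count = Torch.tensor([10.0, 100.0])
    prob  = Torch.tensor([0.5, 0.1])

    # One sample per element: the number of successes in count[i] trials,
    # each succeeding with probability prob[i].
    Torch.binomial(count, prob)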
3084
3019
 
@@ -3095,14 +3030,46 @@
3095
3030
  - func: _sparse_sum.dtype(Tensor self, *, ScalarType dtype) -> Tensor
3096
3031
 
3097
3032
  - func: _sparse_sum.dim(Tensor self, int[1] dim) -> Tensor
3033
+ use_c10_dispatcher: full
3098
3034
 
3099
3035
  - func: _sparse_sum.dim_dtype(Tensor self, int[1] dim, *, ScalarType dtype) -> Tensor
3100
3036
 
3101
3037
  - func: _sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor
3038
+ use_c10_dispatcher: full
3102
3039
  dispatch:
3103
3040
  SparseCPU: _sparse_sum_backward_cpu
3104
3041
  SparseCUDA: _sparse_sum_backward_cuda
3105
3042
 
3043
+ - func: _sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
3044
+ variants: function
3045
+
3046
+ - func: _sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
3047
+ variants: function
3048
+
3049
+ - func: _sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
3050
+ use_c10_dispatcher: full
3051
+ dispatch:
3052
+ SparseCPU: softmax_sparse_cpu
3053
+
3054
+ - func: _sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
3055
+ dispatch:
3056
+ SparseCPU: softmax_backward_sparse_cpu
3057
+
3058
+ - func: _sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
3059
+ variants: function
3060
+
3061
+ - func: _sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
3062
+ variants: function
3063
+
3064
+ - func: _sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
3065
+ use_c10_dispatcher: full
3066
+ dispatch:
3067
+ SparseCPU: log_softmax_sparse_cpu
3068
+
3069
+ - func: _sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
3070
+ dispatch:
3071
+ SparseCPU: log_softmax_backward_sparse_cpu
3072
+
  - func: norm.ScalarOpt_dtype(Tensor self, Scalar? p, *, ScalarType dtype) -> Tensor
  variants: function, method
 
@@ -3114,6 +3081,7 @@
  variants: function, method
 
  - func: norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: norm.dtype_out(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)
@@ -3135,6 +3103,7 @@
  variants: function
 
  - func: frobenius_norm.dim(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: frobenius_norm.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
@@ -3148,6 +3117,7 @@
  variants: function
 
  - func: nuclear_norm.dim(Tensor self, int[2] dim, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: nuclear_norm.dim_out(Tensor self, int[2] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
@@ -3162,15 +3132,13 @@
  SparseCUDA: clone_sparse
  MkldnnCPU: mkldnn_clone
  QuantizedCPU: quantized_clone
- supports_named_tensor: True
+ QuantizedCUDA: quantized_clone
 
  - func: resize_as_(Tensor(a!) self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor(a!)
  manual_kernel_registration: True
- supports_named_tensor: True
  variants: function, method
 
  - func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: pow_out
  CUDA: pow_out
@@ -3180,7 +3148,6 @@
  - func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
  dispatch:
  CPU: pow
  CUDA: pow
@@ -3188,7 +3155,6 @@
  SparseCUDA: pow_sparse_scalar
 
  - func: zero_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: method, function
  dispatch:
  CPU: zero_
@@ -3203,7 +3169,6 @@
  CUDA: sub_out
  SparseCPU: sub_out_sparse
  SparseCUDA: sub_out_sparse
- supports_named_tensor: True
 
  - func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
@@ -3213,7 +3178,6 @@
  CUDA: sub
  SparseCPU: sub_sparse
  SparseCUDA: sub_sparse
- supports_named_tensor: True
 
  - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
  variants: method
@@ -3222,64 +3186,55 @@
  CUDA: sub_
  SparseCPU: sub_sparse_
  SparseCUDA: sub_sparse_
- supports_named_tensor: True
 
  # For C++ only, until we have conversion from C++ numbers to Tensor
  - func: sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
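The comment above is about C++ overload resolution: a plain number does not implicitly convert to `Tensor` in C++, so expressions such as `t - 2` need a dedicated `Scalar` overload. An illustrative sketch, assuming the standard generated C++ frontend:

```cpp
#include <torch/torch.h>

int main() {
  auto t = torch::ones({2, 2});
  auto a = t - 2;                    // resolves to sub(Tensor, Scalar)
  auto b = t - torch::ones({2, 2});  // resolves to sub(Tensor, Tensor)
  auto c = torch::rsub(t, 2);        // reversed Scalar form: 2 - t
}
```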
 
  - func: sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: rsub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
 
  # For C++ only, until we have conversion from C++ numbers to Tensor
  - func: rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
  variants: function
- supports_named_tensor: True
 
  # Functionally the same as addmm, but we give it a different derivative formula
  # that doesn't propagate gradients to non-present entries on sparse.
  - func: _sparse_addmm(Tensor self, Tensor sparse, Tensor dense, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
- named_guard: False
 
  - func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: legacy::cpu::_th_addmm_out
- CUDA: legacy::cuda::_th_addmm_out
+ CPU: addmm_cpu_out
+ CUDA: addmm_out_cuda
  SparseCPU: addmm_out_sparse_dense_cpu
  SparseCUDA: addmm_out_sparse_dense_cuda
- supports_named_tensor: True
 
  - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
  dispatch:
- CPU: legacy::cpu::_th_addmm
- CUDA: legacy::cuda::_th_addmm
+ CPU: addmm_cpu
+ CUDA: addmm_cuda
  SparseCPU: addmm_sparse_dense_cpu
  SparseCUDA: addmm_sparse_dense_cuda
- supports_named_tensor: True
+ Vulkan: vulkan_addmm
 
  - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
  variants: method
  dispatch:
  CPU: legacy::cpu::_th_addmm_
- CUDA: legacy::cuda::_th_addmm_
+ CUDA: addmm__cuda
  # Warning! For whatever reason, the inplace sparse addmm is NON
  # broadcasting
  SparseCPU: s_addmm_sparse_dense_cpu_
  SparseCUDA: s_addmm_sparse_dense_cuda_
- supports_named_tensor: True
-
 
  # NOTE [ Sparse: autograd and API ]
  #
@@ -3396,7 +3351,6 @@
  # shared. In other words, their outputs are non-differentiable views of the
  # sparse tensor.
 
-
  # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
  # the default would never make sense.
  - func: sparse_coo_tensor.size(int[] size, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False) -> Tensor
@@ -3433,7 +3387,6 @@
  SparseCUDA: sparse_resize_and_clear_
  requires_tensor: True
 
-
  - func: sparse_mask(Tensor self, Tensor mask) -> Tensor
  use_c10_dispatcher: full
  variants: method
@@ -3442,7 +3395,6 @@
  SparseCUDA: sparse_mask_cuda
  requires_tensor: True
 
-
  - func: to_dense(Tensor self) -> Tensor
  use_c10_dispatcher: full
  variants: method
@@ -3474,7 +3426,6 @@
  requires_tensor: True
  device_guard: False
 
-
  - func: dense_dim(Tensor self) -> int
  use_c10_dispatcher: full
  variants: method
@@ -3494,7 +3445,6 @@
  requires_tensor: True
  device_guard: False
 
-
  - func: _nnz(Tensor self) -> int
  use_c10_dispatcher: full
  variants: method
@@ -3504,7 +3454,6 @@
  requires_tensor: True
  device_guard: False
 
-
  - func: coalesce(Tensor self) -> Tensor
  use_c10_dispatcher: full
  variants: method
@@ -3513,7 +3462,6 @@
  SparseCUDA: coalesce_sparse_cuda
  requires_tensor: True
 
-
  - func: is_coalesced(Tensor self) -> bool
  use_c10_dispatcher: full
  variants: method
@@ -3522,10 +3470,9 @@
  SparseCUDA: is_coalesced_sparse
  requires_tensor: True
  device_guard: False
- supports_named_tensor: True
-
 
  - func: _indices(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method
  dispatch:
  SparseCPU: _indices_sparse
@@ -3534,6 +3481,7 @@
  device_guard: False
 
  - func: _values(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method
  dispatch:
  SparseCPU: _values_sparse
@@ -3553,6 +3501,7 @@
  device_guard: False
 
  - func: indices(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method
  dispatch:
  SparseCPU: indices_sparse
@@ -3561,6 +3510,7 @@
  device_guard: False
 
  - func: values(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method
  dispatch:
  SparseCPU: values_sparse
@@ -3568,7 +3518,6 @@
  requires_tensor: True
  device_guard: False
 
-
  - func: hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  SparseCPU: hspmm_out_sparse_cpu
@@ -3590,12 +3539,11 @@
  requires_tensor: True
 
  - func: unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]
+ use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: unbind.Dimname(Tensor(a) self, Dimname dim) -> Tensor(a)[]
  variants: function, method
- supports_named_tensor: True
 
  - func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
  use_c10_dispatcher: full
@@ -3618,6 +3566,7 @@
  CPU: dense_to_mkldnn
 
  - func: mkldnn_reorder_conv2d_weight(Tensor self, int[2] padding=0, int[2] stride=1, int[2] dilation=1, int groups=1) -> Tensor
+ use_c10_dispatcher: full
  variants: function
  python_module: nn
  dispatch:
@@ -3629,42 +3578,60 @@
  - func: quantize_per_tensor(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor
  variants: function
  dispatch:
- CPU: quantize_per_tensor_cpu
+ CPU: quantize_per_tensor
+ CUDA: quantize_per_tensor
+
+ - func: quantize_per_tensor.tensors(Tensor[] tensors, Tensor scales, Tensor zero_points, ScalarType dtype) -> Tensor[]
+ variants: function
+ dispatch:
+ CPU: quantize_per_tensor_list_cpu
 
  - func: quantize_per_channel(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype) -> Tensor
  variants: function
  dispatch:
  CPU: quantize_per_channel_cpu
 
- - func: dequantize(Tensor self) -> Tensor
+ - func: dequantize.self(Tensor self) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
  dispatch:
- QuantizedCPU: dequantize_quant
+ QuantizedCPU: dequantize_quant
+ QuantizedCUDA: dequantize_quant
+
+ - func: dequantize.tensors(Tensor[] tensors) -> Tensor[]
+ use_c10_dispatcher: full
+ variants: function
+ dispatch:
+ QuantizedCPU: dequantize_tensors_quant
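The quantization entry points gain CUDA dispatch plus list variants, and `dequantize` is split into named overloads. A brief round-trip sketch in C++ (the scale and zero point are arbitrary example values):

```cpp
#include <torch/torch.h>

int main() {
  auto x = torch::rand({4});
  // Affine per-tensor quantization: q = round(x / scale) + zero_point.
  auto qx = torch::quantize_per_tensor(x, /*scale=*/0.1, /*zero_point=*/0,
                                       torch::kQUInt8);
  auto back = qx.dequantize();  // the dequantize.self method variant
}
```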
 
  - func: q_scale(Tensor self) -> float
  use_c10_dispatcher: full
  variants: function, method
  dispatch:
  QuantizedCPU: q_scale_quant
+ QuantizedCUDA: q_scale_quant
 
  - func: q_zero_point(Tensor self) -> int
  use_c10_dispatcher: full
  variants: function, method
  dispatch:
  QuantizedCPU: q_zero_point_quant
+ QuantizedCUDA: q_zero_point_quant
 
  - func: q_per_channel_scales(Tensor self) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  QuantizedCPU: q_per_channel_scales_quant
 
  - func: q_per_channel_zero_points(Tensor self) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  QuantizedCPU: q_per_channel_zero_points_quant
 
  - func: q_per_channel_axis(Tensor self) -> int
+ use_c10_dispatcher: full
  variants: function, method
  dispatch:
  QuantizedCPU: q_per_channel_axis_quant
@@ -3673,14 +3640,17 @@
  use_c10_dispatcher: full
  variants: function, method
  dispatch:
- QuantizedCPU: int_repr_quant
+ QuantizedCPU: int_repr_quant_cpu
+ QuantizedCUDA: int_repr_quant_cuda
 
  - func: _make_per_tensor_quantized_tensor(Tensor self, float scale, int zero_point) -> Tensor
  use_c10_dispatcher: full
  dispatch:
  CPU: make_per_tensor_quantized_tensor_cpu
+ CUDA: make_per_tensor_quantized_tensor_cuda
 
  - func: _make_per_channel_quantized_tensor(Tensor self, Tensor scale, Tensor zero_point, int axis) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CPU: make_per_channel_quantized_tensor_cpu
 
@@ -3689,6 +3659,7 @@
  variants: method
  dispatch:
  QuantizedCPU: qscheme_quant
+ QuantizedCUDA: qscheme_quant
 
  - func: fake_quantize_per_tensor_affine(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> Tensor
  use_c10_dispatcher: full
@@ -3706,31 +3677,34 @@
  use_c10_dispatcher: full
  variants: function
 
+ - func: _choose_qparams_per_tensor(Tensor self, bool reduce_range=False) -> (float, int)
+ use_c10_dispatcher: full
+ variants: function
+
  # to(Device) must not exist because all constructors of Device also works for
  # TensorOptions. Otherwise, an ambiguity error is thrown.
  # See NOTE [ TensorOptions Constructors ].
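The note above concerns C++ overload resolution: `torch::Device` converts implicitly to `torch::TensorOptions`, so a dedicated `to(Device)` overload would make common calls ambiguous. An illustrative sketch of how such calls resolve, not the actual ATen declarations:

```cpp
#include <torch/torch.h>

torch::Tensor move(const torch::Tensor& t) {
  // The Device -> TensorOptions conversion kicks in, so this resolves to
  // Tensor::to(TensorOptions, ...) rather than a (nonexistent) to(Device).
  return t.to(torch::Device(torch::kCPU));
  // The to.device overload (Device plus ScalarType) does exist and is
  // unambiguous: t.to(torch::kCUDA, torch::kFloat);
}
```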
  - func: to.dtype_layout(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
  variants: method
  device_guard: False
- supports_named_tensor: True
 
  - func: to.device(Tensor self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
  variants: method
  device_guard: False
- supports_named_tensor: True
 
  - func: to.dtype(Tensor self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
  variants: method
  device_guard: False
- supports_named_tensor: True
 
  - func: to.other(Tensor self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
  variants: method
  device_guard: False
 
  - func: meshgrid(Tensor[] tensors) -> Tensor[]
+ use_c10_dispatcher: full
 
  - func: cartesian_prod(Tensor[] tensors) -> Tensor
+ use_c10_dispatcher: full
  variants: function
 
  - func: combinations(Tensor self, int r=2, bool with_replacement=False) -> Tensor
@@ -3740,7 +3714,6 @@
  - func: item(Tensor self) -> Scalar
  use_c10_dispatcher: full
  variants: method
- supports_named_tensor: True
 
  - func: result_type.Tensor(Tensor tensor, Tensor other) -> ScalarType
  variants: function
@@ -3766,7 +3739,6 @@
  CPU: _local_scalar_dense_cpu
  CUDA: _local_scalar_dense_cuda
  variants: function
- supports_named_tensor: True
 
  # Fused RNN kernels
  - func: _thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor, Tensor)
@@ -3784,6 +3756,7 @@
  CUDA: _thnn_fused_gru_cell_cuda
 
  - func: _thnn_fused_gru_cell_backward(Tensor grad_hy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CUDA: _thnn_fused_gru_cell_backward_cuda
 
@@ -3791,20 +3764,28 @@
 
  # RNN cells and layers
  - func: lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: rnn_tanh.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: rnn_tanh.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: rnn_relu.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: rnn_relu.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> (Tensor, Tensor)
 
@@ -3814,19 +3795,24 @@
 
  - func: rnn_relu_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor
 
+ # Quantized RNN layer registration has been moved to C10 dispatch in `RNN.cpp`
+
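Per the new comment, the quantized LSTM/GRU layer ops are now registered with the C10 dispatcher directly from C++ rather than declared in this file. A minimal sketch of what out-of-YAML registration looks like with the `TORCH_LIBRARY` macro available in recent libtorch (a hypothetical `myops::relu6` operator; the real registrations in `RNN.cpp` are more involved):

```cpp
#include <torch/library.h>
#include <torch/torch.h>

// A toy operator registered straight with the C10 dispatcher,
// bypassing native_functions.yaml entirely.
torch::Tensor relu6(const torch::Tensor& self) {
  return self.clamp(0, 6);
}

TORCH_LIBRARY(myops, m) {
  // The schema string uses the same syntax as the YAML `func:` entries.
  m.def("relu6(Tensor self) -> Tensor", relu6);
}
```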
  # Quantized RNN layers
- - func: quantized_lstm(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, ScalarType? dtype=None, bool use_dynamic=False) -> (Tensor, Tensor, Tensor)
+ # - func: quantized_lstm(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, ScalarType? dtype=None, bool use_dynamic=False) -> (Tensor, Tensor, Tensor)
 
- - func: quantized_lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, *, ScalarType? dtype=None, bool use_dynamic=False) -> (Tensor, Tensor, Tensor)
+ # - func: quantized_lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, *, ScalarType? dtype=None, bool use_dynamic=False) -> (Tensor, Tensor, Tensor)
 
  # Quantized GRU layers
 
- - func: quantized_gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+ # - func: quantized_gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+ # use_c10_dispatcher: full
 
- - func: quantized_gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+ # - func: quantized_gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+ # use_c10_dispatcher: full
 
  # Quantized RNN cells
  - func: quantized_lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: quantized_gru_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor b_ih, Tensor b_hh, Tensor packed_ih, Tensor packed_hh, Tensor col_offsets_ih, Tensor col_offsets_hh, Scalar scale_ih, Scalar scale_hh, Scalar zero_point_ih, Scalar zero_point_hh) -> Tensor
  use_c10_dispatcher: full
@@ -3839,10 +3825,13 @@
 
  # PackedSequence utilities
  - func: _pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  - func: _pack_padded_sequence_backward(Tensor grad, int[] input_size, Tensor batch_sizes, bool batch_first) -> Tensor
+ use_c10_dispatcher: full
 
  - func: _pad_packed_sequence(Tensor data, Tensor batch_sizes, bool batch_first, Scalar padding_value, int total_length) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
 
  # wrappers for legacy TH methods
 
@@ -3857,9 +3846,10 @@
  variants: method
  device_guard: False
  dispatch:
- CPU: legacy::cpu::_th_set_
- CUDA: legacy::cuda::_th_set_
- QuantizedCPU: set_storage
+ CPU: set_storage_cpu_
+ CUDA: set_storage_cuda_
+ QuantizedCPU: set_storage_quantized_
+ QuantizedCUDA: set_storage_quantized_
 
  - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
  variants: method
@@ -3878,6 +3868,7 @@
  variants: method
  dispatch:
  QuantizedCPU: set_quantizer_
+ QuantizedCUDA: set_quantizer_
 
  - func: is_set_to(Tensor self, Tensor tensor) -> bool
  use_c10_dispatcher: full
@@ -3892,24 +3883,20 @@
  dispatch:
  CPU: masked_fill__cpu
  CUDA: masked_fill__cuda
- supports_named_tensor: True
 
  - func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
  variants: method
  dispatch:
  CPU: masked_fill__cpu
  CUDA: masked_fill__cuda
- supports_named_tensor: True
 
  - func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!)
  variants: method
@@ -3922,6 +3909,7 @@
  variants: function, method
 
  - func: view(Tensor(a) self, int[] size) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method
  device_guard: False
  dispatch:
@@ -3929,6 +3917,7 @@
  CUDA: view
  MkldnnCPU: mkldnn_view
  QuantizedCPU: view
+ QuantizedCUDA: view
 
  - func: put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)
  variants: method
@@ -3951,14 +3940,12 @@
 
  - func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
  dispatch:
  CPU: legacy::cpu::_th_index_fill_
  CUDA: legacy::cuda::_th_index_fill_
 
  - func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: function, method
 
  - func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!)
@@ -3966,34 +3953,28 @@
  dispatch:
  CPU: index_fill_
  CUDA: index_fill_
- supports_named_tensor: True
 
  - func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
  use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: index_fill_.Dimname_Tensor(Tensor(a!) self, Dimname dim, Tensor index, Tensor value) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: index_fill.Dimname_Scalar(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: index_fill.Dimname_Tensor(Tensor self, Dimname dim, Tensor index, Tensor value) -> Tensor
  variants: function, method
- supports_named_tensor: True
 
  - func: scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)
  variants: method
  dispatch:
- CPU: scatter_cpu_
- CUDA: legacy::cuda::_th_scatter_
+ CPU: scatter_
+ CUDA: scatter_
 
  - func: scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
  use_c10_dispatcher: full
@@ -4002,8 +3983,8 @@
  - func: scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
  variants: method
  dispatch:
- CPU: scatter_fill_cpu_
- CUDA: legacy::cuda::_th_scatter_
+ CPU: scatter_fill_
+ CUDA: scatter_fill_
 
  - func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
  use_c10_dispatcher: full
@@ -4018,8 +3999,8 @@
  - func: scatter_add_(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)
  variants: method
  dispatch:
- CPU: scatter_add_cpu_
- CUDA: legacy::cuda::_th_scatter_add_
+ CPU: scatter_add_
+ CUDA: scatter_add_
 
  - func: scatter_add(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
  use_c10_dispatcher: full
@@ -4077,9 +4058,11 @@
  CUDA: bitwise_and_out
 
  - func: bitwise_and.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -4115,9 +4098,11 @@
  CUDA: bitwise_or_out
 
  - func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -4153,9 +4138,11 @@
  CUDA: bitwise_xor_out
 
  - func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -4231,14 +4218,12 @@
  CUDA: __irshift__
 
  - func: lgamma_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: method
  dispatch:
  CPU: _lgamma__cpu
  CUDA: _lgamma__cuda
 
  - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
- supports_named_tensor: True
  variants: method
 
  - func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
@@ -4254,11 +4239,9 @@
  CUDA: triu_cuda_
 
  - func: digamma_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: method
 
  - func: polygamma_(Tensor(a!) self, int n) -> Tensor(a!)
- supports_named_tensor: True
  variants: method
 
  - func: renorm_(Tensor(a!) self, Scalar p, int dim, Scalar maxnorm) -> Tensor(a!)
@@ -4268,14 +4251,12 @@
  CUDA: legacy::cuda::_th_renorm_
 
  - func: pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!)
- supports_named_tensor: True
  variants: method
  dispatch:
  CPU: pow_
  CUDA: pow_
 
  - func: pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!)
- supports_named_tensor: True
  variants: method
  dispatch:
  CPU: pow_
@@ -4297,13 +4278,13 @@
  variants: method
  dispatch:
  CPU: fmod_
- CUDA: legacy::cuda::_th_fmod_
+ CUDA: fmod_cuda_
 
  - func: fmod_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  variants: method
  dispatch:
  CPU: fmod_
- CUDA: legacy::cuda::_th_fmod_
+ CUDA: fmod_cuda_
 
  - func: remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  variants: method
@@ -4321,72 +4302,57 @@
  variants: method
  dispatch:
  CPU: legacy::cpu::_th_addbmm_
- CUDA: legacy::cuda::_th_addbmm_
+ CUDA: addbmm__cuda
 
  - func: addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: legacy::cpu::_th_addbmm_out
- CUDA: legacy::cuda::_th_addbmm_out
+ CPU: addbmm_cpu_out
+ CUDA: addbmm_out_cuda
 
  - func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
- CPU: legacy::cpu::_th_addbmm
- CUDA: legacy::cuda::_th_addbmm
+ CPU: addbmm_cpu
+ CUDA: addbmm_cuda
 
  - func: addcdiv_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- dispatch:
- CPU: legacy::cpu::_th_uniform_
- CUDA: uniform_cuda_
- supports_named_tensor: True
 
  - func: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
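The in-place samplers above (`random_`, `uniform_`, `cauchy_`, `exponential_`, `geometric_`, ...) share a pattern: each takes an optional `Generator` and overwrites the tensor in place. A quick C++ usage sketch:

```cpp
#include <torch/torch.h>

int main() {
  auto t = torch::empty({2, 3});
  t.uniform_(0.0, 1.0);  // fill with U(0, 1) samples
  t.exponential_(1.0);   // overwrite with Exp(1) samples
  t.geometric_(0.5);     // overwrite with Geometric(p = 0.5) samples
}
```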
 
  # wrappers for TH functions
 
  - func: diag.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: legacy::cpu::_th_diag_out
- CUDA: legacy::cuda::_th_diag_out
+ CPU: diag_cpu_out
+ CUDA: diag_cuda_out
 
  - func: diag(Tensor self, int diagonal=0) -> Tensor
  use_c10_dispatcher: full
  variants: method, function
- dispatch:
- CPU: legacy::cpu::_th_diag
- CUDA: legacy::cuda::_th_diag
 
  - func: cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)
 
@@ -4427,17 +4393,15 @@
  variants: method, function
  dispatch:
  CPU: legacy::cpu::_th_trace
- CUDA: legacy::cuda::_th_trace
+ CUDA: trace_cuda
 
  - func: ne.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: ne_out
  CUDA: ne_out
  QuantizedCPU: ne_out_quantized_cpu
 
  - func: ne.Scalar(Tensor self, Scalar other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4446,14 +4410,12 @@
  QuantizedCPU: ne_quantized_cpu
 
  - func: ne.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: ne_out
  CUDA: ne_out
  QuantizedCPU: ne_out_quantized_cpu
 
  - func: ne.Tensor(Tensor self, Tensor other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4462,14 +4424,12 @@
  QuantizedCPU: ne_quantized_cpu
 
  - func: eq.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: eq_out
  CUDA: eq_out
  QuantizedCPU: eq_out_quantized_cpu
 
  - func: eq.Scalar(Tensor self, Scalar other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4478,14 +4438,12 @@
  QuantizedCPU: eq_quantized_cpu
 
  - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: eq_out
  CUDA: eq_out
  QuantizedCPU: eq_out_quantized_cpu
 
  - func: eq.Tensor(Tensor self, Tensor other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4494,14 +4452,12 @@
  QuantizedCPU: eq_quantized_cpu
 
  - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: ge_out
  CUDA: ge_out
  QuantizedCPU: ge_out_quantized_cpu
 
  - func: ge.Scalar(Tensor self, Scalar other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4510,14 +4466,12 @@
  QuantizedCPU: ge_quantized_cpu
 
  - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: ge_out
  CUDA: ge_out
  QuantizedCPU: ge_out_quantized_cpu
 
  - func: ge.Tensor(Tensor self, Tensor other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4526,14 +4480,12 @@
  QuantizedCPU: ge_quantized_cpu
 
  - func: le.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: le_out
  CUDA: le_out
  QuantizedCPU: le_out_quantized_cpu
 
  - func: le.Scalar(Tensor self, Scalar other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4542,14 +4494,12 @@
  QuantizedCPU: le_quantized_cpu
 
  - func: le.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: le_out
  CUDA: le_out
  QuantizedCPU: le_out_quantized_cpu
 
  - func: le.Tensor(Tensor self, Tensor other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4558,14 +4508,12 @@
  QuantizedCPU: le_quantized_cpu
 
  - func: gt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: gt_out
  CUDA: gt_out
  QuantizedCPU: gt_out_quantized_cpu
 
  - func: gt.Scalar(Tensor self, Scalar other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4574,14 +4522,12 @@
  QuantizedCPU: gt_quantized_cpu
 
  - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: gt_out
  CUDA: gt_out
  QuantizedCPU: gt_out_quantized_cpu
 
  - func: gt.Tensor(Tensor self, Tensor other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4590,14 +4536,12 @@
  QuantizedCPU: gt_quantized_cpu
 
  - func: lt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: lt_out
  CUDA: lt_out
  QuantizedCPU: lt_out_quantized_cpu
 
  - func: lt.Scalar(Tensor self, Scalar other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4606,14 +4550,12 @@
  QuantizedCPU: lt_quantized_cpu
 
  - func: lt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: lt_out
  CUDA: lt_out
  QuantizedCPU: lt_out_quantized_cpu
 
  - func: lt.Tensor(Tensor self, Tensor other) -> Tensor
- supports_named_tensor: True
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
@@ -4656,7 +4598,6 @@
  dispatch:
  CPU: masked_select_out_cpu
  CUDA: masked_select_out_cuda
- supports_named_tensor: True
 
  - func: masked_select(Tensor self, Tensor mask) -> Tensor
  use_c10_dispatcher: full
@@ -4664,7 +4605,6 @@
  dispatch:
  CPU: masked_select_cpu
  CUDA: masked_select_cuda
- supports_named_tensor: True
 
  - func: nonzero.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -4679,19 +4619,20 @@
  CUDA: legacy::cuda::_th_nonzero
 
  - func: nonzero_numpy(Tensor self) -> Tensor[]
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: gather.out(Tensor self, int dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: gather_out_cpu
- CUDA: gather_out_cuda
+ CPU: gather_out_cpu_cuda
+ CUDA: gather_out_cpu_cuda
 
  - func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
- CPU: gather_cpu
- CUDA: gather_cuda
+ CPU: gather
+ CUDA: gather
 
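`gather` now routes CPU and CUDA to one shared kernel. Its contract for `dim=1` is `out[i][j] = self[i][index[i][j]]`; a short sketch:

```cpp
#include <torch/torch.h>

int main() {
  auto src = torch::tensor({{1, 2}, {3, 4}});
  auto idx = torch::tensor({{0, 0}, {1, 0}});
  // dim=1: out[i][j] = src[i][idx[i][j]]  ->  [[1, 1], [4, 3]]
  auto out = torch::gather(src, /*dim=*/1, idx);
}
```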
  - func: gather.dimname_out(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)
 
@@ -4702,24 +4643,19 @@
  use_c10_dispatcher: full
 
  - func: addcmul.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
  use_c10_dispatcher: full
  variants: method, function
- supports_named_tensor: True
 
  - func: addcmul_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: addcdiv.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
  use_c10_dispatcher: full
  variants: method, function
- supports_named_tensor: True
 
  - func: lstsq.X(Tensor self, Tensor A, *, Tensor(a!) X, Tensor(b!) qr) -> (Tensor(a!) solution, Tensor(b!) QR)
  dispatch:
@@ -4727,6 +4663,7 @@
  CUDA: legacy::cuda::_th_gels_out
 
  - func: lstsq(Tensor self, Tensor A) -> (Tensor solution, Tensor QR)
+ use_c10_dispatcher: full
  variants: method, function
  dispatch:
  CPU: legacy::cpu::_th_gels
@@ -4735,9 +4672,11 @@
  - func: triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) M) -> (Tensor(a!) solution, Tensor(b!) cloned_coefficient)
 
  - func: triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient)
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: _triangular_solve_helper(Tensor self, Tensor A, bool upper, bool transpose, bool unitriangular) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _triangular_solve_helper_cpu
@@ -4746,9 +4685,11 @@
  - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
 
  - func: symeig(Tensor self, bool eigenvectors=False, bool upper=True) -> (Tensor eigenvalues, Tensor eigenvectors)
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: _symeig_helper(Tensor self, bool eigenvectors, bool upper) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _symeig_helper_cpu
@@ -4760,6 +4701,7 @@
  CUDA: legacy::cuda::_th_eig_out
 
  - func: eig(Tensor self, bool eigenvectors=False) -> (Tensor eigenvalues, Tensor eigenvectors)
+ use_c10_dispatcher: full
  variants: method, function
  dispatch:
  CPU: legacy::cpu::_th_eig
@@ -4768,9 +4710,11 @@
  - func: svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)
 
  - func: svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V)
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: _svd_helper(Tensor self, bool some, bool compute_uv) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _svd_helper_cpu
@@ -4803,11 +4747,13 @@
  CUDA: _cholesky_solve_helper_cuda
 
  - func: solve(Tensor self, Tensor A) -> (Tensor solution, Tensor LU)
+ use_c10_dispatcher: full
  variants: function, method
 
  - func: solve.solution(Tensor self, Tensor A, *, Tensor(a!) solution, Tensor(b!) lu) -> (Tensor(a!) solution, Tensor(b!) LU)
 
  - func: _solve_helper(Tensor self, Tensor A) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _solve_helper_cpu
@@ -4828,9 +4774,11 @@
  - func: qr.Q(Tensor self, bool some=True, *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)
 
  - func: qr(Tensor self, bool some=True) -> (Tensor Q, Tensor R)
+ use_c10_dispatcher: full
  variants: method, function
 
  - func: _qr_helper(Tensor self, bool some) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _qr_helper_cpu
@@ -4842,6 +4790,7 @@
  CUDA: legacy::cuda::_th_geqrf_out
 
  - func: geqrf(Tensor self) -> (Tensor a, Tensor tau)
+ use_c10_dispatcher: full
  variants: method, function
  dispatch:
  CPU: legacy::cpu::_th_geqrf
@@ -4868,6 +4817,7 @@
  CPU: legacy::cpu::_th_ormqr
 
  - func: _lu_with_info(Tensor self, bool pivot=True, bool check_errors=True) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: _lu_with_info_cpu
@@ -4899,6 +4849,7 @@
  CUDA: multinomial
 
  - func: _multinomial_alias_setup(Tensor probs) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  variants: function
  dispatch:
  CPU: legacy::cpu::_th_multinomial_alias_setup
@@ -4911,66 +4862,55 @@
  CUDA: legacy::cuda::_th_multinomial_alias_draw
 
  - func: lgamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: _lgamma_out_cpu
  CUDA: _lgamma_out_cuda
 
  - func: lgamma(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: method, function
  dispatch:
  CPU: lgamma
  CUDA: lgamma
 
  - func: digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: digamma(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: method, function
 
  - func: polygamma.out(int n, Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: polygamma(int n, Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: method, function
 
  - func: erfinv(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: method, function
  dispatch:
  CPU: erfinv
  CUDA: erfinv
 
  - func: erfinv_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
  variants: method
  dispatch:
  CPU: _erfinv__cpu
  CUDA: _erfinv__cuda
 
  - func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: _erfinv_out_cpu
  CUDA: _erfinv_out_cuda
 
  - func: sign(Tensor self) -> Tensor
+ use_c10_dispatcher: full
  variants: function, method
- supports_named_tensor: True
 
  - func: sign_(Tensor(a!) self) -> Tensor(a!)
  variants: method
- supports_named_tensor: True
 
  - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
  dispatch:
  CPU: sign_out
  CUDA: sign_out
@@ -4980,11 +4920,9 @@
  variants: method, function
 
  - func: atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
 
  - func: atan2(Tensor self, Tensor other) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: method, function
 
  - func: lerp.Scalar_out(Tensor self, Tensor end, Scalar weight, *, Tensor(a!) out) -> Tensor(a!)
@@ -5026,26 +4964,26 @@
  - func: fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: fmod_out
- CUDA: legacy::cuda::_th_fmod_out
+ CUDA: fmod_cuda_out
 
  - func: fmod.Scalar(Tensor self, Scalar other) -> Tensor
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
  CPU: fmod
- CUDA: legacy::cuda::_th_fmod
+ CUDA: fmod_cuda
 
  - func: fmod.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: fmod_out
- CUDA: legacy::cuda::_th_fmod_out
+ CUDA: fmod_cuda_out
 
  - func: fmod.Tensor(Tensor self, Tensor other) -> Tensor
  use_c10_dispatcher: full
  variants: method, function
  dispatch:
  CPU: fmod
- CUDA: legacy::cuda::_th_fmod
+ CUDA: fmod_cuda
 
  - func: remainder.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -5082,9 +5020,8 @@
  variants: method, function
  dispatch:
  CPU: min
- CUDA: legacy::cuda::_th_min
+ CUDA: min
  QuantizedCPU: min_quant
- supports_named_tensor: True
 
  - func: max.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
 
@@ -5097,9 +5034,8 @@
  variants: method, function
  dispatch:
  CPU: max
- CUDA: legacy::cuda::_th_max
+ CUDA: max
  QuantizedCPU: max_quant
- supports_named_tensor: True
 
  - func: median(Tensor self) -> Tensor
  use_c10_dispatcher: full
@@ -5107,7 +5043,6 @@
  dispatch:
  CPU: median_cpu
  CUDA: median_cuda
- supports_named_tensor: True
 
  - func: sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
  dispatch:
@@ -5115,6 +5050,7 @@
  CUDA: legacy::cuda::_th_sort_out
 
  - func: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
+ use_c10_dispatcher: full
  variants: method, function
  dispatch:
  CPU: legacy::cpu::_th_sort
@@ -5139,6 +5075,7 @@
  CUDA: legacy::cuda::_th_topk_out
 
  - func: topk(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)
+ use_c10_dispatcher: full
  variants: method, function
  dispatch:
  CPU: topk
@@ -5147,12 +5084,10 @@
 
  - func: all(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: method, function
 
  - func: any(Tensor self) -> Tensor
  use_c10_dispatcher: full
- supports_named_tensor: True
  variants: method, function
  dispatch:
  CPU: any
@@ -5173,11 +5108,20 @@
  CUDA: legacy::cuda::_th_renorm
 
  - func: unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method
  device_guard: False
  dispatch:
  CPU: unfold
  CUDA: unfold
+ QuantizedCPU: unfold
+ QuantizedCUDA: unfold
+
+ - func: unfold_backward(Tensor grad_in, int[] input_sizes, int dim, int size, int step) -> Tensor
+ variants: function
+ dispatch:
+ CPU: unfold_backward
+ CUDA: unfold_backward
 
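`unfold` gains quantized dispatch and an explicit `unfold_backward`. It returns a view of sliding windows along one dimension; a sketch of the forward semantics:

```cpp
#include <torch/torch.h>

int main() {
  auto t = torch::arange(8);
  // Windows of length 4, advancing by 2: result shape [3, 4],
  // i.e. [[0,1,2,3], [2,3,4,5], [4,5,6,7]].
  auto w = t.unfold(/*dimension=*/0, /*size=*/4, /*step=*/2);
}
```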
  - func: equal(Tensor self, Tensor other) -> bool
5183
5127
  use_c10_dispatcher: full
@@ -5185,96 +5129,69 @@
5185
5129
  dispatch:
5186
5130
  CPU: legacy::cpu::_th_equal
5187
5131
  CUDA: legacy::cuda::_th_equal
5188
- QuantizedCPU: quantized_equal
5189
- supports_named_tensor: True
5132
+ QuantizedCPU: quantized_equal_cpu
5190
5133
 
5191
5134
  - func: pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)
5192
- supports_named_tensor: True
5193
5135
  dispatch:
5194
5136
  CPU: pow_out
5195
5137
  CUDA: pow_out
5196
5138
 
5197
5139
  - func: pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor
5198
5140
  use_c10_dispatcher: full
5199
- supports_named_tensor: True
5200
5141
  variants: method, function
5201
5142
  dispatch:
5202
5143
  CPU: pow
5203
5144
  CUDA: pow
5204
5145
 
5205
5146
  - func: pow.Scalar_out(Scalar self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)
5206
- supports_named_tensor: True
5207
5147
  dispatch:
5208
5148
  CPU: pow_out
5209
5149
  CUDA: pow_out
5210
5150
 
5211
5151
  - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
5212
5152
  use_c10_dispatcher: full
5213
- supports_named_tensor: True
5214
5153
  dispatch:
5215
5154
  CPU: pow
5216
5155
  CUDA: pow
 
  - func: normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!)
  variants: method
- dispatch:
- CPU: normal_cpu_
- CUDA: normal_cuda_
- supports_named_tensor: True
 
  - func: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU: normal_out_cpu
- CUDA: normal_out_cuda
 
  - func: normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor
- dispatch:
- CPU: normal_cpu
- CUDA: normal_cuda
 
  - func: normal.float_Tensor_out(float mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU: normal_out_cpu
- CUDA: normal_out_cuda
 
  - func: normal.float_Tensor(float mean, Tensor std, *, Generator? generator=None) -> Tensor
- dispatch:
- CPU: normal_cpu
- CUDA: normal_cuda
 
  - func: normal.Tensor_Tensor_out(Tensor mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU: normal_out_cpu
- CUDA: normal_out_cuda
 
  - func: normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor
- dispatch:
- CPU: normal_cpu
- CUDA: normal_cuda
 
  - func: normal.float_float(float mean, float std, int[] size, *, Generator? generator=None, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
  - func: normal.float_float_out(float mean, float std, int[] size, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
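Removing the per-backend `dispatch` blocks reroutes `normal` through the shared default kernel; the overload set itself is unchanged. Note that the `float_float` overload carries `Layout?`/`bool?` options that the generator described in this gem still skips, so only the tensor-based overloads are likely bound from Ruby. A hedged sketch of one of them:

    Torch.manual_seed(0)
    mean = Torch.zeros(3)
    Torch.normal(mean, 1.0)  # normal.Tensor_float: one sample per mean entry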
 
  - func: alias(Tensor(a) self) -> Tensor(a)
+ use_c10_dispatcher: full
  variants: method, function
- supports_named_tensor: True
 
  - func: _addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  use_c10_dispatcher: full
  dispatch:
  CPU: legacy::cpu::_th_addr
- CUDA: legacy::cuda::_th_addr
+ CUDA: addr_cuda
 
  - func: _addr_(Tensor(a!) self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
  dispatch:
  CPU: legacy::cpu::_th_addr_
- CUDA: legacy::cuda::_th_addr_
+ CUDA: addr__cuda
 
  - func: _addr.out(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: legacy::cpu::_th_addr_out
- CUDA: legacy::cuda::_th_addr_out
+ CUDA: addr_out_cuda
 
  - func: _index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
  dispatch:
@@ -5285,37 +5202,23 @@
  use_c10_dispatcher: full
  dispatch:
  CPU: _cumsum_cpu
- CUDA: legacy::cuda::_th_cumsum
+ CUDA: _cumsum_cuda
 
  - func: _cumsum.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: _cumsum_out_cpu
- CUDA: legacy::cuda::_th_cumsum_out
+ CUDA: _cumsum_out_cuda
 
  - func: _cumprod(Tensor self, int dim) -> Tensor
  use_c10_dispatcher: full
  dispatch:
  CPU: _cumprod_cpu
- CUDA: legacy::cuda::_th_cumprod
+ CUDA: _cumprod_cuda
 
  - func: _cumprod.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: _cumprod_out_cpu
- CUDA: legacy::cuda::_th_cumprod_out
-
- - func: _var(Tensor self, bool unbiased=True) -> Tensor
- use_c10_dispatcher: full
- dispatch:
- CPU: legacy::cpu::_th_var
- CUDA: legacy::cuda::_th_var
- supports_named_tensor: True
-
- - func: _std(Tensor self, bool unbiased=True) -> Tensor
- use_c10_dispatcher: full
- dispatch:
- CPU: legacy::cpu::_th_std
- CUDA: legacy::cuda::_th_std
- supports_named_tensor: True
+ CUDA: _cumprod_out_cuda
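`_cumsum`/`_cumprod` move off the legacy `_th` CUDA kernels, and the internal `_var`/`_std` entries disappear outright; user code goes through the public wrappers, which are unaffected. For reference, under the same binding assumptions:

    t = Torch.tensor([1.0, 2.0, 3.0, 4.0])
    t.cumsum(0)   # => tensor([ 1.,  3.,  6., 10.])
    t.cumprod(0)  # => tensor([ 1.,  2.,  6., 24.])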
 
  - func: _amp_non_finite_check_and_unscale_(Tensor(a!) self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
  variants: function
@@ -5328,6 +5231,7 @@
  CUDA: _amp_update_scale_cuda
 
  - func: _cat(Tensor[] tensors, int dim=0) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CPU: _cat_cpu
  CUDA: cat_cuda
@@ -5340,6 +5244,7 @@
  QuantizedCPU: quantized_cat_out
 
  - func: _mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  dispatch:
  CPU: legacy::cpu::_th_mode
  CUDA: legacy::cuda::_th_mode
@@ -5349,25 +5254,39 @@
  CPU: legacy::cpu::_th_mode_out
  CUDA: legacy::cuda::_th_mode_out
 
- - func: _max(Tensor self, int dim, bool keepdim=False) -> (Tensor, Tensor)
+ - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
+ CPU: bucketize_cpu
+ CUDA: bucketize_cuda
+
+ - func: bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU: bucketize_out_cpu
+ CUDA: bucketize_out_cuda
+
+ - func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
- CPU: legacy::cpu::_th_max
- CUDA: legacy::cuda::_th_max
+ CPU: bucketize_cpu
+ CUDA: bucketize_cuda
 
- - func: _max.max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_indices) -> (Tensor(a!), Tensor(b!))
+ - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
- CPU: legacy::cpu::_th_max_out
- CUDA: legacy::cuda::_th_max_out
+ CPU: searchsorted_cpu
+ CUDA: searchsorted_cuda
 
- - func: _min(Tensor self, int dim, bool keepdim=False) -> (Tensor, Tensor)
+ - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: legacy::cpu::_th_min
- CUDA: legacy::cuda::_th_min
+ CPU: searchsorted_out_cpu
+ CUDA: searchsorted_out_cuda
 
- - func: _min.min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!), Tensor(b!))
+ - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
- CPU: legacy::cpu::_th_min_out
- CUDA: legacy::cuda::_th_min_out
+ CPU: searchsorted_cpu
+ CUDA: searchsorted_cuda
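`bucketize` and `searchsorted` are the headline additions in this hunk, landing where the removed `_max`/`_min` internals used to sit: both binary-search a sorted sequence. Semantics follow upstream PyTorch (with `right: false`, the result index i satisfies boundaries[i-1] < v <= boundaries[i]); whether 0.3.1's codegen already exposes them from Ruby is an assumption:

    boundaries = Torch.tensor([1, 3, 5, 7, 9])
    values     = Torch.tensor([3, 6, 9])
    Torch.bucketize(values, boundaries)               # => tensor([1, 3, 4])
    Torch.bucketize(values, boundaries, right: true)  # => tensor([2, 3, 5])
    Torch.searchsorted(boundaries, values)            # 1-D case matches bucketize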
 
  ## NN wrappers
 
@@ -5446,6 +5365,7 @@
  CUDA: legacy::cuda::_thnn_multilabel_margin_loss_forward_out
 
  - func: multilabel_margin_loss_forward(Tensor self, Tensor target, int reduction) -> (Tensor output, Tensor is_target)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: multilabel_margin_loss_forward_cpu
@@ -5560,18 +5480,10 @@
 
  - func: elu.out(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
- dispatch:
- CPU: elu_out
- CUDA: elu_out
- QuantizedCPU: quantized_elu_out
 
  - func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
  use_c10_dispatcher: full
  python_module: nn
- dispatch:
- CPU: elu
- CUDA: elu
- QuantizedCPU: quantized_elu
 
  - func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
  python_module: nn
@@ -5585,10 +5497,6 @@
 
  - func: elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!)
  python_module: nn
- dispatch:
- CPU: elu_
- CUDA: elu_
- QuantizedCPU: quantized_elu_
 
  - func: glu.out(Tensor self, int dim=-1, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
@@ -5622,6 +5530,10 @@
  - func: hardsigmoid(Tensor self) -> Tensor
  use_c10_dispatcher: full
  python_module: nn
+ dispatch:
+ CPU: hardsigmoid
+ CUDA: hardsigmoid
+ QuantizedCPU: quantized_hardsigmoid
 
  - func: hardsigmoid_(Tensor(a!) self) -> Tensor(a!)
  python_module: nn
@@ -5629,6 +5541,9 @@
  - func: hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor
  use_c10_dispatcher: full
  python_module: nn
+ dispatch:
+ CPU: hardsigmoid_backward
+ CUDA: hardsigmoid_backward
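`hardsigmoid` gains explicit CPU, CUDA, and quantized kernels. It is the piecewise-linear sigmoid approximation clip(x / 6 + 1/2, 0, 1), which can be checked by hand without relying on the generated binding (assuming `clamp` and scalar arithmetic are available):

    x = Torch.tensor([-4.0, 0.0, 4.0])
    (x / 6 + 0.5).clamp(0, 1)  # hardsigmoid by hand => tensor([0.0, 0.5, 1.0])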
 
  - func: hardtanh.out(Tensor self, Scalar min_val=-1, Scalar max_val=1, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
@@ -5661,6 +5576,24 @@
  CPU: hardtanh_
  CUDA: hardtanh_
  QuantizedCPU: quantized_hardtanh_
+ Vulkan: vulkan_hardtanh_
+
+ - func: hardswish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: nn
+
+ - func: hardswish(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+
+ - func: hardswish_(Tensor(a!) self) -> Tensor(a!)
+ python_module: nn
+
+ - func: hardswish_backward(Tensor grad_output, Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: hardswish_backward
+ CUDA: hardswish_backward
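`hardswish`, the MobileNetV3 activation, is new in this release: hardswish(x) = x * clip(x + 3, 0, 6) / 6. The same hand-rolled check, same caveats:

    x = Torch.tensor([-4.0, 0.0, 4.0])
    x * (x + 3).clamp(0, 6) / 6  # hardswish by hand: values 0.0, 0.0, 4.0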
 
  - func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
@@ -5702,6 +5635,7 @@
  CUDA: legacy::cuda::_thnn_log_sigmoid_forward_out
 
  - func: log_sigmoid_forward(Tensor self) -> (Tensor output, Tensor buffer)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: log_sigmoid_forward_cpu
@@ -5784,14 +5718,17 @@
  MkldnnCPU: mkldnn_adaptive_avg_pool2d_out
 
  - func: adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
 
  - func: mkldnn_adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  MkldnnCPU: mkldnn_adaptive_avg_pool2d
  requires_tensor: True
 
  - func: _adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
+ use_c10_dispatcher: full
  dispatch:
  CPU: adaptive_avg_pool2d_cpu
  CUDA: adaptive_avg_pool2d_cuda
@@ -5811,6 +5748,7 @@
  CUDA: adaptive_avg_pool3d_out_cuda
 
  - func: adaptive_avg_pool3d(Tensor self, int[3] output_size) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: adaptive_avg_pool3d_cpu
@@ -5838,6 +5776,7 @@
 
  # Return: (Tensor output, Tensor indices)
  - func: adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: adaptive_max_pool2d_cpu
@@ -5865,6 +5804,7 @@
 
  # Return: (Tensor output, Tensor indices)
  - func: adaptive_max_pool3d(Tensor self, int[3] output_size) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: adaptive_max_pool3d_cpu
@@ -5891,6 +5831,7 @@
  MkldnnCPU: mkldnn_avg_pool2d_out
 
  - func: avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: avg_pool2d_cpu
@@ -5905,6 +5846,7 @@
  CUDA: avg_pool2d_backward_out_cuda
 
  - func: avg_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: avg_pool2d_backward_cpu
@@ -5917,6 +5859,7 @@
  CUDA: avg_pool3d_out_cuda
 
  - func: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: avg_pool3d_cpu
@@ -5930,6 +5873,7 @@
  CUDA: avg_pool3d_backward_out_cuda
 
  - func: avg_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: avg_pool3d_backward_cpu
@@ -5944,6 +5888,7 @@
 
  # Return: (Tensor output, Tensor indices)
  - func: fractional_max_pool2d(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: fractional_max_pool2d_cpu
@@ -5956,6 +5901,7 @@
  CUDA: fractional_max_pool2d_backward_out_cuda
 
  - func: fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: fractional_max_pool2d_backward_cpu
@@ -5970,6 +5916,7 @@
 
  # Return: (Tensor output, Tensor indices)
  - func: fractional_max_pool3d(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: fractional_max_pool3d_cpu
@@ -5982,6 +5929,7 @@
  CUDA: fractional_max_pool3d_backward_out_cuda
 
  - func: fractional_max_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: fractional_max_pool3d_backward_cpu
@@ -5996,11 +5944,11 @@
 
  # Return: (Tensor output, Tensor indices)
  - func: max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: max_pool2d_with_indices_cpu
  CUDA: max_pool2d_with_indices_cuda
- supports_named_tensor: True
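The run of pooling entries above and below is mechanical: each op gains `use_c10_dispatcher: full` so the generated bindings can route through the c10 dispatcher (`max_pool2d_with_indices` additionally returns the argmax indices used by the backward pass). One representative call; the exact Ruby surface, `Torch.max_pool2d` with array-shaped size arguments, is an assumption:

    x = Torch.arange(16.0).reshape([1, 1, 4, 4])  # NCHW input, values 0..15
    Torch.max_pool2d(x, [2, 2])                   # 2x2 windows, stride 2
    # => tensor([[[[ 5.,  7.], [13., 15.]]]])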
 
  - func: max_pool2d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
  python_module: nn
@@ -6009,6 +5957,7 @@
  CUDA: max_pool2d_with_indices_backward_out_cuda
 
  - func: max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: max_pool2d_with_indices_backward_cpu
@@ -6023,11 +5972,11 @@
 
  # Return: (Tensor output, Tensor indices)
  - func: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: max_pool3d_with_indices_cpu
  CUDA: max_pool3d_with_indices_cuda
- supports_named_tensor: True
 
  - func: max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
  python_module: nn
@@ -6036,6 +5985,7 @@
  CUDA: max_pool3d_with_indices_backward_out_cuda
 
  - func: max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: max_pool3d_with_indices_backward_cpu
@@ -6048,6 +5998,7 @@
  CUDA: max_unpooling2d_forward_out_cuda
 
  - func: max_unpool2d(Tensor self, Tensor indices, int[2] output_size) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: max_unpooling2d_forward_cpu
@@ -6060,6 +6011,7 @@
  CUDA: max_unpooling2d_backward_out_cuda
 
  - func: max_unpool2d_backward(Tensor grad_output, Tensor self, Tensor indices, int[2] output_size) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: max_unpooling2d_backward_cpu
@@ -6072,6 +6024,7 @@
  CUDA: max_unpooling3d_forward_out_cuda
 
  - func: max_unpool3d(Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: max_unpooling3d_forward_cpu
@@ -6084,6 +6037,7 @@
  CUDA: max_unpooling3d_backward_out_cuda
 
  - func: max_unpool3d_backward(Tensor grad_output, Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: max_unpooling3d_backward_cpu
@@ -6096,10 +6050,12 @@
  CUDA: reflection_pad1d_out_cuda
 
  - func: reflection_pad1d(Tensor self, int[2] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: reflection_pad1d_cpu
  CUDA: reflection_pad1d_cuda
+ QuantizedCPU: reflection_pad1d_cpu
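`reflection_pad1d` also gains a quantized CPU path, reusing the dense kernel. Reflection padding mirrors the signal without repeating the edge sample: padding [1, 2, 3] by two on the left and one on the right yields [3, 2, 1, 2, 3, 2]. Replication padding, below, repeats the edge sample instead ([1, 1, 1, 2, 3, 3] for the same input).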
 
  - func: reflection_pad1d_backward.grad_input(Tensor grad_output, Tensor self, int[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
  python_module: nn
@@ -6108,6 +6064,7 @@
  CUDA: reflection_pad1d_backward_out_cuda
 
  - func: reflection_pad1d_backward(Tensor grad_output, Tensor self, int[2] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: reflection_pad1d_backward_cpu
@@ -6120,6 +6077,7 @@
  CUDA: reflection_pad2d_out_cuda
 
  - func: reflection_pad2d(Tensor self, int[4] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: reflection_pad2d_cpu
@@ -6132,6 +6090,7 @@
  CUDA: reflection_pad2d_backward_out_cuda
 
  - func: reflection_pad2d_backward(Tensor grad_output, Tensor self, int[4] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: reflection_pad2d_backward_cpu
@@ -6144,6 +6103,7 @@
  CUDA: replication_pad1d_out_cuda
 
  - func: replication_pad1d(Tensor self, int[2] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: replication_pad1d_cpu
@@ -6156,6 +6116,7 @@
  CUDA: replication_pad1d_backward_out_cuda
 
  - func: replication_pad1d_backward(Tensor grad_output, Tensor self, int[2] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: replication_pad1d_backward_cpu
@@ -6168,6 +6129,7 @@
  CUDA: replication_pad2d_out_cuda
 
  - func: replication_pad2d(Tensor self, int[4] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: replication_pad2d_cpu
@@ -6180,6 +6142,7 @@
  CUDA: replication_pad2d_backward_out_cuda
 
  - func: replication_pad2d_backward(Tensor grad_output, Tensor self, int[4] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: replication_pad2d_backward_cpu
@@ -6192,6 +6155,7 @@
  CUDA: replication_pad3d_out_cuda
 
  - func: replication_pad3d(Tensor self, int[6] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: replication_pad3d_cpu
@@ -6204,6 +6168,7 @@
  CUDA: replication_pad3d_backward_out_cuda
 
  - func: replication_pad3d_backward(Tensor grad_output, Tensor self, int[6] padding) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: replication_pad3d_backward_cpu
@@ -6216,6 +6181,7 @@
  CUDA: upsample_linear1d_out_cuda
 
  - func: upsample_linear1d(Tensor self, int[1] output_size, bool align_corners, float? scales=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_linear1d_cpu
@@ -6228,6 +6194,7 @@
  CUDA: upsample_linear1d_backward_out_cuda
 
  - func: upsample_linear1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, bool align_corners, float? scales=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_linear1d_backward_cpu
@@ -6240,6 +6207,7 @@
  CUDA: upsample_bilinear2d_out_cuda
 
  - func: upsample_bilinear2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_bilinear2d_cpu
@@ -6253,6 +6221,7 @@
  CUDA: upsample_bilinear2d_backward_out_cuda
 
  - func: upsample_bilinear2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_bilinear2d_backward_cpu
@@ -6265,6 +6234,7 @@
  CUDA: upsample_bicubic2d_out_cuda
 
  - func: upsample_bicubic2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_bicubic2d_cpu
@@ -6277,6 +6247,7 @@
  CUDA: upsample_bicubic2d_backward_out_cuda
 
  - func: upsample_bicubic2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_bicubic2d_backward_cpu
@@ -6289,6 +6260,7 @@
  CUDA: upsample_trilinear3d_out_cuda
 
  - func: upsample_trilinear3d(Tensor self, int[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_trilinear3d_cpu
@@ -6301,6 +6273,7 @@
  CUDA: upsample_trilinear3d_backward_out_cuda
 
  - func: upsample_trilinear3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_trilinear3d_backward_cpu
@@ -6313,6 +6286,7 @@
  CUDA: upsample_nearest1d_out_cuda
 
  - func: upsample_nearest1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_nearest1d_cpu
@@ -6325,6 +6299,7 @@
  CUDA: upsample_nearest1d_backward_out_cuda
 
  - func: upsample_nearest1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_nearest1d_backward_cpu
@@ -6337,11 +6312,13 @@
  CUDA: upsample_nearest2d_out_cuda
 
  - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_nearest2d_cpu
  CUDA: upsample_nearest2d_cuda
  QuantizedCPU: quantized_upsample_nearest2d_cpu
+ Vulkan: upsample_nearest2d_vulkan
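Alongside the `use_c10_dispatcher` sweep, `upsample_nearest2d` gets a Vulkan backend, another of the new Vulkan dispatch entries in this diff (see `hardtanh_` above). Nearest-neighbor upsampling just duplicates source pixels, so the effect can be reproduced with plain tensor ops (assuming `repeat_interleave` takes positional `(repeats, dim)` as in PyTorch):

    x = Torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    x.repeat_interleave(2, 0).repeat_interleave(2, 1)  # 2x nearest upsample
    # => tensor([[1., 1., 2., 2.],
    #            [1., 1., 2., 2.],
    #            [3., 3., 4., 4.],
    #            [3., 3., 4., 4.]])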
 
  - func: upsample_nearest2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
  python_module: nn
@@ -6350,6 +6327,7 @@
  CUDA: upsample_nearest2d_backward_out_cuda
 
  - func: upsample_nearest2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_nearest2d_backward_cpu
@@ -6362,6 +6340,7 @@
  CUDA: upsample_nearest3d_out_cuda
 
  - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_nearest3d_cpu
@@ -6375,6 +6354,7 @@
  CUDA: upsample_nearest3d_backward_out_cuda
 
  - func: upsample_nearest3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: upsample_nearest3d_backward_cpu
@@ -6437,6 +6417,7 @@
  CUDA: slow_conv_transpose2d_backward_out_cuda
 
  - func: slow_conv_transpose2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: slow_conv_transpose2d_backward_cpu
@@ -6461,6 +6442,7 @@
  CUDA: slow_conv_transpose3d_backward_out_cuda
 
  - func: slow_conv_transpose3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: slow_conv_transpose3d_backward_cpu
@@ -6488,13 +6470,14 @@
  python_module: nn
  dispatch:
  CPU: slow_conv2d_backward_out_cpu
- CUDA: legacy::cuda::_thnn_conv2d_backward_out
+ CUDA: slow_conv2d_backward_out_cuda
 
  - func: thnn_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: slow_conv2d_backward_cpu
- CUDA: legacy::cuda::_thnn_conv2d_backward
+ CUDA: slow_conv2d_backward_cuda
 
  - func: thnn_conv_depthwise2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
@@ -6515,12 +6498,13 @@
  - func: thnn_conv_depthwise2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, *, Tensor(a!)? grad_input, Tensor(b!)? grad_weight) -> (Tensor(a!), Tensor(b!))
  python_module: nn
  dispatch:
- CUDA: legacy::cuda::_thnn_conv_depthwise2d_backward_out
+ CUDA: thnn_conv_depthwise2d_backward_out
 
  - func: thnn_conv_depthwise2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[2] output_mask) -> (Tensor grad_input, Tensor grad_weight)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
- CUDA: legacy::cuda::_thnn_conv_depthwise2d_backward
+ CUDA: thnn_conv_depthwise2d_backward
 
  - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
@@ -6544,6 +6528,7 @@
  CPU: slow_conv3d_backward_out_cpu
 
  - func: slow_conv3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: slow_conv3d_backward_cpu
@@ -6555,6 +6540,7 @@
  CUDA: slow_conv_dilated2d_cuda
 
  - func: slow_conv_dilated2d_backward(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: slow_conv_dilated2d_backward_cpu
@@ -6567,6 +6553,7 @@
  CUDA: slow_conv_dilated3d_cuda
 
  - func: slow_conv_dilated3d_backward(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: slow_conv_dilated3d_backward_cpu
@@ -6579,6 +6566,7 @@
  CUDA: col2im_out_cuda
 
  - func: col2im(Tensor self, int[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: col2im_cpu
@@ -6591,6 +6579,7 @@
  CUDA: col2im_backward_out_cuda
 
  - func: col2im_backward(Tensor grad_output, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: col2im_backward_cpu
@@ -6603,6 +6592,7 @@
  CUDA: im2col_out_cuda
 
  - func: im2col(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: im2col_cpu
@@ -6615,6 +6605,7 @@
  CUDA: im2col_backward_out_cuda
 
  - func: im2col_backward(Tensor grad_output, int[2] input_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
+ use_c10_dispatcher: full
  python_module: nn
  dispatch:
  CPU: im2col_backward_cpu
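`im2col` flattens each sliding kernel-sized patch into a column so convolution can run as a single matrix multiply; `col2im` is its inverse, scatter-adding columns back into image layout. A quick shape check, no binding required: a 1 x 1 x 4 x 4 input with a 2 x 2 kernel, stride 1, and no padding or dilation yields (4 - 2 + 1) * (4 - 2 + 1) = 9 patches of 1 * 2 * 2 = 4 values each, i.e. an im2col output of shape [1, 4, 9].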
@@ -6622,12 +6613,15 @@
 
  - func: isfinite(Tensor self) -> Tensor
  use_c10_dispatcher: full
- variants: function
+ variants: function, method
  device_guard: False
- supports_named_tensor: True
 
  - func: isinf(Tensor self) -> Tensor
  use_c10_dispatcher: full
- variants: function
+ variants: function, method
  device_guard: False
- supports_named_tensor: True
+
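`isfinite` and `isinf` are promoted from free functions to tensor methods (`variants: function, method`). A sketch of the new method form, under the usual binding assumption:

    t = Torch.tensor([1.0, Float::INFINITY, Float::NAN])
    t.isfinite  # => tensor([true, false, false])
    t.isinf     # => tensor([false, true, false])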
+ # Note: this function is only for testing.
+ # It is undocumented and should not be used outside of tests.
+ - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor
+ use_c10_dispatcher: full