torch-rb 0.9.1 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -100,10 +100,49 @@
 
  - func: _make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)
  variants: function
+ dispatch:
+ CompositeExplicitAutograd: _make_dual
 
  - func: _unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)
  variants: function
 
+ # NOTE: [_new_zeros_with_same_feature_meta]
+ # This function creates a new tensor with the layout and TensorOptions
+ # of `other` but also takes into account the batch dimensions of `self`
+ #
+ # This function has a couple extra constraints because it is also used for `jvp`
+ # in functorch.
+ # - is used for forward AD because there is the restriction
+ # that the primal and tangent must have the same layout
+ # - We cannot assume that `self` and `other` have the same sizes or even dim
+ # because in the inplace over view case, `other` is the base tensor, and
+ # `self` is the forward grad with respect to the view, which can have an
+ # entirely different shape
+ # - takes the number of batch dims for `self` because we also handle
+ # some batching logic. We handle that here instead of a batching rule because
+ # we'd like to avoid calling as_strided in the batching rule (as to enable
+ # nested vmap in functorch).
+ # - needs to be CompositeExplicitAutograd for jvp support in functorch.
+ # functorch currently relies on TensorWrapper which does not have storage
+ # CompositeExplicitAutograd makes sure the TensorWrapper is unwrapped.
+ # - this function may eventually take on another int argument to store
+ # the number of batch dims for other once we support that use case
+ - func: _new_zeros_with_same_feature_meta(Tensor self, Tensor other, *, int self_num_batch_dims=0) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _new_zeros_with_same_feature_meta
+
+ # This function compares the storage numel of self with that of other, where
+ # storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
+ # We create this function for composite compliance purposes. The batching rule
+ # always returns true because vmapped as_strided does not support accessing
+ # storage locations not indexable by the input tensor.
+ # See the note above for more information.
+ - func: _has_same_storage_numel(Tensor self, Tensor other) -> bool
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _has_same_storage_numel
+
  - func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
  variants: method
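As a rough illustration of the "storage numel" notion used in the `_has_same_storage_numel` entry above (this sketch is not part of the diff; it uses only public PyTorch tensor APIs, since the op itself is internal):

```python
import torch

def storage_numel(t):
    # Elements the underlying storage holds; matches the comment's
    # storage().nbytes() / itemsize() formula.
    return len(t.storage())

base = torch.zeros(4, 4)   # storage holds 16 elements
view = base[1:, 1:]        # a view: smaller sizes, same storage
print(storage_numel(base) == storage_numel(view))  # True
```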
 
@@ -176,6 +215,17 @@
  dispatch:
  CUDA: masked_scale_cuda
 
+ - func: native_dropout(Tensor input, float p, bool? train) -> (Tensor, Tensor)
+ variants: function
+ dispatch:
+ CPU: native_dropout_cpu
+ CUDA: native_dropout_cuda
+
+ - func: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
+ dispatch:
+ CPU: native_dropout_backward_cpu
+ CUDA: native_dropout_backward_cuda
+
  - func: _sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)
 
  - func: _sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)
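The `native_dropout` entry added above returns both the scaled output and the sampled keep-mask. A hedged Python sketch against upstream PyTorch, assuming the op is exposed at the Python level as `torch.native_dropout` (its `function` variant suggests it is):

```python
import torch

x = torch.ones(2, 3)
out, mask = torch.native_dropout(x, 0.5, True)  # (input, p, train)
# Kept elements are rescaled by 1 / (1 - p); dropped ones become zero.
print(out)
print(mask)
```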
@@ -209,17 +259,23 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: abs
+ SparseCPU, SparseCUDA: abs_sparse
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
 
  - func: abs_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: abs_
+ SparseCPU, SparseCUDA: abs_sparse_
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
 
  - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA: abs_out
+ SparseCPU, SparseCUDA: abs_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
 
  # Note [Adding an alias]
  # To add an alias do the following:
@@ -231,18 +287,15 @@
  # will stop it from "inheriting" the original operation's autograd behavior.
  # 2) Implement the corresponding functions and have them redispatch to the
  # original function.
- # 3) Add entries for the alias (and original function, if needed) to
- # aten/src/ATen/core/interned_strings.h
- # (This may require removing an entry from ATen/core/aten_interned_strings.h.)
- # 4) Add docstrings to the new function that reference the original function,
+ # 3) Add docstrings to the new function that reference the original function,
  # and document the method as usual (if it exists.)
  # (See torch/_torch_docs.py and docs/source/torch.rst if adding a function,
  # torch/_tensor_docs.py and docs/source/tensors.rst if adding a method,
  # or module-specific doc bindings (like torch/linalg/__init__.py) if
  # adding an alias in a namespace.)
- # 5) Update torch/overrides.py consistent with the original function.
- # 6) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
- # 7) Add aliases argument to existing OpInfo/UnaryUfuncInfo or create new OpInfo/UnaryUfuncInfo entry
+ # 4) Update torch/overrides.py consistent with the original function.
+ # 5) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
+ # 6) Add aliases argument to existing OpInfo/UnaryUfuncInfo or create new OpInfo/UnaryUfuncInfo entry
  # in op_db list in torch/testing/_internal/common_methods_invocations.py
  #
  # See torch.absolute, an alias for torch.abs, as an example.
@@ -264,11 +317,13 @@
  variants: function, method
  dispatch:
  CPU, CUDA: angle
+ SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr
 
  - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA: angle_out
+ SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out
 
  - func: view_as_real(Tensor(a) self) -> Tensor(a)
  variants: function
@@ -283,16 +338,24 @@
  - func: sgn(Tensor self) -> Tensor
  variants: function, method
  structured_delegate: sgn.out
+ dispatch:
+ SparseCPU, SparseCUDA: sgn_sparse
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
 
  - func: sgn_(Tensor(a!) self) -> Tensor(a!)
  variants: method
  structured_delegate: sgn.out
+ dispatch:
+ SparseCPU, SparseCUDA: sgn_sparse_
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
 
  - func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: sgn_out
+ SparseCPU, SparseCUDA: sgn_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
 
  - func: real(Tensor(a) self) -> Tensor(a)
  device_check: NoCheck # TensorIterator
@@ -315,6 +378,7 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: _conj_physical
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr
 
  - func: conj_physical(Tensor self) -> Tensor
  variants: function, method
@@ -323,11 +387,13 @@
  dispatch:
  CPU, CUDA: conj_physical_out
  SparseCPU, SparseCUDA: conj_physical_out_sparse
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
 
  - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: conj_physical_
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_
 
  - func: resolve_conj(Tensor(a) self) -> Tensor(a)
  variants: function, method
@@ -381,6 +447,7 @@
  SparseCPU, SparseCUDA: add_sparse
  SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
  MkldnnCPU: mkldnn_add
+ ZeroTensor: add_zerotensor
 
  - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -454,6 +521,8 @@
  dispatch:
  CPU: addmv_out_cpu
  CUDA: addmv_out_cuda
+ SparseCsrCPU: addmv_out_sparse_csr
+ SparseCsrCUDA: addmv_out_sparse_csr_cuda
 
  - func: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  variants: function, method
@@ -532,7 +601,7 @@
 
  - func: arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: arange_cpu_out
+ CPU, Meta: arange_out
  CUDA: arange_cuda_out
 
  # This function is a temporary hack to allow tracing of arange like constructs with dynamic
@@ -588,16 +657,24 @@
  - func: asinh(Tensor self) -> Tensor
  variants: function, method
  structured_delegate: asinh.out
+ dispatch:
+ SparseCPU, SparseCUDA: asinh_sparse
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr
 
  - func: asinh_(Tensor(a!) self) -> Tensor(a!)
  variants: function, method
  structured_delegate: asinh.out
+ dispatch:
+ SparseCPU, SparseCUDA: asinh_sparse_
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_
 
  - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: asinh_out
+ SparseCPU, SparseCUDA: asinh_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
 
  # arcsinh, alias for asinh
  - func: arcsinh(Tensor self) -> Tensor
@@ -611,16 +688,25 @@
  - func: atanh(Tensor self) -> Tensor
  structured_delegate: atanh.out
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: atanh
+ SparseCPU, SparseCUDA: atanh_sparse
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr
 
  - func: atanh_(Tensor(a!) self) -> Tensor(a!)
  structured_delegate: atanh.out
  variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: atanh_sparse_
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_
 
  - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: atanh_out
+ SparseCPU, SparseCUDA: atanh_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
 
  # arctanh, alias for atanh
  - func: arctanh(Tensor self) -> Tensor
@@ -634,7 +720,7 @@
  - func: as_strided(Tensor(a) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a)
  variants: function, method
  dispatch:
- CPU, CUDA, Meta: as_strided_tensorimpl
+ ZeroTensor, CPU, CUDA, Meta: as_strided_tensorimpl
  QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
  device_check: NoCheck
  device_guard: False
@@ -644,6 +730,7 @@
  variants: function, method
  device_check: NoCheck
  device_guard: False
+ tags: inplace_view
  dispatch:
  CompositeExplicitAutograd: as_strided_
 
@@ -653,6 +740,7 @@
  structured_delegate: asin.out
  dispatch:
  SparseCPU, SparseCUDA: asin_sparse
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr
 
  - func: asin_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -660,6 +748,7 @@
  structured_delegate: asin.out
  dispatch:
  SparseCPU, SparseCUDA: asin_sparse_
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_
 
  - func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -667,7 +756,8 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: asin_out
- SparseCPU, SparseCUDA: asin_out_sparse
+ SparseCPU, SparseCUDA: asin_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
 
  # arcsin, alias of asin
  - func: arcsin(Tensor self) -> Tensor
@@ -682,11 +772,17 @@
  device_check: NoCheck # TensorIterator
  structured_delegate: atan.out
  variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: atan_sparse
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr
 
  - func: atan_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  structured_delegate: atan.out
  variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: atan_sparse_
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_
 
  - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -694,6 +790,8 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: atan_out
+ SparseCPU, SparseCUDA: atan_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
 
  # arctan, alias of atan
  - func: arctan(Tensor self) -> Tensor
@@ -723,24 +821,19 @@
 
  - func: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  variants: function, method
- dispatch:
- CPU: baddbmm_cpu
- CUDA: baddbmm_cuda
+ structured_delegate: baddbmm.out
 
  - func: baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
  variants: method
- dispatch:
- CPU: baddbmm__cpu
- CUDA: baddbmm__cuda
-
- - func: _baddbmm_mkl_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
- variants: function
+ structured_delegate: baddbmm.out
 
  - func: baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+ structured: True
  variants: function
  dispatch:
  CPU: baddbmm_out_cpu
  CUDA: baddbmm_out_cuda
+ SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
 
  - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
@@ -788,7 +881,7 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
 
- - func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias) -> Tensor
+ - func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias=None) -> Tensor
 
  - func: binary_cross_entropy(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -886,10 +979,14 @@
  - func: logical_not(Tensor self) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: logical_not
 
  - func: logical_not_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: logical_not_
 
  - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -899,10 +996,14 @@
  - func: logical_xor(Tensor self, Tensor other) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: logical_xor
 
  - func: logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: logical_xor_
 
  - func: logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -912,10 +1013,14 @@
  - func: logical_and(Tensor self, Tensor other) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: logical_and
 
  - func: logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: logical_and_
 
  - func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -925,10 +1030,14 @@
  - func: logical_or(Tensor self, Tensor other) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: logical_or
 
  - func: logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: logical_or_
 
  - func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -940,20 +1049,21 @@
  - func: blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
  - func: bmm(Tensor self, Tensor mat2) -> Tensor
+ structured_delegate: bmm.out
  variants: function, method
  dispatch:
- CPU: bmm_cpu
- CUDA: bmm_cuda
  SparseCPU: bmm_sparse_cpu
  SparseCUDA: bmm_sparse_cuda
 
  - func: bmm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
+ structured: True
  variants: function
  dispatch:
  CPU: bmm_out_cpu
  CUDA: bmm_out_cuda
  SparseCPU: bmm_out_sparse_cpu
  SparseCUDA: bmm_out_sparse_cuda
+ SparseCsrCUDA: bmm_out_sparse_csr_cuda
 
  - func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
  device_check: NoCheck
@@ -962,6 +1072,11 @@
  - func: broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
  variants: function, method
 
+ - func: _sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
+ variants: function
+ dispatch:
+ SparseCPU, SparseCUDA: sparse_broadcast_to
+
  - func: cat(Tensor[] tensors, int dim=0) -> Tensor
  dispatch:
  CompositeExplicitAutograd: cat
@@ -992,6 +1107,8 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: ceil
+ SparseCPU, SparseCUDA: ceil_sparse
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
 
  - func: ceil_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -999,6 +1116,8 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: ceil_
+ SparseCPU, SparseCUDA: ceil_sparse_
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_
 
  - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -1006,6 +1125,8 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: ceil_out
+ SparseCPU, SparseCUDA: ceil_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
 
  # alias for torch.linalg.multi_dot
  - func: chain_matmul(Tensor[] matrices) -> Tensor
@@ -1019,18 +1140,18 @@
  device_check: NoCheck
  device_guard: False
 
- - func: chunk(Tensor(a) self, int chunks, int dim=0) -> Tensor(a)[]
+ - func: chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]
  variants: function, method
  device_check: NoCheck
  device_guard: False
 
- - func: tensor_split.sections(Tensor(a) self, int sections, int dim=0) -> Tensor(a)[]
+ - func: tensor_split.sections(Tensor(a -> *) self, int sections, int dim=0) -> Tensor(a)[]
  variants: function, method
 
- - func: tensor_split.indices(Tensor(a) self, int[] indices, int dim=0) -> Tensor(a)[]
+ - func: tensor_split.indices(Tensor(a -> *) self, int[] indices, int dim=0) -> Tensor(a)[]
  variants: function, method
 
- - func: tensor_split.tensor_indices_or_sections(Tensor(a) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]
+ - func: tensor_split.tensor_indices_or_sections(Tensor(a -> *) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]
  variants: function, method
 
  - func: clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
@@ -1186,6 +1307,12 @@
  manual_cpp_binding: True
 
  - func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: convolution
+
+ - func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, int[]? bias_sizes, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ dispatch:
+ CompositeExplicitAutograd, CUDA: convolution_backward
 
  - func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
  dispatch:
@@ -1196,14 +1323,14 @@
  CompositeExplicitAutograd: convolution_backward_overrideable
 
  - func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _convolution
 
  - func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
 
  - func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] dilation, int groups) -> Tensor
 
- - func: _convolution_nogroup(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding) -> Tensor
-
- - func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ - func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
 
  - func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
 
@@ -1239,7 +1366,9 @@
  device_guard: False
  dispatch:
  MkldnnCPU: copy_mkldnn_
+ SparseCPU, SparseCUDA, SparseHIP: copy_sparse_wrapper_
  CompositeExplicitAutograd: copy_
+ SparseCsrCPU, SparseCsrCUDA: copy_sparse_csr_
 
  - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
  dispatch: {}
@@ -1320,56 +1449,14 @@
  dispatch:
  CUDA: cudnn_batch_norm_backward
 
- - func: cudnn_convolution.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_deprecated
-
- - func: cudnn_convolution.deprecated2(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_deprecated2
-
  - func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
  dispatch:
  CUDA: cudnn_convolution
 
- - func: cudnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_backward_input
-
- - func: cudnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
- dispatch:
- CUDA: cudnn_convolution_backward
-
- - func: cudnn_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_backward_weight
-
- - func: cudnn_convolution_transpose.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_transpose_deprecated
-
- - func: cudnn_convolution_transpose.deprecated2(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_transpose_deprecated2
-
  - func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
  dispatch:
  CUDA: cudnn_convolution_transpose
 
- # NB: output_padding not strictly needed here, but it's helpful for the float
- # backwards
- - func: cudnn_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
- dispatch:
- CUDA: cudnn_convolution_transpose_backward
-
- - func: cudnn_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_transpose_backward_input
-
- - func: cudnn_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_transpose_backward_weight
-
  - func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
  dispatch:
  CUDA: cudnn_convolution_relu
@@ -1516,6 +1603,8 @@
 
  - func: diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: diag_embed
 
  - func: diagflat(Tensor self, int offset=0) -> Tensor
  variants: function, method
@@ -1525,6 +1614,10 @@
  dispatch:
  CompositeExplicitAutograd: diagonal
 
+ - func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
+ python_module: linalg
+ variants: function
+
  - func: diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)
  variants: function, method
 
@@ -1571,6 +1664,7 @@
  structured_delegate: div.out
  dispatch:
  SparseCPU, SparseCUDA: div_sparse
+ ZeroTensor: div_zerotensor
 
  - func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -1781,12 +1875,19 @@
  Meta: empty_meta
  MkldnnCPU: empty_mkldnn
  SparseCPU, SparseCUDA: empty_sparse
+ SparseCsrCPU, SparseCsrCUDA: empty_sparse_csr
 
+ # We do not make new_empty a composite that calls into new_empty_strided, as the strided version
+ # is significantly more difficult to implement by different backends
  - func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: new_empty
 
  - func: new_empty_strided(Tensor self, int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  variants: method
+ dispatch:
+ CompositeExplicitAutograd: new_empty_strided
 
  - func: new_full(Tensor self, int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  variants: method
@@ -1820,6 +1921,7 @@
  CPU, Meta: resize_
  CUDA: resize_cuda_
  QuantizedCPU: quantized_resize_cpu_
+ SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
 
  - func: empty_quantized(int[] size, Tensor qtensor, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
  category_override: factory
@@ -1834,6 +1936,10 @@
  - func: empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
  device_check: NoCheck
  device_guard: False
+ dispatch:
+ CompositeExplicitAutograd: empty_like
+ SparseCPU, SparseCUDA: empty_like_sparse_coo
+ SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
 
  - func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  dispatch:
@@ -1845,11 +1951,17 @@
  device_check: NoCheck # TensorIterator
  structured_delegate: erf.out
  variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: erf_sparse
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr
 
  - func: erf_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  structured_delegate: erf.out
  variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: erf_sparse_
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_
 
  - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -1857,6 +1969,8 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: erf_out
+ SparseCPU, SparseCUDA: erf_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
 
  - func: erfc(Tensor self) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -1910,11 +2024,17 @@
  device_check: NoCheck # TensorIterator
  structured_delegate: expm1.out
  variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: expm1_sparse
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
 
  - func: expm1_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  structured_delegate: expm1.out
  variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: expm1_sparse_
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_
 
  - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -1922,6 +2042,8 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: expm1_out
+ SparseCPU, SparseCUDA: expm1_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
 
  - func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
  variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
@@ -1971,14 +2093,16 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: fill_
+ CPU, CUDA: fill_
+ QuantizedCPU, QuantizedCUDA: fill_quantized_
  Meta: fill_meta_
 
  - func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: fill_
+ CPU, CUDA: fill_
+ QuantizedCPU, QuantizedCUDA: fill_quantized_
  Meta: fill_meta_
 
  - func: floor(Tensor self) -> Tensor
@@ -1987,6 +2111,8 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: floor
+ SparseCPU, SparseCUDA: floor_sparse
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr
 
  - func: floor_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -1994,6 +2120,8 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: floor_
+ SparseCPU, SparseCUDA: floor_sparse_
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_
 
  - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -2001,6 +2129,8 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: floor_out
+ SparseCPU, SparseCUDA: floor_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
 
  - func: floor_divide(Tensor self, Tensor other) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -2108,10 +2238,13 @@
 
  - func: grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
  dispatch:
- CPU: grid_sampler_2d_cpu
+ CPU, QuantizedCPU: grid_sampler_2d_cpu
  CUDA: grid_sampler_2d_cuda
 
- - func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
+ # `grid_sampler_2d_backward` takes in `output_mask` to optimize performance for
+ # the case where `input` doesn't require gradient. Gradient for `grid` is always
+ # computed (only `output_mask[0]` is checked by the implementations).
+ - func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)
  dispatch:
  CPU: grid_sampler_2d_backward_cpu
  CUDA: grid_sampler_2d_backward_cuda
@@ -2229,6 +2362,8 @@
 
  - func: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: index_copy
 
  - func: index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!)
  variants: method
@@ -2250,6 +2385,8 @@
  - func: index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
  device_check: NoCheck # delegate to _index_put_impl_ after clone, which leverages TensorIterator
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: index_put
 
  - func: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -2269,12 +2406,6 @@
  dispatch:
  CompositeExplicitAutograd: inverse_out
 
- - func: _inverse_helper(Tensor self) -> Tensor
- variants: function
- dispatch:
- CPU: _inverse_helper_cpu
- CUDA: _inverse_helper_cuda
-
  - func: isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor
  variants: function, method
 
@@ -2315,6 +2446,7 @@
  dispatch:
  CPU, CUDA: isnan
  SparseCPU, SparseCUDA: isnan_sparse
+ SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
 
  - func: is_distributed(Tensor self) -> bool
  variants: function, method
@@ -2338,6 +2470,11 @@
  device_guard: False
  manual_cpp_binding: True
 
+ - func: _is_zerotensor(Tensor self) -> bool
+ variants: function, method
+ device_guard: False
+ manual_cpp_binding: True
+
  - func: is_neg(Tensor self) -> bool
  variants: function, method
  device_guard: False
@@ -2405,6 +2542,11 @@
  CUDA: layer_norm_cuda
  CompositeImplicitAutograd: math_native_layer_norm
 
+ - func: _native_multi_head_self_attention(Tensor query, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
+ dispatch:
+ CPU: multi_head_self_attention_cpu
+ CUDA: multi_head_self_attention_cuda
+
  - func: native_layer_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
  dispatch:
  CPU: layer_norm_backward_cpu
@@ -2414,15 +2556,18 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: nan_to_num
+ SparseCPU, SparseCUDA: nan_to_num_sparse
 
  - func: nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: nan_to_num_
+ SparseCPU, SparseCUDA: nan_to_num_sparse_
 
  - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU, CUDA: nan_to_num_out
+ SparseCPU, SparseCUDA: nan_to_num_sparse_out
 
  - func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
  python_module: nn
@@ -2471,11 +2616,11 @@
 
  - func: ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
 
- - func: linspace(Scalar start, Scalar end, int? steps=None, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ - func: linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
- - func: linspace.out(Scalar start, Scalar end, int? steps=None, *, Tensor(a!) out) -> Tensor(a!)
+ - func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: linspace_cpu_out
+ CPU, Meta: linspace_out
  CUDA: linspace_cuda_out
 
  - func: log(Tensor self) -> Tensor
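Note that `steps` is no longer optional for `linspace` above (and for `logspace`, further down): callers must now pass it explicitly, where older releases fell back to a deprecated default. Illustrative usage against upstream PyTorch:

```python
import torch

# `steps` must be given explicitly in this version.
xs = torch.linspace(0.0, 1.0, steps=5)
# tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])
```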
@@ -2499,6 +2644,8 @@
  device_check: NoCheck # TensorIterator
  structured_delegate: log10.out
  variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: log10
 
  - func: log10_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -2518,6 +2665,7 @@
  variants: function, method
  dispatch:
  SparseCPU, SparseCUDA: log1p_sparse
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
 
  - func: log1p_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -2525,6 +2673,7 @@
  variants: function, method
  dispatch:
  SparseCPU, SparseCUDA: log1p_sparse_
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_
 
  - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -2532,7 +2681,8 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA: log1p_out
- SparseCPU, SparseCUDA: log1p_out_sparse
+ SparseCPU, SparseCUDA: log1p_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
 
  - func: log2(Tensor self) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -2630,11 +2780,11 @@
  dispatch:
  CompositeExplicitAutograd: logdet
 
- - func: logspace(Scalar start, Scalar end, int? steps=None, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ - func: logspace(Scalar start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
- - func: logspace.out(Scalar start, Scalar end, int? steps=None, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+ - func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: logspace_cpu_out
+ CPU, Meta: logspace_out
  CUDA: logspace_cuda_out
 
  # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
@@ -2653,10 +2803,10 @@
  CPU: log_softmax_cpu_out
  CUDA: log_softmax_cuda_out
 
- - func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
+ - func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
  structured_delegate: _log_softmax_backward_data.out
 
- - func: _log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ - func: _log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  dispatch:
  CPU: log_softmax_backward_cpu_out
@@ -2722,11 +2872,11 @@
  # Alias to linalg.matrix_power
  - func: matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)
 
+ # Alias to linalg.matrix_exp
  - func: matrix_exp(Tensor self) -> Tensor
  variants: function, method
- dispatch:
- CPU, CUDA: matrix_exp
 
+ # This function should be deprecated in favor of differential_analytic_matrix_function in FunctionsManual.cpp
  - func: matrix_exp_backward(Tensor self, Tensor grad) -> Tensor
 
  # DEPRECATED: Use torch.aminmax instead
@@ -2760,12 +2910,16 @@
 
  - func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
  device_check: NoCheck # TensorIterator
+ structured_delegate: max.dim_max
  variants: function, method
  dispatch:
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: max
+ QuantizedCPU, QuantizedCUDA: qmax
 
  - func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
  device_check: NoCheck # TensorIterator
+ structured: True
+ precomputed:
+ - dim -> int dim
  dispatch:
  CPU, CUDA: max_out
 
@@ -2903,12 +3057,16 @@
 
  - func: min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
  device_check: NoCheck # TensorIterator
+ structured_delegate: min.dim_min
  variants: function, method
  dispatch:
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: min
+ QuantizedCPU, QuantizedCUDA: qmin
 
  - func: min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
  device_check: NoCheck # TensorIterator
+ structured: True
+ precomputed:
+ - dim -> int dim
  dispatch:
  CPU, CUDA: min_out
 
@@ -2932,14 +3090,6 @@
  dispatch:
  CompositeExplicitAutograd: mkldnn_convolution
 
- - func: mkldnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> Tensor
-
- - func: mkldnn_convolution_backward_weights(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> (Tensor, Tensor)
-
- - func: mkldnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- dispatch:
- CompositeExplicitAutograd: mkldnn_convolution_backward
-
  - func: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
  dispatch:
  CUDA: miopen_batch_norm
@@ -2952,56 +3102,14 @@
  dispatch:
  CUDA: miopen_convolution
 
- - func: miopen_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_convolution_backward_input
-
- - func: miopen_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- dispatch:
- CUDA: miopen_convolution_backward
-
- - func: miopen_convolution_backward_bias(Tensor grad_output) -> Tensor
- dispatch:
- CUDA: miopen_convolution_backward_bias
-
- - func: miopen_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_convolution_backward_weight
-
  - func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
  dispatch:
  CUDA: miopen_convolution_transpose
 
- # NB: output_padding not strictly needed here, but it's helpful for the float
- # backwards
- - func: miopen_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- dispatch:
- CUDA: miopen_convolution_transpose_backward
-
- - func: miopen_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_convolution_transpose_backward_input
-
- - func: miopen_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_convolution_transpose_backward_weight
-
  - func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
  dispatch:
  CUDA: miopen_depthwise_convolution
 
- - func: miopen_depthwise_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_depthwise_convolution_backward_input
-
- - func: miopen_depthwise_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- dispatch:
- CUDA: miopen_depthwise_convolution_backward
-
- - func: miopen_depthwise_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_depthwise_convolution_backward_weight
-
  - func: miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
  dispatch:
  CUDA: miopen_rnn
@@ -3014,7 +3122,8 @@
  structured_delegate: mm.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA: _sparse_mm
+ SparseCPU, SparseCUDA: _sparse_mm
+ SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm
 
  - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
@@ -3057,6 +3166,7 @@
  dispatch:
  SparseCPU, SparseCUDA: mul_sparse
  MkldnnCPU: mkldnn_mul
+ ZeroTensor: mul_zerotensor
 
  - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -3107,8 +3217,8 @@
  - func: mv(Tensor self, Tensor vec) -> Tensor
  variants: function, method
  dispatch:
- CPU, CUDA: mv
- SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA: mv_sparse
+ CompositeExplicitAutograd: mv
+ SparseCPU, SparseCUDA: mv_sparse
 
  - func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -3210,15 +3320,6 @@
  dispatch:
  CompositeExplicitAutograd: _nnpack_spatial_convolution
 
- - func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- variants: function
-
- - func: _nnpack_spatial_convolution_backward_input(Tensor input, Tensor grad_output, Tensor weight, int[2] padding) -> Tensor
- variants: function
-
- - func: _nnpack_spatial_convolution_backward_weight(Tensor input, int[] weightsize, Tensor grad_output, int[2] padding) -> Tensor
- variants: function
-
  - func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  device_check: NoCheck
  device_guard: False
@@ -3286,6 +3387,21 @@
  - func: numpy_T(Tensor(a) self) -> Tensor(a)
  variants: method
 
+ # Exposed on Python as an attribute 'H'
+ - func: matrix_H(Tensor(a) self) -> Tensor(a)
+ variants: method
+
+ # Exposed on Python as an attribute 'mT'
+ - func: mT(Tensor(a) self) -> Tensor(a)
+ variants: method
+
+ # Exposed on Python as an attribute 'mH'
+ - func: mH(Tensor(a) self) -> Tensor(a)
+ variants: method
+
+ - func: adjoint(Tensor(a) self) -> Tensor(a)
+ variants: function, method
+
  - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
 
  - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
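The entries added above back the `mT`, `mH`, and `H` tensor attributes and `torch.adjoint` introduced upstream in this version range; a short Python illustration against upstream PyTorch:

```python
import torch

a = torch.randn(2, 3, dtype=torch.complex64)
a.mT               # matrix transpose over the last two dims, like a.transpose(-2, -1)
a.mH               # conjugate transpose, like a.transpose(-2, -1).conj()
torch.adjoint(a)   # function form of .mH
```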
@@ -3295,6 +3411,11 @@
  CPU: channel_shuffle
  QuantizedCPU: channel_shuffle_quantized_cpu
 
+ - func: native_channel_shuffle(Tensor self, int groups) -> Tensor
+ dispatch:
+ CPU: channel_shuffle_cpu
+ CompositeImplicitAutograd: math_channel_shuffle
+
  - func: is_pinned(Tensor self, Device? device=None) -> bool
  variants: method
  dispatch:
@@ -3321,15 +3442,18 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: rad2deg
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr
 
  - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: rad2deg_
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_
 
  - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CompositeExplicitAutograd: rad2deg_out
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out
 
  - func: deg2rad(Tensor self) -> Tensor
  variants: function, method
@@ -3420,7 +3544,7 @@
 
  - func: range.out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU: range_cpu_out
+ CPU, Meta: range_out
  CUDA: range_cuda_out
 
  - func: ravel(Tensor(a) self) -> Tensor(a)
@@ -3449,6 +3573,7 @@
  variants: function, method
  dispatch:
  SparseCPU, SparseCUDA: neg_sparse
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr
 
  - func: neg_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -3456,6 +3581,7 @@
  variants: function, method
  dispatch:
  SparseCPU, SparseCUDA: neg_sparse_
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_
 
  - func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
@@ -3464,6 +3590,7 @@
  dispatch:
  CPU, CUDA: neg_out
  SparseCPU, SparseCUDA: neg_out_sparse
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
 
  # Alias for neg
  - func: negative(Tensor self) -> Tensor
@@ -3504,7 +3631,7 @@
  device_check: NoCheck
  device_guard: False
  dispatch:
- CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: _reshape_alias
+ CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor: _reshape_alias
  # We don't need to support mkldnn since this is handled explicitly by the reshape operator.
 
  - func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
@@ -3522,11 +3649,17 @@
3522
3649
  device_check: NoCheck # TensorIterator
3523
3650
  structured_delegate: round.out
3524
3651
  variants: function, method
3652
+ dispatch:
3653
+ SparseCPU, SparseCUDA: round_sparse
3654
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
3525
3655
 
3526
3656
  - func: round_(Tensor(a!) self) -> Tensor(a!)
3527
3657
  device_check: NoCheck # TensorIterator
3528
3658
  structured_delegate: round.out
3529
3659
  variants: function, method
3660
+ dispatch:
3661
+ SparseCPU, SparseCUDA: round_sparse_
3662
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_
3530
3663
 
3531
3664
  - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
3532
3665
  device_check: NoCheck # TensorIterator
@@ -3535,6 +3668,26 @@
3535
3668
  dispatch:
3536
3669
  CPU: round_out
3537
3670
  CUDA: round_out
3671
+ SparseCPU, SparseCUDA: round_sparse_out
3672
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
3673
+
3674
+ - func: round.decimals(Tensor self, *, int decimals) -> Tensor
3675
+ device_check: NoCheck # TensorIterator
3676
+ structured_delegate: round.decimals_out
3677
+ variants: function, method
3678
+
3679
+ - func: round_.decimals(Tensor(a!) self, *, int decimals) -> Tensor(a!)
3680
+ device_check: NoCheck # TensorIterator
3681
+ structured_delegate: round.decimals_out
3682
+ variants: function, method
3683
+
3684
+ - func: round.decimals_out(Tensor self, *, int decimals, Tensor(a!) out) -> Tensor(a!)
3685
+ device_check: NoCheck # TensorIterator
3686
+ structured: True
3687
+ structured_inherits: TensorIteratorBase
3688
+ dispatch:
3689
+ CPU: round_decimals_out
3690
+ CUDA: round_decimals_out
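The new round.decimals overloads above add a decimals keyword to rounding; a small sketch, assuming the usual torch.round / Tensor.round_ Python bindings:

    import torch

    x = torch.tensor([1.2345, -0.6789])
    torch.round(x, decimals=2)   # tensor([ 1.2300, -0.6800])
    x.round_(decimals=1)         # in-place variant delegating to the same structured kernel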
3538
3691
 
3539
3692
  - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
3540
3693
  device_check: NoCheck # TensorIterator
@@ -3591,6 +3744,7 @@
3591
3744
  python_module: nn
3592
3745
  dispatch:
3593
3746
  MkldnnCPU: mkldnn_gelu
3747
+ QuantizedCPU: gelu_quantized_cpu
3594
3748
 
3595
3749
  - func: gelu_backward.grad_input(Tensor grad, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
3596
3750
  structured: True
@@ -3783,11 +3937,17 @@
3783
3937
  device_check: NoCheck # TensorIterator
3784
3938
  structured_delegate: sin.out
3785
3939
  variants: function, method
3940
+ dispatch:
3941
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
3942
+ SparseCPU, SparseCUDA: sin_sparse
3786
3943
 
3787
3944
  - func: sin_(Tensor(a!) self) -> Tensor(a!)
3788
3945
  device_check: NoCheck # TensorIterator
3789
3946
  structured_delegate: sin.out
3790
3947
  variants: function, method
3948
+ dispatch:
3949
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
3950
+ SparseCPU, SparseCUDA: sin_sparse_
3791
3951
 
3792
3952
  - func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
3793
3953
  device_check: NoCheck # TensorIterator
@@ -3795,6 +3955,8 @@
3795
3955
  structured_inherits: TensorIteratorBase
3796
3956
  dispatch:
3797
3957
  CPU, CUDA: sin_out
3958
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
3959
+ SparseCPU, SparseCUDA: sin_sparse_out
3798
3960
 
3799
3961
  - func: sinc(Tensor self) -> Tensor
3800
3962
  structured_delegate: sinc.out
@@ -3814,11 +3976,17 @@
3814
3976
  device_check: NoCheck # TensorIterator
3815
3977
  structured_delegate: sinh.out
3816
3978
  variants: function, method
3979
+ dispatch:
3980
+ SparseCPU, SparseCUDA: sinh_sparse
3981
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr
3817
3982
 
3818
3983
  - func: sinh_(Tensor(a!) self) -> Tensor(a!)
3819
3984
  device_check: NoCheck # TensorIterator
3820
3985
  structured_delegate: sinh.out
3821
3986
  variants: function, method
3987
+ dispatch:
3988
+ SparseCPU, SparseCUDA: sinh_sparse_
3989
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_
3822
3990
 
3823
3991
  - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
3824
3992
  device_check: NoCheck # TensorIterator
@@ -3826,6 +3994,8 @@
3826
3994
  structured_inherits: TensorIteratorBase
3827
3995
  dispatch:
3828
3996
  CPU, CUDA: sinh_out
3997
+ SparseCPU, SparseCUDA: sinh_sparse_out
3998
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out
3829
3999
 
3830
4000
  # Returns a copy of this `Variable` that is detached from its autograd graph.
3831
4001
  # This method is OK to call if the `Variable` is a view.
@@ -3848,6 +4018,7 @@
3848
4018
  # this. If this `Variable` is a view, throws an `std::runtime_error()`.
3849
4019
  - func: detach_(Tensor(a!) self) -> Tensor(a!)
3850
4020
  variants: function, method
4021
+ tags: inplace_view
3851
4022
  dispatch:
3852
4023
  CompositeExplicitAutograd: detach_
3853
4024
 
@@ -3876,6 +4047,27 @@
3876
4047
  dispatch:
3877
4048
  CompositeExplicitAutograd: slice_backward
3878
4049
 
4050
+ - func: slice_scatter(Tensor self, Tensor src, int dim=0, int? start=None, int? end=None, int step=1) -> Tensor
4051
+ variants: function, method
4052
+ device_check: NoCheck
4053
+ device_guard: False
4054
+ dispatch:
4055
+ CompositeExplicitAutograd: slice_scatter
4056
+
4057
+ - func: select_scatter(Tensor self, Tensor src, int dim, int index) -> Tensor
4058
+ variants: function, method
4059
+ device_check: NoCheck
4060
+ device_guard: False
4061
+ dispatch:
4062
+ CompositeExplicitAutograd: select_scatter
4063
+
4064
+ - func: diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor
4065
+ variants: function, method
4066
+ device_check: NoCheck
4067
+ device_guard: False
4068
+ dispatch:
4069
+ CompositeExplicitAutograd: diagonal_scatter
4070
+
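slice_scatter, select_scatter, and diagonal_scatter above are out-of-place counterparts of writing into a slice, a selected row, or a diagonal view; a brief sketch, assuming the standard torch.* bindings for these entries:

    import torch

    base = torch.zeros(4, 4)
    # Embed a tensor into rows 1:3 of `base` without mutating `base`.
    out = torch.slice_scatter(base, torch.ones(2, 4), dim=0, start=1, end=3)
    # Overwrite row 0 with a 1-D tensor.
    out = torch.select_scatter(out, torch.full((4,), 2.0), dim=0, index=0)
    # Overwrite the main diagonal.
    out = torch.diagonal_scatter(out, torch.arange(4.0), offset=0)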
3879
4071
  - func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
3880
4072
  variants: function, method
3881
4073
  dispatch:
@@ -3902,10 +4094,10 @@
3902
4094
  CPU: softmax_cpu_out
3903
4095
  CUDA: softmax_cuda_out
3904
4096
 
3905
- - func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
4097
+ - func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
3906
4098
  structured_delegate: _softmax_backward_data.out
3907
4099
 
3908
- - func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
4100
+ - func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
3909
4101
  structured: True
3910
4102
  dispatch:
3911
4103
  CPU: softmax_backward_cpu_out
@@ -3918,7 +4110,7 @@
3918
4110
  dispatch:
3919
4111
  CompositeExplicitAutograd: unsafe_split
3920
4112
 
3921
- - func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[]
4113
+ - func: split.Tensor(Tensor(a -> *) self, int split_size, int dim=0) -> Tensor(a)[]
3922
4114
  variants: function, method
3923
4115
  device_check: NoCheck
3924
4116
  device_guard: False
@@ -3932,29 +4124,29 @@
3932
4124
  dispatch:
3933
4125
  CompositeExplicitAutograd: unsafe_split_with_sizes
3934
4126
 
3935
- - func: split_with_sizes(Tensor(a) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
4127
+ - func: split_with_sizes(Tensor(a -> *) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
3936
4128
  variants: function, method
3937
4129
  device_check: NoCheck
3938
4130
  device_guard: False
3939
4131
  dispatch:
3940
4132
  CompositeExplicitAutograd: split_with_sizes
3941
4133
 
3942
- - func: hsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
4134
+ - func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
3943
4135
  variants: function, method
3944
4136
 
3945
- - func: hsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
4137
+ - func: hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
3946
4138
  variants: function, method
3947
4139
 
3948
- - func: vsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
4140
+ - func: vsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
3949
4141
  variants: function, method
3950
4142
 
3951
- - func: vsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
4143
+ - func: vsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
3952
4144
  variants: function, method
3953
4145
 
3954
- - func: dsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
4146
+ - func: dsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
3955
4147
  variants: function, method
3956
4148
 
3957
- - func: dsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
4149
+ - func: dsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
3958
4150
  variants: function, method
3959
4151
 
3960
4152
  - func: squeeze(Tensor(a) self) -> Tensor(a)
@@ -3962,14 +4154,16 @@
3962
4154
  device_check: NoCheck
3963
4155
  device_guard: False
3964
4156
  dispatch:
3965
- CompositeExplicitAutograd: squeeze
4157
+ CPU, CUDA: squeeze
4158
+ QuantizedCPU, QuantizedCUDA: squeeze_quantized
3966
4159
 
3967
4160
  - func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
3968
4161
  variants: function, method
3969
4162
  device_check: NoCheck
3970
4163
  device_guard: False
3971
4164
  dispatch:
3972
- CompositeExplicitAutograd: squeeze
4165
+ CPU, CUDA: squeeze
4166
+ QuantizedCPU, QuantizedCUDA: squeeze_quantized
3973
4167
 
3974
4168
  - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
3975
4169
  variants: function, method
@@ -3980,6 +4174,7 @@
3980
4174
  variants: method
3981
4175
  device_check: NoCheck
3982
4176
  device_guard: False
4177
+ tags: inplace_view
3983
4178
  dispatch:
3984
4179
  CompositeExplicitAutograd: squeeze_
3985
4180
 
@@ -3987,6 +4182,7 @@
3987
4182
  variants: method
3988
4183
  device_check: NoCheck
3989
4184
  device_guard: False
4185
+ tags: inplace_view
3990
4186
  dispatch:
3991
4187
  CompositeExplicitAutograd: squeeze_
3992
4188
 
@@ -3994,6 +4190,7 @@
3994
4190
  variants: method
3995
4191
  device_check: NoCheck
3996
4192
  device_guard: False
4193
+ tags: inplace_view
3997
4194
 
3998
4195
  - func: sspaddmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
3999
4196
  variants: function, method
@@ -4105,11 +4302,15 @@
4105
4302
  variants: function, method
4106
4303
  dispatch:
4107
4304
  SparseCPU, SparseCUDA: sqrt_sparse
4305
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr
4108
4306
 
4109
4307
  - func: sqrt_(Tensor(a!) self) -> Tensor(a!)
4110
4308
  device_check: NoCheck # TensorIterator
4111
4309
  structured_delegate: sqrt.out
4112
4310
  variants: function, method
4311
+ dispatch:
4312
+ SparseCPU, SparseCUDA: sqrt_sparse_
4313
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_
4113
4314
 
4114
4315
  - func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4115
4316
  device_check: NoCheck # TensorIterator
@@ -4117,7 +4318,8 @@
4117
4318
  structured_inherits: TensorIteratorBase
4118
4319
  dispatch:
4119
4320
  CPU, CUDA: sqrt_out
4120
- SparseCPU, SparseCUDA: sqrt_out_sparse
4321
+ SparseCPU, SparseCUDA: sqrt_sparse_out
4322
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out
4121
4323
 
4122
4324
  - func: square(Tensor self) -> Tensor
4123
4325
  device_check: NoCheck # TensorIterator
@@ -4225,6 +4427,7 @@
4225
4427
  device_check: NoCheck
4226
4428
  device_guard: False
4227
4429
  variants: method
4430
+ tags: inplace_view
4228
4431
  dispatch:
4229
4432
  CompositeExplicitAutograd: t_
4230
4433
 
@@ -4232,11 +4435,17 @@
4232
4435
  device_check: NoCheck # TensorIterator
4233
4436
  structured_delegate: tan.out
4234
4437
  variants: function, method
4438
+ dispatch:
4439
+ SparseCPU, SparseCUDA: tan_sparse
4440
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
4235
4441
 
4236
4442
  - func: tan_(Tensor(a!) self) -> Tensor(a!)
4237
4443
  device_check: NoCheck # TensorIterator
4238
4444
  structured_delegate: tan.out
4239
4445
  variants: function, method
4446
+ dispatch:
4447
+ SparseCPU, SparseCUDA: tan_sparse_
4448
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_
4240
4449
 
4241
4450
  - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4242
4451
  device_check: NoCheck # TensorIterator
@@ -4244,6 +4453,8 @@
4244
4453
  structured_inherits: TensorIteratorBase
4245
4454
  dispatch:
4246
4455
  CPU, CUDA: tan_out
4456
+ SparseCPU, SparseCUDA: tan_sparse_out
4457
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out
4247
4458
 
4248
4459
  - func: tanh(Tensor self) -> Tensor
4249
4460
  device_check: NoCheck # TensorIterator
@@ -4252,6 +4463,8 @@
4252
4463
  dispatch:
4253
4464
  QuantizedCPU: tanh_quantized_cpu
4254
4465
  MkldnnCPU: mkldnn_tanh
4466
+ SparseCPU, SparseCUDA: tanh_sparse
4467
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr
4255
4468
 
4256
4469
  - func: tanh_(Tensor(a!) self) -> Tensor(a!)
4257
4470
  device_check: NoCheck # TensorIterator
@@ -4259,12 +4472,17 @@
4259
4472
  variants: function, method
4260
4473
  dispatch:
4261
4474
  MkldnnCPU: mkldnn_tanh_
4475
+ SparseCPU, SparseCUDA: tanh_sparse_
4476
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_
4477
+
4262
4478
  - func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4263
4479
  device_check: NoCheck # TensorIterator
4264
4480
  structured: True
4265
4481
  structured_inherits: TensorIteratorBase
4266
4482
  dispatch:
4267
4483
  CPU, CUDA: tanh_out
4484
+ SparseCPU, SparseCUDA: tanh_sparse_out
4485
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out
4268
4486
 
4269
4487
  - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
4270
4488
  variants: function
@@ -4331,6 +4549,7 @@
4331
4549
  variants: method
4332
4550
  device_check: NoCheck
4333
4551
  device_guard: False
4552
+ tags: inplace_view
4334
4553
  dispatch:
4335
4554
  CompositeExplicitAutograd: transpose_
4336
4555
 
@@ -4388,6 +4607,8 @@
4388
4607
  variants: function, method
4389
4608
  dispatch:
4390
4609
  CompositeExplicitAutograd: trunc
4610
+ SparseCPU, SparseCUDA: trunc_sparse
4611
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
4391
4612
 
4392
4613
  - func: trunc_(Tensor(a!) self) -> Tensor(a!)
4393
4614
  structured_delegate: trunc.out
@@ -4395,6 +4616,8 @@
4395
4616
  variants: function, method
4396
4617
  dispatch:
4397
4618
  CompositeExplicitAutograd: trunc_
4619
+ SparseCPU, SparseCUDA: trunc_sparse_
4620
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_
4398
4621
 
4399
4622
  - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4400
4623
  structured: True
@@ -4402,6 +4625,8 @@
4402
4625
  device_check: NoCheck # TensorIterator
4403
4626
  dispatch:
4404
4627
  CPU, CUDA: trunc_out
4628
+ SparseCPU, SparseCUDA: trunc_sparse_out
4629
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out
4405
4630
 
4406
4631
  # Alias for trunc
4407
4632
  - func: fix(Tensor self) -> Tensor
@@ -4461,12 +4686,15 @@
4461
4686
  device_check: NoCheck
4462
4687
  device_guard: False
4463
4688
  dispatch:
4464
- CompositeExplicitAutograd: unsqueeze
4689
+ CPU, CUDA: unsqueeze
4690
+ SparseCPU, SparseCUDA: unsqueeze_sparse
4691
+ QuantizedCPU, QuantizedCUDA: unsqueeze_quantized
4465
4692
 
4466
4693
  - func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
4467
4694
  variants: method
4468
4695
  device_check: NoCheck
4469
4696
  device_guard: False
4697
+ tags: inplace_view
4470
4698
  dispatch:
4471
4699
  CompositeExplicitAutograd: unsqueeze_
4472
4700
 
@@ -4586,6 +4814,11 @@
4586
4814
  device_check: NoCheck
4587
4815
  device_guard: False
4588
4816
 
4817
+ - func: _efficientzerotensor(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
4818
+ dispatch:
4819
+ CPU: _efficientzerotensor
4820
+ CUDA: _efficientzerotensor_cuda
4821
+
4589
4822
  - func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
4590
4823
 
4591
4824
  - func: zeros.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
@@ -4655,12 +4888,15 @@
4655
4888
  SparseCUDA: _sparse_sum_backward_cuda
4656
4889
 
4657
4890
  - func: _sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
4891
+ python_module: sparse
4658
4892
  variants: function
4659
4893
 
4660
4894
  - func: _sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
4895
+ python_module: sparse
4661
4896
  variants: function
4662
4897
 
4663
4898
  - func: _sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
4899
+ python_module: sparse
4664
4900
  dispatch:
4665
4901
  SparseCPU: softmax_sparse_cpu
4666
4902
  SparseCUDA: softmax_sparse_cuda
@@ -4671,12 +4907,15 @@
4671
4907
  SparseCUDA: softmax_backward_sparse_cuda
4672
4908
 
4673
4909
  - func: _sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
4910
+ python_module: sparse
4674
4911
  variants: function
4675
4912
 
4676
4913
  - func: _sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
4914
+ python_module: sparse
4677
4915
  variants: function
4678
4916
 
4679
4917
  - func: _sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
4918
+ python_module: sparse
4680
4919
  dispatch:
4681
4920
  SparseCPU: log_softmax_sparse_cpu
4682
4921
  SparseCUDA: log_softmax_sparse_cuda
@@ -4774,6 +5013,7 @@
4774
5013
  dispatch:
4775
5014
  CompositeExplicitAutograd: clone
4776
5015
  SparseCPU, SparseCUDA: clone_sparse
5016
+ SparseCsrCPU, SparseCsrCUDA: clone_sparse_csr
4777
5017
  MkldnnCPU: mkldnn_clone
4778
5018
  QuantizedCPU, QuantizedCUDA: quantized_clone
4779
5019
 
@@ -4886,9 +5126,20 @@
4886
5126
  # Functionally the same as addmm, but we give it a different derivative formula
4887
5127
  # that doesn't propagate gradients to non-present entries on sparse.
4888
5128
  - func: _sparse_addmm(Tensor self, Tensor sparse, Tensor dense, *, Scalar beta=1, Scalar alpha=1) -> Tensor
5129
+ python_module: sparse
4889
5130
  dispatch:
4890
5131
  CompositeExplicitAutograd: _sparse_addmm
4891
5132
 
5133
+ - func: sparse_sampled_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
5134
+ python_module: sparse
5135
+ dispatch:
5136
+ SparseCsrCUDA: sparse_sampled_addmm_out_sparse_csr_cuda
5137
+
5138
+ - func: sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
5139
+ python_module: sparse
5140
+ dispatch:
5141
+ SparseCsrCUDA: sparse_sampled_addmm_sparse_csr_cuda
5142
+
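sparse_sampled_addmm above is registered under python_module: sparse and, in this revision, only for SparseCsrCUDA; it is presumably exposed as torch.sparse.sampled_addmm. A hedged sketch of the intended use (the Python name and availability are assumptions based on the dispatch entries):

    import torch

    if torch.cuda.is_available():
        # Compute beta * input + alpha * (mat1 @ mat2), evaluated only at the
        # nonzero locations of the sparse CSR `input`.
        s = torch.eye(3, device="cuda").to_sparse_csr()
        a = torch.randn(3, 4, device="cuda")
        b = torch.randn(4, 3, device="cuda")
        out = torch.sparse.sampled_addmm(s, a, b)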
4892
5143
  - func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
4893
5144
  structured: True
4894
5145
  dispatch:
@@ -4896,8 +5147,8 @@
4896
5147
  CUDA: addmm_out_cuda
4897
5148
  SparseCPU: addmm_out_sparse_dense_cpu
4898
5149
  SparseCUDA: addmm_out_sparse_dense_cuda
4899
- SparseCsrCPU: addmm_out_sparse_csr_dense_cpu
4900
- SparseCsrCUDA: addmm_out_sparse_csr_dense_cuda
5150
+ SparseCsrCPU: addmm_out_sparse_csr_cpu
5151
+ SparseCsrCUDA: addmm_out_sparse_csr_cuda
4901
5152
 
4902
5153
  - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
4903
5154
  structured_delegate: addmm.out
@@ -5209,12 +5460,12 @@
5209
5460
  dispatch:
5210
5461
  SparseCPU, SparseCUDA: copy_sparse_
5211
5462
 
5212
- - func: unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]
5463
+ - func: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]
5213
5464
  variants: function, method
5214
5465
  dispatch:
5215
5466
  CompositeExplicitAutograd: unbind
5216
5467
 
5217
- - func: unbind.Dimname(Tensor(a) self, Dimname dim) -> Tensor(a)[]
5468
+ - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
5218
5469
  variants: function, method
5219
5470
 
5220
5471
  - func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
@@ -5246,6 +5497,11 @@
5246
5497
 
5247
5498
  - func: to_mkldnn_backward(Tensor grad, Tensor input) -> Tensor
5248
5499
 
5500
+ - func: quantize_per_tensor_dynamic(Tensor self, ScalarType dtype, bool reduce_range) -> Tensor
5501
+ variants: function
5502
+ dispatch:
5503
+ CPU, CUDA: quantize_per_tensor_dynamic
5504
+
5249
5505
  - func: quantize_per_tensor(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor
5250
5506
  variants: function
5251
5507
  dispatch:
@@ -5269,7 +5525,7 @@
5269
5525
  - func: dequantize.self(Tensor self) -> Tensor
5270
5526
  variants: function, method
5271
5527
  dispatch:
5272
- CPU: dequantize_cpu
5528
+ CPU, CUDA: dequantize_cpu_or_cuda
5273
5529
  QuantizedCPU, QuantizedCUDA: dequantize_quantized
5274
5530
 
5275
5531
  - func: dequantize.tensors(Tensor[] tensors) -> Tensor[]
@@ -5391,6 +5647,14 @@
5391
5647
  - func: choose_qparams_optimized(Tensor input, int numel, int n_bins, float ratio, int bit_width) -> (Tensor, Tensor)
5392
5648
  variants: function
5393
5649
 
5650
+ - func: _autocast_to_reduced_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype) -> Tensor(a)
5651
+ variants: method
5652
+ device_guard: False
5653
+
5654
+ - func: _autocast_to_full_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled) -> Tensor(a)
5655
+ variants: method
5656
+ device_guard: False
5657
+
5394
5658
  - func: _to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor
5395
5659
  device_check: NoCheck
5396
5660
  device_guard: False
@@ -5589,6 +5853,8 @@
5589
5853
  - func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
5590
5854
  device_check: NoCheck # TensorIterator
5591
5855
  variants: function, method
5856
+ dispatch:
5857
+ CompositeExplicitAutograd: masked_fill
5592
5858
 
5593
5859
  - func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
5594
5860
  device_check: NoCheck # TensorIterator
@@ -5600,6 +5866,8 @@
5600
5866
  - func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
5601
5867
  device_check: NoCheck # TensorIterator
5602
5868
  variants: function, method
5869
+ dispatch:
5870
+ CompositeExplicitAutograd: masked_fill
5603
5871
 
5604
5872
  - func: masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!)
5605
5873
  variants: method
@@ -5609,13 +5877,20 @@
5609
5877
 
5610
5878
  - func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
5611
5879
  variants: function, method
5880
+ dispatch:
5881
+ CompositeExplicitAutograd: masked_scatter
5882
+
5883
+ - func: _masked_softmax(Tensor self, Tensor mask) -> Tensor
5884
+ dispatch:
5885
+ CUDA: masked_softmax_cuda
5886
+ CPU: masked_softmax_cpu
5612
5887
 
5613
5888
  - func: view(Tensor(a) self, int[] size) -> Tensor(a)
5614
5889
  variants: method
5615
5890
  device_check: NoCheck
5616
5891
  device_guard: False
5617
5892
  dispatch:
5618
- CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
5893
+ ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
5619
5894
  MkldnnCPU: mkldnn_view
5620
5895
 
5621
5896
  # Warning: If you want to change the name or overload name of this
@@ -5639,19 +5914,21 @@
5639
5914
  - func: put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor
5640
5915
  variants: function, method
5641
5916
 
5642
- - func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
5643
- variants: method
5644
-
5645
- - func: index_add_.alpha(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha) -> Tensor(a!)
5646
- variants: method
5917
+ - func: index_add.out(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
5918
+ structured: True
5919
+ variants: function
5920
+ precomputed:
5921
+ - dim -> int dim
5647
5922
  dispatch:
5648
- CPU: index_add_cpu_
5649
- CUDA: index_add_cuda_
5923
+ CPU: index_add_cpu_out
5924
+ CUDA: index_add_cuda_out
5650
5925
 
5651
- - func: index_add(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
5652
- variants: function, method
5926
+ - func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!)
5927
+ structured_delegate: index_add.out
5928
+ variants: method
5653
5929
 
5654
- - func: index_add.alpha(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha) -> Tensor
5930
+ - func: index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
5931
+ structured_delegate: index_add.out
5655
5932
  variants: function, method
5656
5933
 
5657
5934
  - func: index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
@@ -5667,6 +5944,8 @@
5667
5944
  - func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
5668
5945
  device_check: NoCheck # TensorIterator
5669
5946
  variants: function, method
5947
+ dispatch:
5948
+ CompositeExplicitAutograd: index_fill
5670
5949
 
5671
5950
  - func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!)
5672
5951
  device_check: NoCheck # TensorIterator
@@ -5677,6 +5956,8 @@
5677
5956
  - func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
5678
5957
  device_check: NoCheck # TensorIterator
5679
5958
  variants: function, method
5959
+ dispatch:
5960
+ CompositeExplicitAutograd: index_fill
5680
5961
 
5681
5962
  - func: index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!)
5682
5963
  device_check: NoCheck # TensorIterator
@@ -5773,6 +6054,11 @@
5773
6054
  - func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
5774
6055
  variants: function, method
5775
6056
 
6057
+ - func: scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor
6058
+ variants: function, method
6059
+ dispatch:
6060
+ CPU: scatter_reduce_two_cpu
6061
+
5776
6062
  - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
5777
6063
  structured_delegate: eq.Scalar_out
5778
6064
  device_check: NoCheck # TensorIterator
@@ -6064,16 +6350,12 @@
6064
6350
  CPU, CUDA: bitwise_right_shift
6065
6351
 
6066
6352
  - func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
6353
+ structured_delegate: tril.out
6067
6354
  variants: method
6068
- dispatch:
6069
- CPU: tril_cpu_
6070
- CUDA: tril_cuda_
6071
6355
 
6072
6356
  - func: triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
6357
+ structured_delegate: triu.out
6073
6358
  variants: method
6074
- dispatch:
6075
- CPU: triu_cpu_
6076
- CUDA: triu_cuda_
6077
6359
 
6078
6360
  - func: digamma_(Tensor(a!) self) -> Tensor(a!)
6079
6361
  device_check: NoCheck # TensorIterator
@@ -6083,16 +6365,12 @@
6083
6365
  - func: lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!)
6084
6366
  device_check: NoCheck # TensorIterator
6085
6367
  variants: method
6086
- dispatch:
6087
- CPU: lerp_cpu_scalar_
6088
- CUDA: lerp_cuda_scalar_
6368
+ structured_delegate: lerp.Scalar_out
6089
6369
 
6090
6370
  - func: lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!)
6091
6371
  device_check: NoCheck # TensorIterator
6092
6372
  variants: method
6093
- dispatch:
6094
- CPU: lerp_cpu_tensor_
6095
- CUDA: lerp_cuda_tensor_
6373
+ structured_delegate: lerp.Tensor_out
6096
6374
 
6097
6375
  - func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
6098
6376
  variants: method
@@ -6178,33 +6456,29 @@
6178
6456
  device_guard: False
6179
6457
 
6180
6458
  - func: cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)
6181
- dispatch:
6182
- CPU, CUDA: cross_out
6183
6459
 
6184
6460
  - func: cross(Tensor self, Tensor other, int? dim=None) -> Tensor
6185
6461
  variants: method, function
6186
- dispatch:
6187
- CPU, CUDA: cross
6188
6462
 
6189
6463
  - func: triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
6464
+ structured: True
6190
6465
  dispatch:
6191
- CPU: triu_cpu_out
6192
- CUDA: triu_cuda_out
6466
+ CPU: triu_cpu
6467
+ CUDA: triu_cuda
6193
6468
 
6194
6469
  - func: triu(Tensor self, int diagonal=0) -> Tensor
6470
+ structured_delegate: triu.out
6195
6471
  variants: method, function
6196
- dispatch:
6197
- CompositeExplicitAutograd: triu
6198
6472
 
6199
6473
  - func: tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
6474
+ structured: True
6200
6475
  dispatch:
6201
- CPU: tril_cpu_out
6202
- CUDA: tril_cuda_out
6476
+ CPU: tril_cpu
6477
+ CUDA: tril_cuda
6203
6478
 
6204
6479
  - func: tril(Tensor self, int diagonal=0) -> Tensor
6480
+ structured_delegate: tril.out
6205
6481
  variants: method, function
6206
- dispatch:
6207
- CompositeExplicitAutograd: tril
6208
6482
 
6209
6483
  - func: tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
6210
6484
  dispatch:
@@ -6584,7 +6858,8 @@
6584
6858
  - func: index_select(Tensor self, int dim, Tensor index) -> Tensor
6585
6859
  variants: method, function
6586
6860
  dispatch:
6587
- CPU, QuantizedCPU: index_select_cpu_
6861
+ CPU: index_select_cpu_
6862
+ QuantizedCPU: index_select_quantized_cpu_
6588
6863
  CUDA, QuantizedCUDA: index_select_cuda
6589
6864
  SparseCPU: index_select_sparse
6590
6865
  SparseCUDA: index_select_sparse
@@ -6629,6 +6904,9 @@
6629
6904
  - func: nonzero_numpy(Tensor self) -> Tensor[]
6630
6905
  variants: method, function
6631
6906
 
6907
+ - func: argwhere(Tensor self) -> Tensor
6908
+ variants: method, function
6909
+
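argwhere, added above, mirrors numpy.argwhere; a one-line sketch, assuming the torch.argwhere binding:

    import torch

    t = torch.tensor([[0, 1], [2, 0]])
    torch.argwhere(t)   # tensor([[0, 1], [1, 0]]) -- indices of nonzero elements, like torch.nonzero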
6632
6910
  - func: gather.out(Tensor self, int dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)
6633
6911
  structured: True
6634
6912
  dispatch:
@@ -6699,13 +6977,30 @@
6699
6977
  CUDA: legacy_lstsq_cuda
6700
6978
 
6701
6979
  - func: triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) M) -> (Tensor(a!) solution, Tensor(b!) cloned_coefficient)
6980
+ structured: True
6702
6981
  dispatch:
6703
6982
  CPU, CUDA: triangular_solve_out
6983
+ SparseCsrCPU: triangular_solve_out_sparse_csr_cpu
6984
+ SparseCsrCUDA: triangular_solve_out_sparse_csr_cuda
6704
6985
 
6705
6986
  - func: triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient)
6987
+ structured_delegate: triangular_solve.X
6988
+ variants: method, function
6989
+
6990
+ - func: _linalg_check_errors(Tensor info, str api_name, *, bool is_matrix) -> ()
6991
+ dispatch:
6992
+ CompositeExplicitAutograd: _linalg_check_errors
6993
+
6994
+ - func: linalg_solve_triangular.out(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False, Tensor(a!) out) -> Tensor(a!)
6995
+ python_module: linalg
6996
+ dispatch:
6997
+ CPU, CUDA: linalg_solve_triangular_out
6998
+
6999
+ - func: linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor
7000
+ python_module: linalg
6706
7001
  variants: method, function
6707
7002
  dispatch:
6708
- CPU, CUDA: triangular_solve
7003
+ CPU, CUDA: linalg_solve_triangular
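linalg_solve_triangular above is the new linalg-namespaced triangular solver registered alongside the structured triangular_solve entries; a minimal sketch, assuming the torch.linalg.solve_triangular binding:

    import torch

    A = torch.randn(3, 3).triu() + 3 * torch.eye(3)   # well-conditioned upper-triangular matrix
    B = torch.randn(3, 2)
    X = torch.linalg.solve_triangular(A, B, upper=True)
    torch.testing.assert_close(A @ X, B)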
6709
7004
 
6710
7005
  - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
6711
7006
  dispatch:
@@ -6736,12 +7031,6 @@
6736
7031
  - func: svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V)
6737
7032
  variants: method, function
6738
7033
 
6739
- - func: _svd_helper(Tensor self, bool some, bool compute_uv) -> (Tensor U, Tensor S, Tensor V)
6740
- variants: function
6741
- dispatch:
6742
- CPU: _svd_helper_cpu
6743
- CUDA: _svd_helper_cuda
6744
-
6745
7034
  # swapaxes, alias for transpose
6746
7035
  - func: swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a)
6747
7036
  variants: function, method
@@ -6752,6 +7041,7 @@
6752
7041
  variants: method
6753
7042
  device_check: NoCheck
6754
7043
  device_guard: False
7044
+ tags: inplace_view
6755
7045
 
6756
7046
  # swapdims, alias for transpose
6757
7047
  - func: swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
@@ -6763,6 +7053,7 @@
6763
7053
  variants: method
6764
7054
  device_check: NoCheck
6765
7055
  device_guard: False
7056
+ tags: inplace_view
6766
7057
 
6767
7058
  - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
6768
7059
  dispatch:
@@ -6843,8 +7134,6 @@
6843
7134
 
6844
7135
  - func: _lu_with_info(Tensor self, bool pivot=True, bool check_errors=True) -> (Tensor LU, Tensor pivots, Tensor info)
6845
7136
  variants: function
6846
- dispatch:
6847
- CPU, CUDA: _lu_with_info
6848
7137
 
6849
7138
  - func: lu_solve.out(Tensor self, Tensor LU_data, Tensor LU_pivots, *, Tensor(a!) out) -> Tensor(a!)
6850
7139
  dispatch:
@@ -6926,11 +7215,17 @@
6926
7215
  device_check: NoCheck # TensorIterator
6927
7216
  structured_delegate: erfinv.out
6928
7217
  variants: method, function
7218
+ dispatch:
7219
+ SparseCPU, SparseCUDA: erfinv_sparse
7220
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr
6929
7221
 
6930
7222
  - func: erfinv_(Tensor(a!) self) -> Tensor(a!)
6931
7223
  device_check: NoCheck # TensorIterator
6932
7224
  structured_delegate: erfinv.out
6933
7225
  variants: method
7226
+ dispatch:
7227
+ SparseCPU, SparseCUDA: erfinv_sparse_
7228
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_
6934
7229
 
6935
7230
  - func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
6936
7231
  device_check: NoCheck # TensorIterator
@@ -6938,6 +7233,8 @@
6938
7233
  structured_inherits: TensorIteratorBase
6939
7234
  dispatch:
6940
7235
  CPU, CUDA: erfinv_out
7236
+ SparseCPU, SparseCUDA: erfinv_sparse_out
7237
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
6941
7238
 
6942
7239
  - func: i0(Tensor self) -> Tensor
6943
7240
  structured_delegate: i0.out
@@ -6959,6 +7256,8 @@
6959
7256
  variants: function, method
6960
7257
  dispatch:
6961
7258
  CompositeExplicitAutograd: sign
7259
+ SparseCPU, SparseCUDA: sign_sparse
7260
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr
6962
7261
 
6963
7262
  - func: sign_(Tensor(a!) self) -> Tensor(a!)
6964
7263
  device_check: NoCheck # TensorIterator
@@ -6966,6 +7265,8 @@
6966
7265
  variants: method
6967
7266
  dispatch:
6968
7267
  CompositeExplicitAutograd: sign_
7268
+ SparseCPU, SparseCUDA: sign_sparse_
7269
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_
6969
7270
 
6970
7271
  - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
6971
7272
  device_check: NoCheck # TensorIterator
@@ -6973,10 +7274,15 @@
6973
7274
  structured_inherits: TensorIteratorBase
6974
7275
  dispatch:
6975
7276
  CPU, CUDA: sign_out
7277
+ SparseCPU, SparseCUDA: sign_sparse_out
7278
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
6976
7279
 
6977
7280
  - func: signbit(Tensor self) -> Tensor
6978
7281
  variants: function, method
6979
7282
  structured_delegate: signbit.out
7283
+ dispatch:
7284
+ SparseCPU, SparseCUDA: signbit_sparse
7285
+ SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr
6980
7286
 
6981
7287
  - func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
6982
7288
  structured: True
@@ -6984,6 +7290,8 @@
6984
7290
  dispatch:
6985
7291
  CPU: signbit_out
6986
7292
  CUDA: signbit_out
7293
+ SparseCPU, SparseCUDA: signbit_sparse_out
7294
+ SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out
6987
7295
 
6988
7296
  - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
6989
7297
  device_check: NoCheck # TensorIterator
@@ -7008,31 +7316,39 @@
7008
7316
  structured_delegate: atan2.out
7009
7317
  variants: method, function
7010
7318
 
7319
+ # arctan2, alias of atan2
7320
+ - func: arctan2(Tensor self, Tensor other) -> Tensor
7321
+ variants: method, function
7322
+
7323
+ - func: arctan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
7324
+ device_check: NoCheck # TensorIterator
7325
+
7326
+ - func: arctan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
7327
+ variants: method
7328
+
7011
7329
  - func: lerp.Scalar_out(Tensor self, Tensor end, Scalar weight, *, Tensor(a!) out) -> Tensor(a!)
7012
7330
  device_check: NoCheck # TensorIterator
7331
+ structured: True
7332
+ structured_inherits: TensorIteratorBase
7013
7333
  dispatch:
7014
- CPU: lerp_cpu_scalar_out
7015
- CUDA: lerp_cuda_scalar_out
7334
+ CPU, CUDA: lerp_Scalar
7016
7335
 
7017
7336
  - func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)
7018
7337
  device_check: NoCheck # TensorIterator
7338
+ structured: True
7339
+ structured_inherits: TensorIteratorBase
7019
7340
  dispatch:
7020
- CPU: lerp_cpu_tensor_out
7021
- CUDA: lerp_cuda_tensor_out
7341
+ CPU, CUDA: lerp_Tensor
7022
7342
 
7023
7343
  - func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor
7024
7344
  device_check: NoCheck # TensorIterator
7025
7345
  variants: method, function
7026
- dispatch:
7027
- CPU: lerp_cpu_scalar
7028
- CUDA: lerp_cuda_scalar
7346
+ structured_delegate: lerp.Scalar_out
7029
7347
 
7030
7348
  - func: lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor
7031
7349
  device_check: NoCheck # TensorIterator
7032
7350
  variants: method, function
7033
- dispatch:
7034
- CPU: lerp_cpu_tensor
7035
- CUDA: lerp_cuda_tensor
7351
+ structured_delegate: lerp.Tensor_out
7036
7352
 
7037
7353
  - func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!)
7038
7354
  dispatch:
@@ -7063,6 +7379,18 @@
7063
7379
  dispatch:
7064
7380
  CPU: histogram_cpu
7065
7381
 
7382
+ - func: _histogramdd_bin_edges(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor[]
7383
+ dispatch:
7384
+ CPU: histogramdd_bin_edges_cpu
7385
+
7386
+ - func: _histogramdd_from_bin_cts(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor
7387
+ dispatch:
7388
+ CPU: histogramdd_cpu
7389
+
7390
+ - func: _histogramdd_from_bin_tensors(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False) -> Tensor
7391
+ dispatch:
7392
+ CPU: histogramdd_cpu
7393
+
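The _histogramdd_* entries above are the CPU kernels behind the multidimensional histogram API, presumably exposed as torch.histogramdd; a hedged sketch (the public name and return layout are assumptions):

    import torch

    pts = torch.randn(100, 2)
    hist, bin_edges = torch.histogramdd(pts, bins=[5, 5])
    # `hist` is a 5x5 count tensor; `bin_edges` holds one 1-D tensor of edges per dimension.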
7066
7394
  - func: fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
7067
7395
  device_check: NoCheck # TensorIterator
7068
7396
  dispatch:
@@ -7275,48 +7603,25 @@
7275
7603
  device_check: NoCheck # TensorIterator
7276
7604
  variants: method, function
7277
7605
 
7278
- # The following quantile signatures are DEPRECATED in favor of the new ones with the interpolation kwarg.
7279
- - func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
7280
-
7281
- - func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor
7606
+ - func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
7282
7607
  variants: method, function
7283
7608
 
7284
- - func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
7609
+ - func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
7285
7610
 
7286
- - func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor
7611
+ - func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
7287
7612
  variants: method, function
7288
7613
 
7289
- - func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
7614
+ - func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
7290
7615
 
7291
- - func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor
7616
+ - func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
7292
7617
  variants: method, function
7293
7618
 
7294
- - func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
7619
+ - func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
7295
7620
 
7296
- - func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor
7621
+ - func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
7297
7622
  variants: method, function
7298
7623
 
7299
- # To keep backward and forward compatibility, and to avoid ambiguity with the original signatures, dim, keepdim and interpolation
7300
- # parameters are required for now. Once the deprecated signatures are removed they will be made optional.
7301
- - func: quantile.new_scalar_out(Tensor self, float q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
7302
-
7303
- - func: quantile.new_scalar(Tensor self, float q, int? dim, bool keepdim, *, str interpolation) -> Tensor
7304
- variants: method, function
7305
-
7306
- - func: quantile.new_out(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
7307
-
7308
- - func: quantile.new(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation) -> Tensor
7309
- variants: method, function
7310
-
7311
- - func: nanquantile.new_scalar_out(Tensor self, float q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
7312
-
7313
- - func: nanquantile.new_scalar(Tensor self, float q, int? dim, bool keepdim, *, str interpolation) -> Tensor
7314
- variants: method, function
7315
-
7316
- - func: nanquantile.new_out(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
7317
-
7318
- - func: nanquantile.new(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation) -> Tensor
7319
- variants: method, function
7624
+ - func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
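The quantile/nanquantile rewrite above removes the deprecated overloads and makes interpolation a regular keyword defaulting to 'linear'; a short sketch, assuming the usual torch.quantile binding:

    import torch

    x = torch.arange(10.0)
    torch.quantile(x, 0.25)                                   # interpolation='linear' by default
    torch.quantile(x, torch.tensor([0.25, 0.75]), interpolation='nearest')
    torch.nanquantile(torch.tensor([1.0, float('nan'), 3.0]), 0.5)   # NaNs are ignored -> 2.0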
7320
7625
 
7321
7626
  - func: sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
7322
7627
  device_check: NoCheck # TensorIterator
@@ -7511,6 +7816,7 @@
7511
7816
  dispatch:
7512
7817
  CPU, CUDA: normal_
7513
7818
  Meta: normal_meta_
7819
+ SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
7514
7820
 
7515
7821
  - func: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
7516
7822
  dispatch:
@@ -8209,6 +8515,13 @@
8209
8515
  CPU: foreach_tensor_minimum_slow
8210
8516
  CUDA: foreach_tensor_minimum_cuda
8211
8517
 
8518
+ - func: _foreach_norm.Scalar(Tensor[] tensors, Scalar ord=2) -> Tensor[]
8519
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
8520
+ variants: function
8521
+ dispatch:
8522
+ CPU: foreach_tensor_norm_slow
8523
+ CUDA: foreach_tensor_norm_cuda
8524
+
8212
8525
  - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
8213
8526
  dispatch:
8214
8527
  CPU: bucketize_cpu
@@ -8224,17 +8537,27 @@
8224
8537
  CPU: bucketize_cpu
8225
8538
  CUDA: bucketize_cuda
8226
8539
 
8227
- - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False) -> Tensor
8540
+ - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
8228
8541
  dispatch:
8229
8542
  CPU: searchsorted_cpu
8230
8543
  CUDA: searchsorted_cuda
8231
8544
 
8232
- - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
8545
+ # [Note about _torch_cuda_cu_linker_symbol_op and torch_cuda_cu]
8546
+ # This is a DUMMY function to force the linking against torch_cuda_cu on Windows.
8547
+ # Otherwise, the Windows linker will optimize and not include torch_cuda_cu even when we
8548
+ # want it to be included. This is similar to what we do with warp_size for torch_cuda_cpp,
8549
+ # described as the solution to this issue: https://github.com/pytorch/pytorch/issues/31611
8550
+ # This op should NOT be used or exposed or edited or else Windows builds (with BUILD_SPLIT_CUDA) will break.
8551
+ - func: _torch_cuda_cu_linker_symbol_op(Tensor self) -> Tensor
8552
+ dispatch:
8553
+ CUDA: _torch_cuda_cu_linker_symbol_op_cuda
8554
+
8555
+ - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
8233
8556
  dispatch:
8234
8557
  CPU: searchsorted_out_cpu
8235
8558
  CUDA: searchsorted_out_cuda
8236
8559
 
8237
- - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False) -> Tensor
8560
+ - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
8238
8561
  dispatch:
8239
8562
  CPU: searchsorted_cpu
8240
8563
  CUDA: searchsorted_cuda
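searchsorted above gains side and sorter keywords (mirroring numpy.searchsorted); a brief sketch, assuming the torch.searchsorted binding:

    import torch

    seq = torch.tensor([1, 3, 3, 5])
    vals = torch.tensor([3, 4])
    torch.searchsorted(seq, vals)                 # tensor([1, 3]) -- leftmost insertion points
    torch.searchsorted(seq, vals, side='right')   # tensor([3, 3])
    # `sorter` supplies indices that sort an otherwise unsorted sequence.
    unsorted = torch.tensor([5, 1, 3, 3])
    torch.searchsorted(unsorted, vals, sorter=torch.argsort(unsorted))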
@@ -8248,6 +8571,15 @@
8248
8571
  CPU: _convert_indices_from_coo_to_csr_structured_cpu
8249
8572
  CUDA: _convert_indices_from_coo_to_csr_structured_cuda
8250
8573
 
8574
+ - func: _convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor
8575
+ structured_delegate: _convert_indices_from_csr_to_coo.out
8576
+
8577
+ - func: _convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!)
8578
+ structured: True
8579
+ dispatch:
8580
+ CPU: _convert_indices_from_csr_to_coo_structured_cpu
8581
+ CUDA: _convert_indices_from_csr_to_coo_structured_cuda
8582
+
8251
8583
  ## NN wrappers
8252
8584
 
8253
8585
  - func: mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
@@ -8409,16 +8741,16 @@
8409
8741
 
8410
8742
  - func: smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!)
8411
8743
  device_check: NoCheck # TensorIterator
8744
+ structured: True
8745
+ structured_inherits: TensorIteratorBase
8412
8746
  python_module: nn
8413
8747
  dispatch:
8414
- CPU: smooth_l1_loss_out
8415
- CUDA: smooth_l1_loss_out
8748
+ CPU, CUDA: smooth_l1_loss_out
8416
8749
 
8417
8750
  - func: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor
8418
8751
  device_check: NoCheck # TensorIterator
8752
+ structured_delegate: smooth_l1_loss.out
8419
8753
  python_module: nn
8420
- dispatch:
8421
- CPU, CUDA: smooth_l1_loss
8422
8754
 
8423
8755
  - func: smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!)
8424
8756
  python_module: nn
@@ -8533,6 +8865,7 @@
8533
8865
  python_module: nn
8534
8866
  dispatch:
8535
8867
  CPU, CUDA: hardsigmoid_out
8868
+ QuantizedCPU: hardsigmoid_out_quantized_cpu
8536
8869
 
8537
8870
  - func: hardsigmoid(Tensor self) -> Tensor
8538
8871
  structured_delegate: hardsigmoid.out
@@ -8715,14 +9048,14 @@
8715
9048
  device_check: NoCheck # TensorIterator
8716
9049
  python_module: nn
8717
9050
 
8718
- - func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
9051
+ - func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)
8719
9052
  structured: True
8720
9053
  structured_inherits: TensorIteratorBase
8721
9054
  python_module: nn
8722
9055
  dispatch:
8723
9056
  CPU, CUDA: softplus_backward_out
8724
9057
 
8725
- - func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output) -> Tensor
9058
+ - func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold) -> Tensor
8726
9059
  structured_delegate: softplus_backward.grad_input
8727
9060
  python_module: nn
8728
9061
 
@@ -8933,19 +9266,22 @@
8933
9266
 
8934
9267
  - func: fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
8935
9268
  python_module: nn
9269
+ structured: True
8936
9270
  dispatch:
8937
- CPU: fractional_max_pool2d_backward_out_cpu
8938
- CUDA: fractional_max_pool2d_backward_out_cuda
9271
+ CPU: fractional_max_pool2d_backward_cpu
9272
+ CUDA: fractional_max_pool2d_backward_cuda
8939
9273
 
8940
9274
  - func: fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor
8941
9275
  python_module: nn
8942
- dispatch:
8943
- CPU: fractional_max_pool2d_backward_cpu
8944
- CUDA: fractional_max_pool2d_backward_cuda
9276
+ structured_delegate: fractional_max_pool2d_backward.grad_input
8945
9277
 
8946
9278
  # Return: (Tensor output, Tensor indices)
8947
9279
  - func: fractional_max_pool3d.output(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))
8948
9280
  python_module: nn
9281
+ structured: True
9282
+ precomputed:
9283
+ - kernel_size -> int poolSizeT, int poolSizeH, int poolSizeW
9284
+ - output_size -> int outputT, int outputH, int outputW
8949
9285
  dispatch:
8950
9286
  CPU: fractional_max_pool3d_out_cpu
8951
9287
  CUDA: fractional_max_pool3d_out_cuda
@@ -8953,9 +9289,7 @@
8953
9289
  # Return: (Tensor output, Tensor indices)
8954
9290
  - func: fractional_max_pool3d(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples) -> (Tensor, Tensor)
8955
9291
  python_module: nn
8956
- dispatch:
8957
- CPU: fractional_max_pool3d_cpu
8958
- CUDA: fractional_max_pool3d_cuda
9292
+ structured_delegate: fractional_max_pool3d.output
8959
9293
 
8960
9294
  - func: fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
8961
9295
  python_module: nn
@@ -9225,6 +9559,16 @@
9225
9559
  dispatch:
9226
9560
  CompositeExplicitAutograd: upsample_bilinear2d_backward
9227
9561
 
9562
+ - func: _upsample_bilinear2d_aa.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
9563
+ python_module: nn
9564
+ dispatch:
9565
+ CompositeExplicitAutograd: _upsample_bilinear2d_aa
9566
+
9567
+ - func: _upsample_bilinear2d_aa_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
9568
+ python_module: nn
9569
+ dispatch:
9570
+ CompositeExplicitAutograd: _upsample_bilinear2d_aa_backward
9571
+
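The _upsample_bilinear2d_aa entries above (and the _upsample_bicubic2d_aa entries further down) are the kernels behind anti-aliased resampling; they are presumably reached through the antialias flag of torch.nn.functional.interpolate (the flag name is an assumption, not stated in this file):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 3, 64, 64)
    # Anti-aliased bilinear downsampling, closer to PIL/OpenCV results than the plain kernel.
    y = F.interpolate(x, size=(32, 32), mode='bilinear', align_corners=False, antialias=True)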
9228
9572
  - func: upsample_trilinear3d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
9229
9573
  python_module: nn
9230
9574
  dispatch:
@@ -9245,26 +9589,56 @@
9245
9589
  dispatch:
9246
9590
  CompositeExplicitAutograd: upsample_bicubic2d_backward
9247
9591
 
9592
+ - func: _upsample_bicubic2d_aa.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
9593
+ python_module: nn
9594
+ dispatch:
9595
+ CompositeExplicitAutograd: _upsample_bicubic2d_aa
9596
+
9597
+ - func: _upsample_bicubic2d_aa_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
9598
+ python_module: nn
9599
+ dispatch:
9600
+ CompositeExplicitAutograd: _upsample_bicubic2d_aa_backward
9601
+
9248
9602
  - func: upsample_nearest1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9249
9603
  python_module: nn
9250
9604
  dispatch:
9251
9605
  CompositeExplicitAutograd: upsample_nearest1d
9252
9606
 
9607
+ - func: _upsample_nearest_exact1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9608
+ python_module: nn
9609
+ dispatch:
9610
+ CompositeExplicitAutograd: _upsample_nearest_exact1d
9611
+
9253
9612
  - func: upsample_nearest1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9254
9613
  python_module: nn
9255
9614
  dispatch:
9256
9615
  CompositeExplicitAutograd: upsample_nearest1d_backward
9257
9616
 
9617
+ - func: _upsample_nearest_exact1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9618
+ python_module: nn
9619
+ dispatch:
9620
+ CompositeExplicitAutograd: _upsample_nearest_exact1d_backward
9621
+
9258
9622
  - func: upsample_nearest2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9259
9623
  python_module: nn
9260
9624
  dispatch:
9261
9625
  CompositeExplicitAutograd: upsample_nearest2d
9262
9626
 
9627
+ - func: _upsample_nearest_exact2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9628
+ python_module: nn
9629
+ dispatch:
9630
+ CompositeExplicitAutograd: _upsample_nearest_exact2d
9631
+
9263
9632
  - func: upsample_nearest2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9264
9633
  python_module: nn
9265
9634
  dispatch:
9266
9635
  CompositeExplicitAutograd: upsample_nearest2d_backward
9267
9636
 
9637
+ - func: _upsample_nearest_exact2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9638
+ python_module: nn
9639
+ dispatch:
9640
+ CompositeExplicitAutograd: _upsample_nearest_exact2d_backward
9641
+
9268
9642
  - func: upsample_nearest3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9269
9643
  python_module: nn
9270
9644
  dispatch:
@@ -9272,12 +9646,25 @@
9272
9646
  CUDA: upsample_nearest3d_cuda
9273
9647
  QuantizedCPU: upsample_nearest3d_quantized_cpu
9274
9648
 
9649
+ - func: _upsample_nearest_exact3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9650
+ python_module: nn
9651
+ dispatch:
9652
+ CPU: _upsample_nearest_exact3d_cpu
9653
+ CUDA: _upsample_nearest_exact3d_cuda
9654
+ QuantizedCPU: _upsample_nearest_exact3d_quantized_cpu
9655
+
9275
9656
  - func: upsample_nearest3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9276
9657
  python_module: nn
9277
9658
  dispatch:
9278
9659
  CPU: upsample_nearest3d_backward_cpu
9279
9660
  CUDA: upsample_nearest3d_backward_cuda
9280
9661
 
9662
+ - func: _upsample_nearest_exact3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9663
+ python_module: nn
9664
+ dispatch:
9665
+ CPU: _upsample_nearest_exact3d_backward_cpu
9666
+ CUDA: _upsample_nearest_exact3d_backward_cuda
9667
+
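The _upsample_nearest_exact* family added above backs a nearest-neighbor mode with exact (PIL/OpenCV-style) index rounding, presumably selected via mode='nearest-exact' in torch.nn.functional.interpolate (the mode string is an assumption):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 1, 4, 4)
    y = F.interpolate(x, scale_factor=2, mode='nearest-exact')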
9281
9668
  # NOTE: all of the non-"vec" upsample overloads are only kept for backward compatibility.
9282
9669
  - func: upsample_linear1d.out(Tensor self, int[1] output_size, bool align_corners, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
9283
9670
  python_module: nn
@@ -9325,6 +9712,28 @@
9325
9712
  python_module: nn
9326
9713
  structured_delegate: upsample_bilinear2d_backward.grad_input
9327
9714
 
9715
+ - func: _upsample_bilinear2d_aa.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9716
+ python_module: nn
9717
+ structured: True
9718
+ dispatch:
9719
+ CPU: _upsample_bilinear2d_aa_out_cpu
9720
+ CUDA: _upsample_bilinear2d_aa_out_cuda
9721
+
9722
+ - func: _upsample_bilinear2d_aa(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
9723
+ python_module: nn
9724
+ structured_delegate: _upsample_bilinear2d_aa.out
9725
+
9726
+ - func: _upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9727
+ python_module: nn
9728
+ structured: True
9729
+ dispatch:
9730
+ CPU: _upsample_bilinear2d_aa_backward_out_cpu
9731
+ CUDA: _upsample_bilinear2d_aa_backward_out_cuda
9732
+
9733
+ - func: _upsample_bilinear2d_aa_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
9734
+ python_module: nn
9735
+ structured_delegate: _upsample_bilinear2d_aa_backward.grad_input
9736
+
9328
9737
  - func: upsample_bicubic2d.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9329
9738
  python_module: nn
9330
9739
  structured: True
@@ -9347,6 +9756,28 @@
9347
9756
  python_module: nn
9348
9757
  structured_delegate: upsample_bicubic2d_backward.grad_input
9349
9758
 
9759
+ - func: _upsample_bicubic2d_aa.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9760
+ python_module: nn
9761
+ structured: True
9762
+ dispatch:
9763
+ CPU: _upsample_bicubic2d_aa_out_cpu
9764
+ CUDA: _upsample_bicubic2d_aa_out_cuda
9765
+
9766
+ - func: _upsample_bicubic2d_aa(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
9767
+ python_module: nn
9768
+ structured_delegate: _upsample_bicubic2d_aa.out
9769
+
9770
+ - func: _upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9771
+ python_module: nn
9772
+ structured: True
9773
+ dispatch:
9774
+ CPU: _upsample_bicubic2d_aa_backward_out_cpu
9775
+ CUDA: _upsample_bicubic2d_aa_backward_out_cuda
9776
+
9777
+ - func: _upsample_bicubic2d_aa_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
9778
+ python_module: nn
9779
+ structured_delegate: _upsample_bicubic2d_aa_backward.grad_input
9780
+
9350
9781
  - func: upsample_trilinear3d.out(Tensor self, int[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9351
9782
  python_module: nn
9352
9783
  structured: True
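The `_upsample_bilinear2d_aa` and `_upsample_bicubic2d_aa` entries are the anti-aliased resampling kernels reached through the `antialias` flag of `torch.nn.functional.interpolate` in upstream PyTorch 1.11; they matter mainly when downscaling. A hedged sketch against the upstream Python frontend, not torch-rb:

    import torch
    import torch.nn.functional as F

    img = torch.rand(1, 3, 256, 256)

    # Plain bilinear downscale vs. the anti-aliased kernel declared above.
    down_plain = F.interpolate(img, size=(64, 64), mode="bilinear", align_corners=False)
    down_aa = F.interpolate(img, size=(64, 64), mode="bilinear", align_corners=False, antialias=True)

    # Bicubic takes the same flag and dispatches to _upsample_bicubic2d_aa instead.
    down_cubic_aa = F.interpolate(img, size=(64, 64), mode="bicubic", align_corners=False, antialias=True)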
@@ -9376,10 +9807,21 @@
9376
9807
  CPU: upsample_nearest1d_out_cpu
9377
9808
  CUDA: upsample_nearest1d_out_cuda
9378
9809
 
9810
+ - func: _upsample_nearest_exact1d.out(Tensor self, int[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
9811
+ python_module: nn
9812
+ structured: True
9813
+ dispatch:
9814
+ CPU: _upsample_nearest_exact1d_out_cpu
9815
+ CUDA: _upsample_nearest_exact1d_out_cuda
9816
+
9379
9817
  - func: upsample_nearest1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor
9380
9818
  python_module: nn
9381
9819
  structured_delegate: upsample_nearest1d.out
9382
9820
 
9821
+ - func: _upsample_nearest_exact1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor
9822
+ python_module: nn
9823
+ structured_delegate: _upsample_nearest_exact1d.out
9824
+
9383
9825
  - func: upsample_nearest1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9384
9826
  python_module: nn
9385
9827
  structured: True
@@ -9387,10 +9829,21 @@
9387
9829
  CPU: upsample_nearest1d_backward_out_cpu
9388
9830
  CUDA: upsample_nearest1d_backward_out_cuda
9389
9831
 
9832
+ - func: _upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9833
+ python_module: nn
9834
+ structured: True
9835
+ dispatch:
9836
+ CPU: _upsample_nearest_exact1d_backward_out_cpu
9837
+ CUDA: _upsample_nearest_exact1d_backward_out_cuda
9838
+
9390
9839
  - func: upsample_nearest1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor
9391
9840
  python_module: nn
9392
9841
  structured_delegate: upsample_nearest1d_backward.grad_input
9393
9842
 
9843
+ - func: _upsample_nearest_exact1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor
9844
+ python_module: nn
9845
+ structured_delegate: _upsample_nearest_exact1d_backward.grad_input
9846
+
9394
9847
  - func: upsample_nearest2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9395
9848
  python_module: nn
9396
9849
  structured: True
@@ -9398,12 +9851,25 @@
9398
9851
  CPU: upsample_nearest2d_out_cpu
9399
9852
  CUDA: upsample_nearest2d_out_cuda
9400
9853
 
9854
+ - func: _upsample_nearest_exact2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9855
+ python_module: nn
9856
+ structured: True
9857
+ dispatch:
9858
+ CPU: _upsample_nearest_exact2d_out_cpu
9859
+ CUDA: _upsample_nearest_exact2d_out_cuda
9860
+
9401
9861
  - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
9402
9862
  python_module: nn
9403
9863
  structured_delegate: upsample_nearest2d.out
9404
9864
  dispatch:
9405
9865
  QuantizedCPU: upsample_nearest2d_quantized_cpu
9406
9866
 
9867
+ - func: _upsample_nearest_exact2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
9868
+ python_module: nn
9869
+ structured_delegate: _upsample_nearest_exact2d.out
9870
+ dispatch:
9871
+ QuantizedCPU: _upsample_nearest_exact2d_quantized_cpu
9872
+
9407
9873
  - func: upsample_nearest2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9408
9874
  python_module: nn
9409
9875
  structured: True
@@ -9411,10 +9877,21 @@
9411
9877
  CPU: upsample_nearest2d_backward_out_cpu
9412
9878
  CUDA: upsample_nearest2d_backward_out_cuda
9413
9879
 
9880
+ - func: _upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9881
+ python_module: nn
9882
+ structured: True
9883
+ dispatch:
9884
+ CPU: _upsample_nearest_exact2d_backward_out_cpu
9885
+ CUDA: _upsample_nearest_exact2d_backward_out_cuda
9886
+
9414
9887
  - func: upsample_nearest2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
9415
9888
  python_module: nn
9416
9889
  structured_delegate: upsample_nearest2d_backward.grad_input
9417
9890
 
9891
+ - func: _upsample_nearest_exact2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
9892
+ python_module: nn
9893
+ structured_delegate: _upsample_nearest_exact2d_backward.grad_input
9894
+
9418
9895
  - func: upsample_nearest3d.out(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9419
9896
  python_module: nn
9420
9897
  structured: True
@@ -9422,12 +9899,25 @@
9422
9899
  CPU: upsample_nearest3d_out_cpu
9423
9900
  CUDA: upsample_nearest3d_out_cuda
9424
9901
 
9902
+ - func: _upsample_nearest_exact3d.out(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9903
+ python_module: nn
9904
+ structured: True
9905
+ dispatch:
9906
+ CPU: _upsample_nearest_exact3d_out_cpu
9907
+ CUDA: _upsample_nearest_exact3d_out_cuda
9908
+
9425
9909
  - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
9426
9910
  python_module: nn
9427
9911
  structured_delegate: upsample_nearest3d.out
9428
9912
  dispatch:
9429
9913
  QuantizedCPU: upsample_nearest3d_quantized_cpu
9430
9914
 
9915
+ - func: _upsample_nearest_exact3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
9916
+ python_module: nn
9917
+ structured_delegate: _upsample_nearest_exact3d.out
9918
+ dispatch:
9919
+ QuantizedCPU: _upsample_nearest_exact3d_quantized_cpu
9920
+
9431
9921
  - func: upsample_nearest3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9432
9922
  python_module: nn
9433
9923
  structured: True
@@ -9435,10 +9925,21 @@
9435
9925
  CPU: upsample_nearest3d_backward_out_cpu
9436
9926
  CUDA: upsample_nearest3d_backward_out_cuda
9437
9927
 
9928
+ - func: _upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9929
+ python_module: nn
9930
+ structured: True
9931
+ dispatch:
9932
+ CPU: _upsample_nearest_exact3d_backward_out_cpu
9933
+ CUDA: _upsample_nearest_exact3d_backward_out_cuda
9934
+
9438
9935
  - func: upsample_nearest3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
9439
9936
  python_module: nn
9440
9937
  structured_delegate: upsample_nearest3d_backward.grad_input
9441
9938
 
9939
+ - func: _upsample_nearest_exact3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
9940
+ python_module: nn
9941
+ structured_delegate: _upsample_nearest_exact3d_backward.grad_input
9942
+
9442
9943
  - func: sigmoid_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
9443
9944
  python_module: nn
9444
9945
  structured: True
@@ -9501,18 +10002,6 @@
9501
10002
  python_module: nn
9502
10003
  structured_delegate: slow_conv_transpose2d.out
9503
10004
 
9504
- - func: slow_conv_transpose2d_backward.grad_output(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9505
- python_module: nn
9506
- dispatch:
9507
- CPU: slow_conv_transpose2d_backward_out_cpu
9508
- CUDA: slow_conv_transpose2d_backward_out_cuda
9509
-
9510
- - func: slow_conv_transpose2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9511
- python_module: nn
9512
- dispatch:
9513
- CPU: slow_conv_transpose2d_backward_cpu
9514
- CUDA: slow_conv_transpose2d_backward_cuda
9515
-
9516
10005
  - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
9517
10006
  python_module: nn
9518
10007
  dispatch:
@@ -9525,43 +10014,31 @@
9525
10014
  CPU: slow_conv_transpose3d_cpu
9526
10015
  CUDA: slow_conv_transpose3d_cuda
9527
10016
 
9528
- - func: slow_conv_transpose3d_backward.grad_output(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9529
- python_module: nn
9530
- dispatch:
9531
- CPU: slow_conv_transpose3d_backward_out_cpu
9532
- CUDA: slow_conv_transpose3d_backward_out_cuda
9533
-
9534
- - func: slow_conv_transpose3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9535
- python_module: nn
9536
- dispatch:
9537
- CPU: slow_conv_transpose3d_backward_cpu
9538
- CUDA: slow_conv_transpose3d_backward_cuda
9539
-
9540
10017
  - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
9541
10018
  python_module: nn
9542
10019
 
9543
10020
  - func: thnn_conv2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0) -> Tensor
9544
10021
  python_module: nn
9545
10022
 
9546
- - func: thnn_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!))
10023
+ - func: _slow_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output) -> Tensor(a!)
9547
10024
  python_module: nn
9548
10025
  dispatch:
9549
10026
  CPU: slow_conv2d_forward_out_cpu
9550
10027
  CUDA: slow_conv2d_forward_out_cuda
9551
10028
 
9552
- - func: thnn_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input)
10029
+ - func: _slow_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> Tensor
9553
10030
  python_module: nn
9554
10031
  dispatch:
9555
10032
  CPU: slow_conv2d_forward_cpu
9556
10033
  CUDA: slow_conv2d_forward_cuda
9557
10034
 
9558
- - func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
10035
+ - func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9559
10036
  python_module: nn
9560
10037
  dispatch:
9561
10038
  CPU: slow_conv2d_backward_out_cpu
9562
10039
  CUDA: slow_conv2d_backward_out_cuda
9563
10040
 
9564
- - func: thnn_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
10041
+ - func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9565
10042
  python_module: nn
9566
10043
  dispatch:
9567
10044
  CPU: slow_conv2d_backward_cpu
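This hunk removes the explicit slow_conv_transpose2d/3d backward schemas and renames thnn_conv2d_forward/backward to _slow_conv2d_forward/_slow_conv2d_backward, which no longer thread the finput/fgrad_input workspace tensors through. None of this is user-visible; convolutions still differentiate through autograd. A small check against the upstream Python frontend (assumption: PyTorch >= 1.11; torch-rb bindings not shown):

    import torch
    import torch.nn.functional as F

    # gradcheck needs double-precision inputs; it verifies analytical gradients
    # of the public conv2d call against numerical ones.
    x = torch.randn(1, 2, 5, 5, dtype=torch.float64, requires_grad=True)
    w = torch.randn(3, 2, 3, 3, dtype=torch.float64, requires_grad=True)
    torch.autograd.gradcheck(lambda x, w: F.conv2d(x, w, padding=1), (x, w))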
@@ -9578,81 +10055,39 @@
9578
10055
  dispatch:
9579
10056
  CUDA: conv_depthwise2d_cuda
9580
10057
 
9581
- - func: _conv_depthwise2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, *, Tensor(a!) grad_input, Tensor(b!) grad_weight) -> (Tensor(a!), Tensor(b!))
9582
- python_module: nn
9583
- dispatch:
9584
- CUDA: conv_depthwise2d_backward_cuda_out
9585
-
9586
- - func: _conv_depthwise2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[2] output_mask) -> (Tensor grad_input, Tensor grad_weight)
9587
- python_module: nn
9588
- dispatch:
9589
- CUDA: conv_depthwise2d_backward_cuda
9590
-
9591
10058
  - func: conv_depthwise3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, int[3] dilation) -> Tensor
9592
10059
  python_module: nn
9593
10060
  dispatch:
9594
10061
  CUDA: conv_depthwise3d_cuda
9595
10062
 
9596
- - func: conv_depthwise3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9597
- python_module: nn
9598
- dispatch:
9599
- CUDA: conv_depthwise3d_backward_cuda_out
9600
-
9601
- - func: conv_depthwise3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9602
- python_module: nn
9603
- dispatch:
9604
- CUDA: conv_depthwise3d_backward_cuda
9605
-
9606
10063
  - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
9607
10064
  python_module: nn
9608
10065
 
9609
10066
  - func: slow_conv3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0) -> Tensor
9610
10067
  python_module: nn
9611
10068
 
9612
- - func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!))
10069
+ - func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output) -> Tensor(a!)
9613
10070
  python_module: nn
9614
10071
  dispatch:
9615
10072
  CPU: slow_conv3d_forward_out_cpu
9616
10073
 
9617
- - func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input)
10074
+ - func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> Tensor
9618
10075
  python_module: nn
9619
10076
  dispatch:
9620
10077
  CPU: slow_conv3d_forward_cpu
9621
10078
 
9622
- - func: slow_conv3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9623
- python_module: nn
9624
- dispatch:
9625
- CPU: slow_conv3d_backward_out_cpu
9626
-
9627
- - func: slow_conv3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9628
- python_module: nn
9629
- dispatch:
9630
- CPU: slow_conv3d_backward_cpu
9631
-
9632
10079
  - func: slow_conv_dilated2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1) -> Tensor
9633
10080
  python_module: nn
9634
10081
  dispatch:
9635
10082
  CPU: slow_conv_dilated2d_cpu
9636
10083
  CUDA: slow_conv_dilated2d_cuda
9637
10084
 
9638
- - func: slow_conv_dilated2d_backward(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9639
- python_module: nn
9640
- dispatch:
9641
- CPU: slow_conv_dilated2d_backward_cpu
9642
- CUDA: slow_conv_dilated2d_backward_cuda
9643
-
9644
10085
  - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor
9645
10086
  python_module: nn
9646
10087
  dispatch:
9647
10088
  CPU: slow_conv_dilated3d_cpu
9648
10089
  CUDA: slow_conv_dilated3d_cuda
9649
10090
 
9650
- - func: slow_conv_dilated3d_backward(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9651
- python_module: nn
9652
- dispatch:
9653
- CPU: slow_conv_dilated3d_backward_cpu
9654
- CUDA: slow_conv_dilated3d_backward_cuda
9655
-
9656
10091
  - func: col2im.out(Tensor self, int[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)
9657
10092
  python_module: nn
9658
10093
  dispatch:
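The dedicated backward schemas for depthwise, dilated, and 3-d slow convolutions are removed here as well; gradients for these layers are produced through the regular autograd machinery rather than per-operator backward entries. Sketch of the unchanged user-level behavior (upstream Python frontend, assumed PyTorch >= 1.11):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 4, 8, 8, requires_grad=True)
    w = torch.randn(4, 1, 3, 3, requires_grad=True)  # groups == in_channels, i.e. depthwise

    y = F.conv2d(x, w, padding=1, groups=4)
    y.sum().backward()
    print(x.grad.shape, w.grad.shape)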
@@ -9714,6 +10149,10 @@
9714
10149
  variants: function, method
9715
10150
  device_check: NoCheck
9716
10151
  device_guard: False
10152
+ dispatch:
10153
+ CompositeExplicitAutograd: isinf
10154
+ SparseCPU, SparseCUDA: isinf_sparse
10155
+ SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
9717
10156
 
9718
10157
  - func: record_stream(Tensor(a!) self, Stream s) -> ()
9719
10158
  variants: method
@@ -9723,22 +10162,32 @@
9723
10162
  - func: isposinf(Tensor self) -> Tensor
9724
10163
  variants: function, method
9725
10164
  structured_delegate: isposinf.out
10165
+ dispatch:
10166
+ SparseCPU, SparseCUDA: isposinf_sparse
10167
+ SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr
9726
10168
 
9727
10169
  - func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
9728
10170
  structured: True
9729
10171
  structured_inherits: TensorIteratorBase
9730
10172
  dispatch:
9731
10173
  CPU, CUDA: isposinf_out
10174
+ SparseCPU, SparseCUDA: isposinf_sparse_out
10175
+ SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out
9732
10176
 
9733
10177
  - func: isneginf(Tensor self) -> Tensor
9734
10178
  variants: function, method
9735
10179
  structured_delegate: isneginf.out
10180
+ dispatch:
10181
+ SparseCPU, SparseCUDA: isneginf_sparse
10182
+ SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr
9736
10183
 
9737
10184
  - func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
9738
10185
  structured: True
9739
10186
  structured_inherits: TensorIteratorBase
9740
10187
  dispatch:
9741
10188
  CPU, CUDA: isneginf_out
10189
+ SparseCPU, SparseCUDA: isneginf_sparse_out
10190
+ SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out
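The new SparseCPU/SparseCUDA and SparseCsrCPU/SparseCsrCUDA dispatch entries for isinf, isposinf, and isneginf let these predicates run directly on sparse tensors. A short sketch (upstream Python frontend, assumed PyTorch >= 1.11):

    import torch

    idx = torch.tensor([[0, 1, 2], [0, 1, 2]])
    val = torch.tensor([1.0, float("inf"), float("-inf")])
    s = torch.sparse_coo_tensor(idx, val, (3, 3))

    print(torch.isinf(s).to_dense())
    print(torch.isposinf(s).to_dense())
    print(torch.isneginf(s).to_dense())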
9742
10191
 
9743
10192
  # NOTE [_add_batch_dim and _remove_batch_dim]
9744
10193
  # _add_batch_dim and _remove_batch_dim are meant to be used in the implementation
@@ -10065,11 +10514,11 @@
10065
10514
  python_module: special
10066
10515
  variants: function
10067
10516
 
10068
- - func: special_round(Tensor self) -> Tensor
10517
+ - func: special_round(Tensor self, *, int decimals=0) -> Tensor
10069
10518
  python_module: special
10070
10519
  variants: function
10071
10520
 
10072
- - func: special_round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
10521
+ - func: special_round.out(Tensor self, *, int decimals=0, Tensor(a!) out) -> Tensor(a!)
10073
10522
  python_module: special
10074
10523
  variants: function
10075
10524
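special_round now takes a keyword-only decimals argument, mirroring the decimals overload of torch.round. Sketch (upstream Python frontend; that the torch.special alias forwards the argument is an assumption based on this schema):

    import torch

    x = torch.tensor([0.1234, 5.6789])
    print(torch.special.round(x, decimals=2))  # ~ tensor([0.1200, 5.6800])
    print(torch.round(x, decimals=2))          # special.round is documented as an alias of round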
 
@@ -10109,6 +10558,10 @@
10109
10558
  python_module: special
10110
10559
  variants: function
10111
10560
 
10561
+ - func: special_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
10562
+ python_module: special
10563
+ variants: function
10564
+
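special_softmax exposes softmax under the torch.special namespace; it computes the same result as the regular softmax. Sketch (upstream Python frontend, assumed PyTorch >= 1.11):

    import torch

    x = torch.randn(2, 5)
    a = torch.special.softmax(x, dim=-1)
    b = torch.softmax(x, dim=-1)
    print(torch.allclose(a, b))  # True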
10112
10565
  ## Functions related to the fast Fourier transform and the torch.fft namespace
10113
10566
  # Note [FFT namespace binding]
10114
10567
  # Functions in the fft python module should have their names start with
@@ -10200,6 +10653,26 @@
10200
10653
  python_module: fft
10201
10654
  variants: function
10202
10655
 
10656
+ - func: fft_hfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
10657
+ use_const_ref_for_mutable_tensors: True
10658
+ python_module: fft
10659
+ variants: function
10660
+
10661
+ - func: fft_hfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
10662
+ use_const_ref_for_mutable_tensors: True
10663
+ python_module: fft
10664
+ variants: function
10665
+
10666
+ - func: fft_ihfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
10667
+ use_const_ref_for_mutable_tensors: True
10668
+ python_module: fft
10669
+ variants: function
10670
+
10671
+ - func: fft_ihfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
10672
+ use_const_ref_for_mutable_tensors: True
10673
+ python_module: fft
10674
+ variants: function
10675
+
10203
10676
  - func: fft_fftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
10204
10677
  python_module: fft
10205
10678
  variants: function
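fft_hfft2/fft_ihfft2 are the 2-D Hermitian FFT pair: ihfft2 maps a real signal to its one-sided (Hermitian) spectrum, and hfft2 maps such a spectrum back to a real signal. Round-trip sketch (upstream Python frontend, assumed PyTorch >= 1.11):

    import torch

    x = torch.randn(8, 8, dtype=torch.float64)
    spec = torch.fft.ihfft2(x)               # complex, shape (8, 5): last dim is halved
    x_rt = torch.fft.hfft2(spec, s=x.shape)  # back to a real (8, 8) signal
    print(torch.allclose(x, x_rt))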
@@ -10232,6 +10705,26 @@
10232
10705
  python_module: fft
10233
10706
  variants: function
10234
10707
 
10708
+ - func: fft_hfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
10709
+ use_const_ref_for_mutable_tensors: True
10710
+ python_module: fft
10711
+ variants: function
10712
+
10713
+ - func: fft_hfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
10714
+ use_const_ref_for_mutable_tensors: True
10715
+ python_module: fft
10716
+ variants: function
10717
+
10718
+ - func: fft_ihfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
10719
+ use_const_ref_for_mutable_tensors: True
10720
+ python_module: fft
10721
+ variants: function
10722
+
10723
+ - func: fft_ihfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
10724
+ use_const_ref_for_mutable_tensors: True
10725
+ python_module: fft
10726
+ variants: function
10727
+
10235
10728
  - func: fft_fftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
10236
10729
  python_module: fft
10237
10730
  variants: function
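fft_hfftn/fft_ihfftn generalize the same Hermitian pair to an arbitrary set of dimensions. Sketch (upstream Python frontend, same assumptions as above):

    import torch

    x = torch.randn(4, 6, 8, dtype=torch.float64)
    spec = torch.fft.ihfftn(x, dim=(0, 1, 2))              # last transformed dim is halved to 5
    x_rt = torch.fft.hfftn(spec, s=x.shape, dim=(0, 1, 2))
    print(torch.allclose(x, x_rt))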
@@ -10286,6 +10779,38 @@
10286
10779
  python_module: linalg
10287
10780
  variants: function
10288
10781
 
10782
+ - func: linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor
10783
+ python_module: linalg
10784
+ variants: function
10785
+ dispatch:
10786
+ CPU, CUDA: linalg_cross
10787
+
10788
+ - func: linalg_cross.out(Tensor self, Tensor other, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)
10789
+ python_module: linalg
10790
+ dispatch:
10791
+ CPU, CUDA: linalg_cross_out
10792
+
10793
+ # linalg.lu_factor
10794
+ - func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
10795
+ python_module: linalg
10796
+ variants: function
10797
+
10798
+ - func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
10799
+ python_module: linalg
10800
+ variants: function
10801
+
10802
+ - func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
10803
+ python_module: linalg
10804
+ structured_delegate: linalg_lu_factor_ex.out
10805
+ variants: function
10806
+
10807
+ - func: linalg_lu_factor_ex.out(Tensor A, *, bool pivot=True, bool check_errors=False, Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info)
10808
+ python_module: linalg
10809
+ variants: function
10810
+ structured: True
10811
+ dispatch:
10812
+ CPU, CUDA: linalg_lu_factor_ex_out
10813
+
10289
10814
  - func: linalg_det(Tensor self) -> Tensor
10290
10815
  python_module: linalg
10291
10816
  variants: function
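linalg_cross and the linalg_lu_factor family surface as torch.linalg.cross, torch.linalg.lu_factor, and torch.linalg.lu_factor_ex in the upstream Python frontend. Sketch (assumed PyTorch >= 1.11):

    import torch

    a = torch.randn(4, 3)
    b = torch.randn(4, 3)
    c = torch.linalg.cross(a, b, dim=-1)        # batched 3-vector cross product

    A = torch.randn(3, 3, dtype=torch.float64)
    LU, pivots = torch.linalg.lu_factor(A)                # packed P A = L U factorization
    LU2, pivots2, info = torch.linalg.lu_factor_ex(A)     # returns info instead of checking errors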
@@ -10327,6 +10852,12 @@
10327
10852
  - func: linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
10328
10853
  python_module: linalg
10329
10854
 
10855
+ - func: linalg_matrix_exp(Tensor self) -> Tensor
10856
+ python_module: linalg
10857
+ variants: function
10858
+ dispatch:
10859
+ CPU, CUDA: linalg_matrix_exp
10860
+
10330
10861
  - func: linalg_slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
10331
10862
  python_module: linalg
10332
10863
  variants: function
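linalg_matrix_exp is the matrix exponential, previously reachable only as torch.matrix_exp. Sketch (upstream Python frontend, assumed PyTorch >= 1.11):

    import torch

    # exp of a rotation generator is a rotation matrix:
    # [[cos 1, sin 1], [-sin 1, cos 1]]
    A = torch.tensor([[0.0, 1.0],
                      [-1.0, 0.0]])
    print(torch.linalg.matrix_exp(A))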
@@ -10467,18 +10998,30 @@
10467
10998
  - func: linalg_matrix_norm.str_ord_out(Tensor self, str ord='fro', int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
10468
10999
  python_module: linalg
10469
11000
 
10470
- - func: linalg_svd.U(Tensor self, bool full_matrices=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
11001
+ # This function exposes the `compute_uv` flag, which is then used to implement `linalg.svd` and
11002
+ # `linalg.svdvals` as composite functions that call this one
11003
+ - func: _linalg_svd(Tensor A, bool full_matrices=False, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor Vh)
11004
+ variants: function
11005
+ structured_delegate: _linalg_svd.U
11006
+
11007
+ - func: _linalg_svd.U(Tensor A, bool full_matrices=False, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
11008
+ structured: True
11009
+ dispatch:
11010
+ CPU, CUDA: _linalg_svd_out
11011
+
11012
+ - func: linalg_svd(Tensor A, bool full_matrices=True) -> (Tensor U, Tensor S, Tensor Vh)
10471
11013
  python_module: linalg
11014
+ variants: function
10472
11015
 
10473
- - func: linalg_svd(Tensor self, bool full_matrices=True) -> (Tensor U, Tensor S, Tensor Vh)
11016
+ - func: linalg_svd.U(Tensor A, bool full_matrices=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
10474
11017
  python_module: linalg
10475
11018
  variants: function
10476
11019
 
10477
- - func: linalg_svdvals(Tensor input) -> Tensor
11020
+ - func: linalg_svdvals(Tensor A) -> Tensor
10478
11021
  python_module: linalg
10479
11022
  variants: function
10480
11023
 
10481
- - func: linalg_svdvals.out(Tensor input, *, Tensor(a!) out) -> Tensor(a!)
11024
+ - func: linalg_svdvals.out(Tensor A, *, Tensor(a!) out) -> Tensor(a!)
10482
11025
  python_module: linalg
10483
11026
  variants: function
10484
11027
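With the internal _linalg_svd carrying the compute_uv flag, linalg.svd and linalg.svdvals become thin composites over it; the public calls are unchanged apart from the argument being renamed to A. Sketch (upstream Python frontend, assumed PyTorch >= 1.11):

    import torch

    A = torch.randn(5, 3, dtype=torch.float64)
    U, S, Vh = torch.linalg.svd(A, full_matrices=False)
    S_only = torch.linalg.svdvals(A)            # singular values only, no U/Vh

    print(torch.allclose(U @ torch.diag(S) @ Vh, A))
    print(torch.allclose(S, S_only))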
 
@@ -10498,7 +11041,29 @@
10498
11041
  python_module: linalg
10499
11042
  variants: function
10500
11043
 
10501
- - func: linalg_pinv(Tensor self, float rcond=1e-15, bool hermitian=False) -> Tensor
11044
+ - func: linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
11045
+ python_module: linalg
11046
+ variants: function
11047
+ dispatch:
11048
+ CompositeExplicitAutograd: linalg_pinv
11049
+
11050
+ - func: linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
11051
+ python_module: linalg
11052
+ variants: function
11053
+ dispatch:
11054
+ CompositeExplicitAutograd: linalg_pinv_out
11055
+
11056
+ - func: linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor
11057
+ cpp_no_default_args: ['atol', 'rtol']
11058
+ python_module: linalg
11059
+ variants: function
11060
+
11061
+ - func: linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
11062
+ cpp_no_default_args: ['atol', 'rtol']
11063
+ python_module: linalg
11064
+ variants: function
11065
+
11066
+ - func: linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor
10502
11067
  python_module: linalg
10503
11068
  variants: function
10504
11069
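linalg_pinv gains atol/rtol overloads (absolute and relative cutoffs on singular values), while the legacy rcond overload loses its schema-level default. Sketch (upstream Python frontend, assumed PyTorch >= 1.11):

    import torch

    A = torch.randn(4, 6, dtype=torch.float64)
    P_default = torch.linalg.pinv(A)                        # default tolerances
    P_rel = torch.linalg.pinv(A, rtol=1e-10)                # relative cutoff only
    P_abs = torch.linalg.pinv(A, atol=1e-12, rtol=0.0)      # purely absolute cutoff

    print(torch.allclose(A @ P_default @ A, A))             # A P A == A for a pseudoinverse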
 
@@ -10506,7 +11071,7 @@
10506
11071
  python_module: linalg
10507
11072
  variants: function
10508
11073
 
10509
- - func: linalg_pinv.out(Tensor self, float rcond=1e-15, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
11074
+ - func: linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
10510
11075
  python_module: linalg
10511
11076
  variants: function
10512
11077
 
@@ -10565,11 +11130,29 @@
10565
11130
  - func: linalg_matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)
10566
11131
  python_module: linalg
10567
11132
 
10568
- - func: linalg_matrix_rank(Tensor self, float? tol=None, bool hermitian=False) -> Tensor
11133
+ - func: linalg_matrix_rank.atol_rtol_tensor(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
10569
11134
  python_module: linalg
10570
11135
  variants: function
10571
11136
 
10572
- - func: linalg_matrix_rank.out(Tensor self, float? tol=None, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
11137
+ - func: linalg_matrix_rank.atol_rtol_tensor_out(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
11138
+ python_module: linalg
11139
+ variants: function
11140
+
11141
+ - func: linalg_matrix_rank.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor
11142
+ cpp_no_default_args: ['atol', 'rtol']
11143
+ python_module: linalg
11144
+ variants: function
11145
+
11146
+ - func: linalg_matrix_rank.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
11147
+ cpp_no_default_args: ['atol', 'rtol']
11148
+ python_module: linalg
11149
+ variants: function
11150
+
11151
+ - func: linalg_matrix_rank(Tensor self, float tol, bool hermitian=False) -> Tensor
11152
+ python_module: linalg
11153
+ variants: function
11154
+
11155
+ - func: linalg_matrix_rank.out(Tensor self, float tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
10573
11156
  python_module: linalg
10574
11157
  variants: function
10575
11158
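linalg_matrix_rank gets the same atol/rtol treatment as linalg_pinv. Sketch (upstream Python frontend, assumed PyTorch >= 1.11):

    import torch

    A = torch.diag(torch.tensor([1.0, 1e-3, 1e-9, 0.0], dtype=torch.float64))
    print(torch.linalg.matrix_rank(A))              # default tolerance -> 3
    print(torch.linalg.matrix_rank(A, atol=1e-6))   # absolute cutoff   -> 2
    print(torch.linalg.matrix_rank(A, rtol=1e-12))  # relative cutoff   -> 3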
 
@@ -10622,6 +11205,12 @@
10622
11205
  cpp_no_default_args: ['a', 'b']
10623
11206
  python_module: nn
10624
11207
 
11208
+ # Note: this function is only for testing.
11209
+ - func: _test_warn_in_autograd(Tensor self) -> Tensor
11210
+ python_module: nn
11211
+ dispatch:
11212
+ CompositeExplicitAutograd: _test_warn_in_autograd
11213
+
10625
11214
  - func: segment_reduce(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, int axis=0, bool unsafe=False, Scalar? initial=None) -> Tensor
10626
11215
  variants: function
10627
11216
  dispatch: