torch-rb 0.9.2 → 0.10.0

@@ -100,10 +100,49 @@
 
  - func: _make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)
  variants: function
+ dispatch:
+ CompositeExplicitAutograd: _make_dual
 
  - func: _unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)
  variants: function
 
+ # NOTE: [_new_zeros_with_same_feature_meta]
+ # This function creates a new tensor with the layout and TensorOptions
+ # of `other` but also takes into account the batch dimensions of `self`
+ #
+ # This function has a couple extra constraints because it is also used for `jvp`
+ # in functorch.
+ # - is used for forward AD because there is the restriction
+ # that the primal and tangent must have the same layout
+ # - We cannot assume that `self` and `other` have the same sizes or even dim
+ # because in the inplace over view case, `other` is the base tensor, and
+ # `self` is the forward grad with respect to the view, which can have an
+ # entirely different shape
+ # - takes the number of batch dims for `self` because we also handle
+ # some batching logic. We handle that here instead of a batching rule because
+ # we'd like to avoid calling as_strided in the batching rule (as to enable
+ # nested vmap in functorch).
+ # - needs to be CompositeExplicitAutograd for jvp support in functorch.
+ # functorch currently relies on TensorWrapper which does not have storage
+ # CompositeExplicitAutograd makes sure the TensorWrapper is unwrapped.
+ # - this function may eventually take on another int argument to store the
+ # the number of batch dims for other once we support that use case
+ - func: _new_zeros_with_same_feature_meta(Tensor self, Tensor other, *, int self_num_batch_dims=0) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _new_zeros_with_same_feature_meta
+
+ # This function compares the storage numel of self with that of other, where
+ # storage numel is cumputed as: `other.storage().nbytes() / other.itemsize()`.
+ # We create this function for composite compliance purposes. The batching rule
+ # always returns true because vmapped as_strided does not support accessing
+ # storage locations not indexable by the input tensor.
+ # See the note above for more information.
+ - func: _has_same_storage_numel(Tensor self, Tensor other) -> bool
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _has_same_storage_numel
+
  - func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
  variants: method
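The note above is the contract for `_new_zeros_with_same_feature_meta`: it builds a zero tensor whose sizes, strides and storage size mirror `other`, optionally prepending `self`'s batch dimensions. The op is an internal helper rather than public API; a minimal sketch of what it returns, assuming it is reachable through the usual `torch.ops.aten` namespace:

    import torch

    base = torch.randn(4, 3)       # e.g. the primal / base tensor
    view_grad = torch.randn(12)    # forward grad of a view; shape differs from base

    # Zeros that share base's feature metadata (sizes/strides/storage numel).
    z = torch.ops.aten._new_zeros_with_same_feature_meta(view_grad, base)
    print(z.shape, z.stride())     # expected to match base's sizes and strides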
 
@@ -176,6 +215,17 @@
  dispatch:
  CUDA: masked_scale_cuda
 
+ - func: native_dropout(Tensor input, float p, bool? train) -> (Tensor, Tensor)
+ variants: function
+ dispatch:
+ CPU: native_dropout_cpu
+ CUDA: native_dropout_cuda
+
+ - func: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
+ dispatch:
+ CPU: native_dropout_backward_cpu
+ CUDA: native_dropout_backward_cuda
+
  - func: _sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)
 
  - func: _sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)
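The new `native_dropout` entry a few lines up returns both the dropped-out tensor and the mask that `native_dropout_backward` later consumes together with the scale `1 / (1 - p)`. A minimal sketch of the round trip, assuming both ops are callable through `torch.ops.aten` on a build that provides the CPU kernels:

    import torch

    x = torch.randn(6)
    p = 0.5
    out, mask = torch.ops.aten.native_dropout(x, p, True)   # train=True
    scale = 1.0 / (1.0 - p)
    grad_in = torch.ops.aten.native_dropout_backward(torch.ones_like(x), mask, scale)
    # grad_in is `scale` wherever the mask kept an element and 0 elsewhere.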
@@ -209,17 +259,23 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: abs
+ SparseCPU, SparseCUDA: abs_sparse
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
 
  - func: abs_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: abs_
+ SparseCPU, SparseCUDA: abs_sparse_
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
 
  - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA: abs_out
+ SparseCPU, SparseCUDA: abs_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
 
  # Note [Adding an alias]
  # To add an alias do the following:
@@ -231,18 +287,15 @@
  # will stop it from "inheriting" the original operation's autograd behavior.
  # 2) Implement the corresponding functions and have them redispatch to the
  # original function.
- # 3) Add entries for the alias (and original function, if needed) to
- # aten/src/ATen/core/interned_strings.h
- # (This may require removing an entry from ATen/core/aten_interned_strings.h.)
- # 4) Add docstrings to the new function that reference the original function,
+ # 3) Add docstrings to the new function that reference the original function,
  # and document the method as usual (if it exists.)
  # (See torch/_torch_docs.py and docs/source/torch.rst if adding a function,
  # torch/_tensor_docs.py and docs/source/tensors.rst if adding a method,
  # or module-specific doc bindings (like torch/linalg/__init__.py) if
  # adding an alias in a namespace.)
- # 5) Update torch/overrides.py consistent with the original function.
- # 6) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
- # 7) Add aliases argument to existing OpInfo/UnaryUfuncInfo or create new OpInfo/UnaryUfuncInfo entry
+ # 4) Update torch/overrides.py consistent with the original function.
+ # 5) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
+ # 6) Add aliases argument to existing OpInfo/UnaryUfuncInfo or create new OpInfo/UnaryUfuncInfo entry
  # in op_db list in torch/testing/_internal/common_methods_invocations.py
  #
  # See torch.absolute, an alias for torch.abs, as an example.
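The renumbered steps above only change the bookkeeping for aliases (docs, overrides, JIT normalization, OpInfo entries); the alias itself still just redispatches to the original op, so the `torch.absolute`/`torch.abs` pair the note cites behaves identically from Python:

    import torch

    t = torch.tensor([-1.5, 2.0, -3.0])
    assert torch.equal(torch.absolute(t), torch.abs(t))   # function alias
    assert torch.equal(t.absolute(), t.abs())             # method alias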
@@ -264,11 +317,13 @@
264
317
  variants: function, method
265
318
  dispatch:
266
319
  CPU, CUDA: angle
320
+ SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr
267
321
 
268
322
  - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
269
323
  device_check: NoCheck # TensorIterator
270
324
  dispatch:
271
325
  CPU, CUDA: angle_out
326
+ SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out
272
327
 
273
328
  - func: view_as_real(Tensor(a) self) -> Tensor(a)
274
329
  variants: function
@@ -283,16 +338,24 @@
283
338
  - func: sgn(Tensor self) -> Tensor
284
339
  variants: function, method
285
340
  structured_delegate: sgn.out
341
+ dispatch:
342
+ SparseCPU, SparseCUDA: sgn_sparse
343
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
286
344
 
287
345
  - func: sgn_(Tensor(a!) self) -> Tensor(a!)
288
346
  variants: method
289
347
  structured_delegate: sgn.out
348
+ dispatch:
349
+ SparseCPU, SparseCUDA: sgn_sparse_
350
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
290
351
 
291
352
  - func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
292
353
  structured: True
293
354
  structured_inherits: TensorIteratorBase
294
355
  dispatch:
295
356
  CPU, CUDA: sgn_out
357
+ SparseCPU, SparseCUDA: sgn_sparse_out
358
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
296
359
 
297
360
  - func: real(Tensor(a) self) -> Tensor(a)
298
361
  device_check: NoCheck # TensorIterator
@@ -315,6 +378,7 @@
315
378
  variants: function, method
316
379
  dispatch:
317
380
  CompositeExplicitAutograd: _conj_physical
381
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr
318
382
 
319
383
  - func: conj_physical(Tensor self) -> Tensor
320
384
  variants: function, method
@@ -323,11 +387,13 @@
323
387
  dispatch:
324
388
  CPU, CUDA: conj_physical_out
325
389
  SparseCPU, SparseCUDA: conj_physical_out_sparse
390
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
326
391
 
327
392
  - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
328
393
  variants: function, method
329
394
  dispatch:
330
395
  CompositeExplicitAutograd: conj_physical_
396
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_
331
397
 
332
398
  - func: resolve_conj(Tensor(a) self) -> Tensor(a)
333
399
  variants: function, method
@@ -381,6 +447,7 @@
381
447
  SparseCPU, SparseCUDA: add_sparse
382
448
  SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
383
449
  MkldnnCPU: mkldnn_add
450
+ ZeroTensor: add_zerotensor
384
451
 
385
452
  - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
386
453
  device_check: NoCheck # TensorIterator
@@ -454,6 +521,8 @@
454
521
  dispatch:
455
522
  CPU: addmv_out_cpu
456
523
  CUDA: addmv_out_cuda
524
+ SparseCsrCPU: addmv_out_sparse_csr
525
+ SparseCsrCUDA: addmv_out_sparse_csr_cuda
457
526
 
458
527
  - func: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
459
528
  variants: function, method
@@ -532,7 +601,7 @@
532
601
 
533
602
  - func: arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
534
603
  dispatch:
535
- CPU: arange_cpu_out
604
+ CPU, Meta: arange_out
536
605
  CUDA: arange_cuda_out
537
606
 
538
607
  # This function is a temporary hack to allow tracing of arange like constructs with dynamic
@@ -588,16 +657,24 @@
588
657
  - func: asinh(Tensor self) -> Tensor
589
658
  variants: function, method
590
659
  structured_delegate: asinh.out
660
+ dispatch:
661
+ SparseCPU, SparseCUDA: asinh_sparse
662
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr
591
663
 
592
664
  - func: asinh_(Tensor(a!) self) -> Tensor(a!)
593
665
  variants: function, method
594
666
  structured_delegate: asinh.out
667
+ dispatch:
668
+ SparseCPU, SparseCUDA: asinh_sparse_
669
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_
595
670
 
596
671
  - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
597
672
  structured: True
598
673
  structured_inherits: TensorIteratorBase
599
674
  dispatch:
600
675
  CPU, CUDA: asinh_out
676
+ SparseCPU, SparseCUDA: asinh_sparse_out
677
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
601
678
 
602
679
  # arcsinh, alias for asinh
603
680
  - func: arcsinh(Tensor self) -> Tensor
@@ -611,16 +688,25 @@
611
688
  - func: atanh(Tensor self) -> Tensor
612
689
  structured_delegate: atanh.out
613
690
  variants: function, method
691
+ dispatch:
692
+ CompositeExplicitAutograd: atanh
693
+ SparseCPU, SparseCUDA: atanh_sparse
694
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr
614
695
 
615
696
  - func: atanh_(Tensor(a!) self) -> Tensor(a!)
616
697
  structured_delegate: atanh.out
617
698
  variants: function, method
699
+ dispatch:
700
+ SparseCPU, SparseCUDA: atanh_sparse_
701
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_
618
702
 
619
703
  - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
620
704
  structured: True
621
705
  structured_inherits: TensorIteratorBase
622
706
  dispatch:
623
707
  CPU, CUDA: atanh_out
708
+ SparseCPU, SparseCUDA: atanh_sparse_out
709
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
624
710
 
625
711
  # arctanh, alias for atanh
626
712
  - func: arctanh(Tensor self) -> Tensor
@@ -634,7 +720,7 @@
634
720
  - func: as_strided(Tensor(a) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a)
635
721
  variants: function, method
636
722
  dispatch:
637
- CPU, CUDA, Meta: as_strided_tensorimpl
723
+ ZeroTensor, CPU, CUDA, Meta: as_strided_tensorimpl
638
724
  QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
639
725
  device_check: NoCheck
640
726
  device_guard: False
@@ -644,6 +730,7 @@
644
730
  variants: function, method
645
731
  device_check: NoCheck
646
732
  device_guard: False
733
+ tags: inplace_view
647
734
  dispatch:
648
735
  CompositeExplicitAutograd: as_strided_
649
736
 
@@ -653,6 +740,7 @@
653
740
  structured_delegate: asin.out
654
741
  dispatch:
655
742
  SparseCPU, SparseCUDA: asin_sparse
743
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr
656
744
 
657
745
  - func: asin_(Tensor(a!) self) -> Tensor(a!)
658
746
  device_check: NoCheck # TensorIterator
@@ -660,6 +748,7 @@
660
748
  structured_delegate: asin.out
661
749
  dispatch:
662
750
  SparseCPU, SparseCUDA: asin_sparse_
751
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_
663
752
 
664
753
  - func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
665
754
  device_check: NoCheck # TensorIterator
@@ -667,7 +756,8 @@
667
756
  structured_inherits: TensorIteratorBase
668
757
  dispatch:
669
758
  CPU, CUDA: asin_out
670
- SparseCPU, SparseCUDA: asin_out_sparse
759
+ SparseCPU, SparseCUDA: asin_sparse_out
760
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
671
761
 
672
762
  # arcsin, alias of asin
673
763
  - func: arcsin(Tensor self) -> Tensor
@@ -682,11 +772,17 @@
682
772
  device_check: NoCheck # TensorIterator
683
773
  structured_delegate: atan.out
684
774
  variants: function, method
775
+ dispatch:
776
+ SparseCPU, SparseCUDA: atan_sparse
777
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr
685
778
 
686
779
  - func: atan_(Tensor(a!) self) -> Tensor(a!)
687
780
  device_check: NoCheck # TensorIterator
688
781
  structured_delegate: atan.out
689
782
  variants: function, method
783
+ dispatch:
784
+ SparseCPU, SparseCUDA: atan_sparse_
785
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_
690
786
 
691
787
  - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
692
788
  device_check: NoCheck # TensorIterator
@@ -694,6 +790,8 @@
694
790
  structured_inherits: TensorIteratorBase
695
791
  dispatch:
696
792
  CPU, CUDA: atan_out
793
+ SparseCPU, SparseCUDA: atan_sparse_out
794
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
697
795
 
698
796
  # arctan, alias of atan
699
797
  - func: arctan(Tensor self) -> Tensor
@@ -723,24 +821,19 @@
723
821
 
724
822
  - func: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
725
823
  variants: function, method
726
- dispatch:
727
- CPU: baddbmm_cpu
728
- CUDA: baddbmm_cuda
824
+ structured_delegate: baddbmm.out
729
825
 
730
826
  - func: baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
731
827
  variants: method
732
- dispatch:
733
- CPU: baddbmm__cpu
734
- CUDA: baddbmm__cuda
735
-
736
- - func: _baddbmm_mkl_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
737
- variants: function
828
+ structured_delegate: baddbmm.out
738
829
 
739
830
  - func: baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
831
+ structured: True
740
832
  variants: function
741
833
  dispatch:
742
834
  CPU: baddbmm_out_cpu
743
835
  CUDA: baddbmm_out_cuda
836
+ SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
744
837
 
745
838
  - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
746
839
 
@@ -788,7 +881,7 @@
788
881
  device_check: NoCheck # TensorIterator
789
882
  variants: function, method
790
883
 
791
- - func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias) -> Tensor
884
+ - func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias=None) -> Tensor
792
885
 
793
886
  - func: binary_cross_entropy(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
794
887
  device_check: NoCheck # TensorIterator
@@ -886,10 +979,14 @@
886
979
  - func: logical_not(Tensor self) -> Tensor
887
980
  device_check: NoCheck # TensorIterator
888
981
  variants: function, method
982
+ dispatch:
983
+ CompositeExplicitAutograd: logical_not
889
984
 
890
985
  - func: logical_not_(Tensor(a!) self) -> Tensor(a!)
891
986
  device_check: NoCheck # TensorIterator
892
987
  variants: method
988
+ dispatch:
989
+ CompositeExplicitAutograd: logical_not_
893
990
 
894
991
  - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
895
992
  device_check: NoCheck # TensorIterator
@@ -899,10 +996,14 @@
899
996
  - func: logical_xor(Tensor self, Tensor other) -> Tensor
900
997
  device_check: NoCheck # TensorIterator
901
998
  variants: function, method
999
+ dispatch:
1000
+ CompositeExplicitAutograd: logical_xor
902
1001
 
903
1002
  - func: logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)
904
1003
  device_check: NoCheck # TensorIterator
905
1004
  variants: method
1005
+ dispatch:
1006
+ CompositeExplicitAutograd: logical_xor_
906
1007
 
907
1008
  - func: logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
908
1009
  device_check: NoCheck # TensorIterator
@@ -912,10 +1013,14 @@
912
1013
  - func: logical_and(Tensor self, Tensor other) -> Tensor
913
1014
  device_check: NoCheck # TensorIterator
914
1015
  variants: function, method
1016
+ dispatch:
1017
+ CompositeExplicitAutograd: logical_and
915
1018
 
916
1019
  - func: logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)
917
1020
  device_check: NoCheck # TensorIterator
918
1021
  variants: method
1022
+ dispatch:
1023
+ CompositeExplicitAutograd: logical_and_
919
1024
 
920
1025
  - func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
921
1026
  device_check: NoCheck # TensorIterator
@@ -925,10 +1030,14 @@
925
1030
  - func: logical_or(Tensor self, Tensor other) -> Tensor
926
1031
  device_check: NoCheck # TensorIterator
927
1032
  variants: function, method
1033
+ dispatch:
1034
+ CompositeExplicitAutograd: logical_or
928
1035
 
929
1036
  - func: logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!)
930
1037
  device_check: NoCheck # TensorIterator
931
1038
  variants: method
1039
+ dispatch:
1040
+ CompositeExplicitAutograd: logical_or_
932
1041
 
933
1042
  - func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
934
1043
  device_check: NoCheck # TensorIterator
@@ -940,20 +1049,21 @@
940
1049
  - func: blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
941
1050
 
942
1051
  - func: bmm(Tensor self, Tensor mat2) -> Tensor
1052
+ structured_delegate: bmm.out
943
1053
  variants: function, method
944
1054
  dispatch:
945
- CPU: bmm_cpu
946
- CUDA: bmm_cuda
947
1055
  SparseCPU: bmm_sparse_cpu
948
1056
  SparseCUDA: bmm_sparse_cuda
949
1057
 
950
1058
  - func: bmm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
1059
+ structured: True
951
1060
  variants: function
952
1061
  dispatch:
953
1062
  CPU: bmm_out_cpu
954
1063
  CUDA: bmm_out_cuda
955
1064
  SparseCPU: bmm_out_sparse_cpu
956
1065
  SparseCUDA: bmm_out_sparse_cuda
1066
+ SparseCsrCUDA: bmm_out_sparse_csr_cuda
957
1067
 
958
1068
  - func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
959
1069
  device_check: NoCheck
@@ -962,6 +1072,11 @@
962
1072
  - func: broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
963
1073
  variants: function, method
964
1074
 
1075
+ - func: _sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
1076
+ variants: function
1077
+ dispatch:
1078
+ SparseCPU, SparseCUDA: sparse_broadcast_to
1079
+
965
1080
  - func: cat(Tensor[] tensors, int dim=0) -> Tensor
966
1081
  dispatch:
967
1082
  CompositeExplicitAutograd: cat
@@ -992,6 +1107,8 @@
992
1107
  variants: function, method
993
1108
  dispatch:
994
1109
  CompositeExplicitAutograd: ceil
1110
+ SparseCPU, SparseCUDA: ceil_sparse
1111
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
995
1112
 
996
1113
  - func: ceil_(Tensor(a!) self) -> Tensor(a!)
997
1114
  device_check: NoCheck # TensorIterator
@@ -999,6 +1116,8 @@
999
1116
  variants: function, method
1000
1117
  dispatch:
1001
1118
  CompositeExplicitAutograd: ceil_
1119
+ SparseCPU, SparseCUDA: ceil_sparse_
1120
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_
1002
1121
 
1003
1122
  - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
1004
1123
  device_check: NoCheck # TensorIterator
@@ -1006,6 +1125,8 @@
1006
1125
  structured_inherits: TensorIteratorBase
1007
1126
  dispatch:
1008
1127
  CPU, CUDA: ceil_out
1128
+ SparseCPU, SparseCUDA: ceil_sparse_out
1129
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
1009
1130
 
1010
1131
  # alias for torch.linalg.multi_dot
1011
1132
  - func: chain_matmul(Tensor[] matrices) -> Tensor
@@ -1019,18 +1140,18 @@
1019
1140
  device_check: NoCheck
1020
1141
  device_guard: False
1021
1142
 
1022
- - func: chunk(Tensor(a) self, int chunks, int dim=0) -> Tensor(a)[]
1143
+ - func: chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]
1023
1144
  variants: function, method
1024
1145
  device_check: NoCheck
1025
1146
  device_guard: False
1026
1147
 
1027
- - func: tensor_split.sections(Tensor(a) self, int sections, int dim=0) -> Tensor(a)[]
1148
+ - func: tensor_split.sections(Tensor(a -> *) self, int sections, int dim=0) -> Tensor(a)[]
1028
1149
  variants: function, method
1029
1150
 
1030
- - func: tensor_split.indices(Tensor(a) self, int[] indices, int dim=0) -> Tensor(a)[]
1151
+ - func: tensor_split.indices(Tensor(a -> *) self, int[] indices, int dim=0) -> Tensor(a)[]
1031
1152
  variants: function, method
1032
1153
 
1033
- - func: tensor_split.tensor_indices_or_sections(Tensor(a) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]
1154
+ - func: tensor_split.tensor_indices_or_sections(Tensor(a -> *) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]
1034
1155
  variants: function, method
1035
1156
 
1036
1157
  - func: clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
@@ -1186,6 +1307,12 @@
1186
1307
  manual_cpp_binding: True
1187
1308
 
1188
1309
  - func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
1310
+ dispatch:
1311
+ CompositeExplicitAutograd: convolution
1312
+
1313
+ - func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, int[]? bias_sizes, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
1314
+ dispatch:
1315
+ CompositeExplicitAutograd, CUDA: convolution_backward
1189
1316
 
1190
1317
  - func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
1191
1318
  dispatch:
@@ -1196,14 +1323,14 @@
1196
1323
  CompositeExplicitAutograd: convolution_backward_overrideable
1197
1324
 
1198
1325
  - func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
1326
+ dispatch:
1327
+ CompositeExplicitAutograd: _convolution
1199
1328
 
1200
1329
  - func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
1201
1330
 
1202
1331
  - func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] dilation, int groups) -> Tensor
1203
1332
 
1204
- - func: _convolution_nogroup(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding) -> Tensor
1205
-
1206
- - func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
1333
+ - func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
1207
1334
 
1208
1335
  - func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
1209
1336
 
@@ -1239,7 +1366,9 @@
1239
1366
  device_guard: False
1240
1367
  dispatch:
1241
1368
  MkldnnCPU: copy_mkldnn_
1369
+ SparseCPU, SparseCUDA, SparseHIP: copy_sparse_wrapper_
1242
1370
  CompositeExplicitAutograd: copy_
1371
+ SparseCsrCPU, SparseCsrCUDA: copy_sparse_csr_
1243
1372
 
1244
1373
  - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
1245
1374
  dispatch: {}
@@ -1320,56 +1449,14 @@
1320
1449
  dispatch:
1321
1450
  CUDA: cudnn_batch_norm_backward
1322
1451
 
1323
- - func: cudnn_convolution.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
1324
- dispatch:
1325
- CUDA: cudnn_convolution_deprecated
1326
-
1327
- - func: cudnn_convolution.deprecated2(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
1328
- dispatch:
1329
- CUDA: cudnn_convolution_deprecated2
1330
-
1331
1452
  - func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
1332
1453
  dispatch:
1333
1454
  CUDA: cudnn_convolution
1334
1455
 
1335
- - func: cudnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
1336
- dispatch:
1337
- CUDA: cudnn_convolution_backward_input
1338
-
1339
- - func: cudnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
1340
- dispatch:
1341
- CUDA: cudnn_convolution_backward
1342
-
1343
- - func: cudnn_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
1344
- dispatch:
1345
- CUDA: cudnn_convolution_backward_weight
1346
-
1347
- - func: cudnn_convolution_transpose.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
1348
- dispatch:
1349
- CUDA: cudnn_convolution_transpose_deprecated
1350
-
1351
- - func: cudnn_convolution_transpose.deprecated2(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
1352
- dispatch:
1353
- CUDA: cudnn_convolution_transpose_deprecated2
1354
-
1355
1456
  - func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
1356
1457
  dispatch:
1357
1458
  CUDA: cudnn_convolution_transpose
1358
1459
 
1359
- # NB: output_padding not strictly needed here, but it's helpful for the float
1360
- # backwards
1361
- - func: cudnn_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
1362
- dispatch:
1363
- CUDA: cudnn_convolution_transpose_backward
1364
-
1365
- - func: cudnn_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
1366
- dispatch:
1367
- CUDA: cudnn_convolution_transpose_backward_input
1368
-
1369
- - func: cudnn_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
1370
- dispatch:
1371
- CUDA: cudnn_convolution_transpose_backward_weight
1372
-
1373
1460
  - func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
1374
1461
  dispatch:
1375
1462
  CUDA: cudnn_convolution_relu
@@ -1516,6 +1603,8 @@
1516
1603
 
1517
1604
  - func: diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor
1518
1605
  variants: function, method
1606
+ dispatch:
1607
+ CompositeExplicitAutograd: diag_embed
1519
1608
 
1520
1609
  - func: diagflat(Tensor self, int offset=0) -> Tensor
1521
1610
  variants: function, method
@@ -1525,6 +1614,10 @@
1525
1614
  dispatch:
1526
1615
  CompositeExplicitAutograd: diagonal
1527
1616
 
1617
+ - func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
1618
+ python_module: linalg
1619
+ variants: function
1620
+
1528
1621
  - func: diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)
1529
1622
  variants: function, method
1530
1623
 
@@ -1571,6 +1664,7 @@
1571
1664
  structured_delegate: div.out
1572
1665
  dispatch:
1573
1666
  SparseCPU, SparseCUDA: div_sparse
1667
+ ZeroTensor: div_zerotensor
1574
1668
 
1575
1669
  - func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
1576
1670
  device_check: NoCheck # TensorIterator
@@ -1781,12 +1875,19 @@
1781
1875
  Meta: empty_meta
1782
1876
  MkldnnCPU: empty_mkldnn
1783
1877
  SparseCPU, SparseCUDA: empty_sparse
1878
+ SparseCsrCPU, SparseCsrCUDA: empty_sparse_csr
1784
1879
 
1880
+ # We do not make new_empty a composite that calls into new_empty_strided, as the strided version
1881
+ # is significantly more difficult to implement by different backends
1785
1882
  - func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
1786
1883
  variants: method
1884
+ dispatch:
1885
+ CompositeExplicitAutograd: new_empty
1787
1886
 
1788
1887
  - func: new_empty_strided(Tensor self, int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
1789
1888
  variants: method
1889
+ dispatch:
1890
+ CompositeExplicitAutograd: new_empty_strided
1790
1891
 
1791
1892
  - func: new_full(Tensor self, int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
1792
1893
  variants: method
@@ -1820,6 +1921,7 @@
1820
1921
  CPU, Meta: resize_
1821
1922
  CUDA: resize_cuda_
1822
1923
  QuantizedCPU: quantized_resize_cpu_
1924
+ SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
1823
1925
 
1824
1926
  - func: empty_quantized(int[] size, Tensor qtensor, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
1825
1927
  category_override: factory
@@ -1834,6 +1936,10 @@
1834
1936
  - func: empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
1835
1937
  device_check: NoCheck
1836
1938
  device_guard: False
1939
+ dispatch:
1940
+ CompositeExplicitAutograd: empty_like
1941
+ SparseCPU, SparseCUDA: empty_like_sparse_coo
1942
+ SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
1837
1943
 
1838
1944
  - func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
1839
1945
  dispatch:
@@ -1845,11 +1951,17 @@
1845
1951
  device_check: NoCheck # TensorIterator
1846
1952
  structured_delegate: erf.out
1847
1953
  variants: function, method
1954
+ dispatch:
1955
+ SparseCPU, SparseCUDA: erf_sparse
1956
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr
1848
1957
 
1849
1958
  - func: erf_(Tensor(a!) self) -> Tensor(a!)
1850
1959
  device_check: NoCheck # TensorIterator
1851
1960
  structured_delegate: erf.out
1852
1961
  variants: function, method
1962
+ dispatch:
1963
+ SparseCPU, SparseCUDA: erf_sparse_
1964
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_
1853
1965
 
1854
1966
  - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
1855
1967
  device_check: NoCheck # TensorIterator
@@ -1857,6 +1969,8 @@
1857
1969
  structured_inherits: TensorIteratorBase
1858
1970
  dispatch:
1859
1971
  CPU, CUDA: erf_out
1972
+ SparseCPU, SparseCUDA: erf_sparse_out
1973
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
1860
1974
 
1861
1975
  - func: erfc(Tensor self) -> Tensor
1862
1976
  device_check: NoCheck # TensorIterator
@@ -1910,11 +2024,17 @@
1910
2024
  device_check: NoCheck # TensorIterator
1911
2025
  structured_delegate: expm1.out
1912
2026
  variants: function, method
2027
+ dispatch:
2028
+ SparseCPU, SparseCUDA: expm1_sparse
2029
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
1913
2030
 
1914
2031
  - func: expm1_(Tensor(a!) self) -> Tensor(a!)
1915
2032
  device_check: NoCheck # TensorIterator
1916
2033
  structured_delegate: expm1.out
1917
2034
  variants: function, method
2035
+ dispatch:
2036
+ SparseCPU, SparseCUDA: expm1_sparse_
2037
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_
1918
2038
 
1919
2039
  - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
1920
2040
  device_check: NoCheck # TensorIterator
@@ -1922,6 +2042,8 @@
1922
2042
  structured_inherits: TensorIteratorBase
1923
2043
  dispatch:
1924
2044
  CPU, CUDA: expm1_out
2045
+ SparseCPU, SparseCUDA: expm1_sparse_out
2046
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
1925
2047
 
1926
2048
  - func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
1927
2049
  variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
@@ -1971,14 +2093,16 @@
1971
2093
  device_check: NoCheck # TensorIterator
1972
2094
  variants: function, method
1973
2095
  dispatch:
1974
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: fill_
2096
+ CPU, CUDA: fill_
2097
+ QuantizedCPU, QuantizedCUDA: fill_quantized_
1975
2098
  Meta: fill_meta_
1976
2099
 
1977
2100
  - func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
1978
2101
  device_check: NoCheck # TensorIterator
1979
2102
  variants: function, method
1980
2103
  dispatch:
1981
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: fill_
2104
+ CPU, CUDA: fill_
2105
+ QuantizedCPU, QuantizedCUDA: fill_quantized_
1982
2106
  Meta: fill_meta_
1983
2107
 
1984
2108
  - func: floor(Tensor self) -> Tensor
@@ -1987,6 +2111,8 @@
1987
2111
  variants: function, method
1988
2112
  dispatch:
1989
2113
  CompositeExplicitAutograd: floor
2114
+ SparseCPU, SparseCUDA: floor_sparse
2115
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr
1990
2116
 
1991
2117
  - func: floor_(Tensor(a!) self) -> Tensor(a!)
1992
2118
  device_check: NoCheck # TensorIterator
@@ -1994,6 +2120,8 @@
1994
2120
  variants: function, method
1995
2121
  dispatch:
1996
2122
  CompositeExplicitAutograd: floor_
2123
+ SparseCPU, SparseCUDA: floor_sparse_
2124
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_
1997
2125
 
1998
2126
  - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
1999
2127
  device_check: NoCheck # TensorIterator
@@ -2001,6 +2129,8 @@
2001
2129
  structured_inherits: TensorIteratorBase
2002
2130
  dispatch:
2003
2131
  CPU, CUDA: floor_out
2132
+ SparseCPU, SparseCUDA: floor_sparse_out
2133
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
2004
2134
 
2005
2135
  - func: floor_divide(Tensor self, Tensor other) -> Tensor
2006
2136
  device_check: NoCheck # TensorIterator
@@ -2108,10 +2238,13 @@
2108
2238
 
2109
2239
  - func: grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
2110
2240
  dispatch:
2111
- CPU: grid_sampler_2d_cpu
2241
+ CPU, QuantizedCPU: grid_sampler_2d_cpu
2112
2242
  CUDA: grid_sampler_2d_cuda
2113
2243
 
2114
- - func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
2244
+ # `grid_sampler_2d_backward` takes in `output_mask` to optimize performance for
2245
+ # the case where `input` doesn't require gradient. Gradient for `grid` is always
2246
+ # computed (only `output_mask[0]` is checked by the implementations).
2247
+ - func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)
2115
2248
  dispatch:
2116
2249
  CPU: grid_sampler_2d_backward_cpu
2117
2250
  CUDA: grid_sampler_2d_backward_cuda
@@ -2229,6 +2362,8 @@
2229
2362
 
2230
2363
  - func: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
2231
2364
  variants: function, method
2365
+ dispatch:
2366
+ CompositeExplicitAutograd: index_copy
2232
2367
 
2233
2368
  - func: index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!)
2234
2369
  variants: method
@@ -2250,6 +2385,8 @@
2250
2385
  - func: index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
2251
2386
  device_check: NoCheck # delegate to _index_put_impl_ after clone, which leverages TensorIterator
2252
2387
  variants: function, method
2388
+ dispatch:
2389
+ CompositeExplicitAutograd: index_put
2253
2390
 
2254
2391
  - func: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
2255
2392
  device_check: NoCheck # TensorIterator
@@ -2269,12 +2406,6 @@
2269
2406
  dispatch:
2270
2407
  CompositeExplicitAutograd: inverse_out
2271
2408
 
2272
- - func: _inverse_helper(Tensor self) -> Tensor
2273
- variants: function
2274
- dispatch:
2275
- CPU: _inverse_helper_cpu
2276
- CUDA: _inverse_helper_cuda
2277
-
2278
2409
  - func: isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor
2279
2410
  variants: function, method
2280
2411
 
@@ -2315,6 +2446,7 @@
2315
2446
  dispatch:
2316
2447
  CPU, CUDA: isnan
2317
2448
  SparseCPU, SparseCUDA: isnan_sparse
2449
+ SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
2318
2450
 
2319
2451
  - func: is_distributed(Tensor self) -> bool
2320
2452
  variants: function, method
@@ -2338,6 +2470,11 @@
2338
2470
  device_guard: False
2339
2471
  manual_cpp_binding: True
2340
2472
 
2473
+ - func: _is_zerotensor(Tensor self) -> bool
2474
+ variants: function, method
2475
+ device_guard: False
2476
+ manual_cpp_binding: True
2477
+
2341
2478
  - func: is_neg(Tensor self) -> bool
2342
2479
  variants: function, method
2343
2480
  device_guard: False
@@ -2405,6 +2542,11 @@
2405
2542
  CUDA: layer_norm_cuda
2406
2543
  CompositeImplicitAutograd: math_native_layer_norm
2407
2544
 
2545
+ - func: _native_multi_head_self_attention(Tensor query, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
2546
+ dispatch:
2547
+ CPU: multi_head_self_attention_cpu
2548
+ CUDA: multi_head_self_attention_cuda
2549
+
2408
2550
  - func: native_layer_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
2409
2551
  dispatch:
2410
2552
  CPU: layer_norm_backward_cpu
@@ -2414,15 +2556,18 @@
2414
2556
  variants: function, method
2415
2557
  dispatch:
2416
2558
  CompositeExplicitAutograd: nan_to_num
2559
+ SparseCPU, SparseCUDA: nan_to_num_sparse
2417
2560
 
2418
2561
  - func: nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)
2419
2562
  variants: function, method
2420
2563
  dispatch:
2421
2564
  CompositeExplicitAutograd: nan_to_num_
2565
+ SparseCPU, SparseCUDA: nan_to_num_sparse_
2422
2566
 
2423
2567
  - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
2424
2568
  dispatch:
2425
2569
  CPU, CUDA: nan_to_num_out
2570
+ SparseCPU, SparseCUDA: nan_to_num_sparse_out
2426
2571
 
2427
2572
  - func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
2428
2573
  python_module: nn
@@ -2471,11 +2616,11 @@
2471
2616
 
2472
2617
  - func: ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
2473
2618
 
2474
- - func: linspace(Scalar start, Scalar end, int? steps=None, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
2619
+ - func: linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
2475
2620
 
2476
- - func: linspace.out(Scalar start, Scalar end, int? steps=None, *, Tensor(a!) out) -> Tensor(a!)
2621
+ - func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
2477
2622
  dispatch:
2478
- CPU: linspace_cpu_out
2623
+ CPU, Meta: linspace_out
2479
2624
  CUDA: linspace_cuda_out
2480
2625
 
2481
2626
  - func: log(Tensor self) -> Tensor
@@ -2499,6 +2644,8 @@
2499
2644
  device_check: NoCheck # TensorIterator
2500
2645
  structured_delegate: log10.out
2501
2646
  variants: function, method
2647
+ dispatch:
2648
+ CompositeExplicitAutograd: log10
2502
2649
 
2503
2650
  - func: log10_(Tensor(a!) self) -> Tensor(a!)
2504
2651
  device_check: NoCheck # TensorIterator
@@ -2518,6 +2665,7 @@
2518
2665
  variants: function, method
2519
2666
  dispatch:
2520
2667
  SparseCPU, SparseCUDA: log1p_sparse
2668
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
2521
2669
 
2522
2670
  - func: log1p_(Tensor(a!) self) -> Tensor(a!)
2523
2671
  device_check: NoCheck # TensorIterator
@@ -2525,6 +2673,7 @@
2525
2673
  variants: function, method
2526
2674
  dispatch:
2527
2675
  SparseCPU, SparseCUDA: log1p_sparse_
2676
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_
2528
2677
 
2529
2678
  - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
2530
2679
  device_check: NoCheck # TensorIterator
@@ -2532,7 +2681,8 @@
2532
2681
  structured_inherits: TensorIteratorBase
2533
2682
  dispatch:
2534
2683
  CPU, CUDA: log1p_out
2535
- SparseCPU, SparseCUDA: log1p_out_sparse
2684
+ SparseCPU, SparseCUDA: log1p_sparse_out
2685
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
2536
2686
 
2537
2687
  - func: log2(Tensor self) -> Tensor
2538
2688
  device_check: NoCheck # TensorIterator
@@ -2630,11 +2780,11 @@
2630
2780
  dispatch:
2631
2781
  CompositeExplicitAutograd: logdet
2632
2782
 
2633
- - func: logspace(Scalar start, Scalar end, int? steps=None, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
2783
+ - func: logspace(Scalar start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
2634
2784
 
2635
- - func: logspace.out(Scalar start, Scalar end, int? steps=None, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
2785
+ - func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
2636
2786
  dispatch:
2637
- CPU: logspace_cpu_out
2787
+ CPU, Meta: logspace_out
2638
2788
  CUDA: logspace_cuda_out
2639
2789
 
2640
2790
  # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
@@ -2653,10 +2803,10 @@
2653
2803
  CPU: log_softmax_cpu_out
2654
2804
  CUDA: log_softmax_cuda_out
2655
2805
 
2656
- - func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
2806
+ - func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
2657
2807
  structured_delegate: _log_softmax_backward_data.out
2658
2808
 
2659
- - func: _log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)
2809
+ - func: _log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) out) -> Tensor(a!)
2660
2810
  structured: True
2661
2811
  dispatch:
2662
2812
  CPU: log_softmax_backward_cpu_out
@@ -2722,11 +2872,11 @@
2722
2872
  # Alias to linalg.matrix_power
2723
2873
  - func: matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)
2724
2874
 
2875
+ # Alias to linalg.matrix_exp
2725
2876
  - func: matrix_exp(Tensor self) -> Tensor
2726
2877
  variants: function, method
2727
- dispatch:
2728
- CPU, CUDA: matrix_exp
2729
2878
 
2879
+ # This function should be deprecated in favor of differential_analytic_matrix_function in FunctionsManual.cpp
2730
2880
  - func: matrix_exp_backward(Tensor self, Tensor grad) -> Tensor
2731
2881
 
2732
2882
  # DEPRECATED: Use torch.aminmax instead
@@ -2760,12 +2910,16 @@
2760
2910
 
2761
2911
  - func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
2762
2912
  device_check: NoCheck # TensorIterator
2913
+ structured_delegate: max.dim_max
2763
2914
  variants: function, method
2764
2915
  dispatch:
2765
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: max
2916
+ QuantizedCPU, QuantizedCUDA: qmax
2766
2917
 
2767
2918
  - func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
2768
2919
  device_check: NoCheck # TensorIterator
2920
+ structured: True
2921
+ precomputed:
2922
+ - dim -> int dim
2769
2923
  dispatch:
2770
2924
  CPU, CUDA: max_out
2771
2925
 
@@ -2903,12 +3057,16 @@
2903
3057
 
2904
3058
  - func: min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
2905
3059
  device_check: NoCheck # TensorIterator
3060
+ structured_delegate: min.dim_min
2906
3061
  variants: function, method
2907
3062
  dispatch:
2908
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: min
3063
+ QuantizedCPU, QuantizedCUDA: qmin
2909
3064
 
2910
3065
  - func: min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
2911
3066
  device_check: NoCheck # TensorIterator
3067
+ structured: True
3068
+ precomputed:
3069
+ - dim -> int dim
2912
3070
  dispatch:
2913
3071
  CPU, CUDA: min_out
2914
3072
 
@@ -2932,14 +3090,6 @@
2932
3090
  dispatch:
2933
3091
  CompositeExplicitAutograd: mkldnn_convolution
2934
3092
 
2935
- - func: mkldnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> Tensor
2936
-
2937
- - func: mkldnn_convolution_backward_weights(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> (Tensor, Tensor)
2938
-
2939
- - func: mkldnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
2940
- dispatch:
2941
- CompositeExplicitAutograd: mkldnn_convolution_backward
2942
-
2943
3093
  - func: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
2944
3094
  dispatch:
2945
3095
  CUDA: miopen_batch_norm
@@ -2952,56 +3102,14 @@
2952
3102
  dispatch:
2953
3103
  CUDA: miopen_convolution
2954
3104
 
2955
- - func: miopen_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
2956
- dispatch:
2957
- CUDA: miopen_convolution_backward_input
2958
-
2959
- - func: miopen_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
2960
- dispatch:
2961
- CUDA: miopen_convolution_backward
2962
-
2963
- - func: miopen_convolution_backward_bias(Tensor grad_output) -> Tensor
2964
- dispatch:
2965
- CUDA: miopen_convolution_backward_bias
2966
-
2967
- - func: miopen_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
2968
- dispatch:
2969
- CUDA: miopen_convolution_backward_weight
2970
-
2971
3105
  - func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
2972
3106
  dispatch:
2973
3107
  CUDA: miopen_convolution_transpose
2974
3108
 
2975
- # NB: output_padding not strictly needed here, but it's helpful for the float
2976
- # backwards
2977
- - func: miopen_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
2978
- dispatch:
2979
- CUDA: miopen_convolution_transpose_backward
2980
-
2981
- - func: miopen_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
2982
- dispatch:
2983
- CUDA: miopen_convolution_transpose_backward_input
2984
-
2985
- - func: miopen_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
2986
- dispatch:
2987
- CUDA: miopen_convolution_transpose_backward_weight
2988
-
2989
3109
  - func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
2990
3110
  dispatch:
2991
3111
  CUDA: miopen_depthwise_convolution
2992
3112
 
2993
- - func: miopen_depthwise_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
2994
- dispatch:
2995
- CUDA: miopen_depthwise_convolution_backward_input
2996
-
2997
- - func: miopen_depthwise_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
2998
- dispatch:
2999
- CUDA: miopen_depthwise_convolution_backward
3000
-
3001
- - func: miopen_depthwise_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
3002
- dispatch:
3003
- CUDA: miopen_depthwise_convolution_backward_weight
3004
-
3005
3113
  - func: miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
3006
3114
  dispatch:
3007
3115
  CUDA: miopen_rnn
@@ -3014,7 +3122,8 @@
3014
3122
  structured_delegate: mm.out
3015
3123
  variants: function, method
3016
3124
  dispatch:
3017
- SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA: _sparse_mm
3125
+ SparseCPU, SparseCUDA: _sparse_mm
3126
+ SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm
3018
3127
 
3019
3128
  - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
3020
3129
  structured: True
@@ -3057,6 +3166,7 @@
3057
3166
  dispatch:
3058
3167
  SparseCPU, SparseCUDA: mul_sparse
3059
3168
  MkldnnCPU: mkldnn_mul
3169
+ ZeroTensor: mul_zerotensor
3060
3170
 
3061
3171
  - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
3062
3172
  device_check: NoCheck # TensorIterator
@@ -3107,8 +3217,8 @@
3107
3217
  - func: mv(Tensor self, Tensor vec) -> Tensor
3108
3218
  variants: function, method
3109
3219
  dispatch:
3110
- CPU, CUDA: mv
3111
- SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA: mv_sparse
3220
+ CompositeExplicitAutograd: mv
3221
+ SparseCPU, SparseCUDA: mv_sparse
3112
3222
 
3113
3223
  - func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
3114
3224
  dispatch:
@@ -3210,15 +3320,6 @@
3210
3320
  dispatch:
3211
3321
  CompositeExplicitAutograd: _nnpack_spatial_convolution
3212
3322
 
3213
- - func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
3214
- variants: function
3215
-
3216
- - func: _nnpack_spatial_convolution_backward_input(Tensor input, Tensor grad_output, Tensor weight, int[2] padding) -> Tensor
3217
- variants: function
3218
-
3219
- - func: _nnpack_spatial_convolution_backward_weight(Tensor input, int[] weightsize, Tensor grad_output, int[2] padding) -> Tensor
3220
- variants: function
3221
-
3222
3323
  - func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
3223
3324
  device_check: NoCheck
3224
3325
  device_guard: False
@@ -3286,6 +3387,21 @@
3286
3387
  - func: numpy_T(Tensor(a) self) -> Tensor(a)
3287
3388
  variants: method
3288
3389
 
3390
+ # Exposed on Python as an attribute 'H'
3391
+ - func: matrix_H(Tensor(a) self) -> Tensor(a)
3392
+ variants: method
3393
+
3394
+ # Exposed on Python as an attribute 'mT'
3395
+ - func: mT(Tensor(a) self) -> Tensor(a)
3396
+ variants: method
3397
+
3398
+ # Exposed on Python as an attribute 'mH'
3399
+ - func: mH(Tensor(a) self) -> Tensor(a)
3400
+ variants: method
3401
+
3402
+ - func: adjoint(Tensor(a) self) -> Tensor(a)
3403
+ variants: function, method
3404
+
3289
3405
  - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
3290
3406
 
3291
3407
  - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
@@ -3295,6 +3411,11 @@
3295
3411
  CPU: channel_shuffle
3296
3412
  QuantizedCPU: channel_shuffle_quantized_cpu
3297
3413
 
3414
+ - func: native_channel_shuffle(Tensor self, int groups) -> Tensor
3415
+ dispatch:
3416
+ CPU: channel_shuffle_cpu
3417
+ CompositeImplicitAutograd: math_channel_shuffle
3418
+
3298
3419
  - func: is_pinned(Tensor self, Device? device=None) -> bool
3299
3420
  variants: method
3300
3421
  dispatch:
@@ -3321,15 +3442,18 @@
3321
3442
  variants: function, method
3322
3443
  dispatch:
3323
3444
  CompositeExplicitAutograd: rad2deg
3445
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr
3324
3446
 
3325
3447
  - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
3326
3448
  variants: function, method
3327
3449
  dispatch:
3328
3450
  CompositeExplicitAutograd: rad2deg_
3451
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_
3329
3452
 
3330
3453
  - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
3331
3454
  dispatch:
3332
3455
  CompositeExplicitAutograd: rad2deg_out
3456
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out
3333
3457
 
3334
3458
  - func: deg2rad(Tensor self) -> Tensor
3335
3459
  variants: function, method
@@ -3420,7 +3544,7 @@
3420
3544
 
3421
3545
  - func: range.out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
3422
3546
  dispatch:
3423
- CPU: range_cpu_out
3547
+ CPU, Meta: range_out
3424
3548
  CUDA: range_cuda_out
3425
3549
 
3426
3550
  - func: ravel(Tensor(a) self) -> Tensor(a)
@@ -3449,6 +3573,7 @@
3449
3573
  variants: function, method
3450
3574
  dispatch:
3451
3575
  SparseCPU, SparseCUDA: neg_sparse
3576
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr
3452
3577
 
3453
3578
  - func: neg_(Tensor(a!) self) -> Tensor(a!)
3454
3579
  device_check: NoCheck # TensorIterator
@@ -3456,6 +3581,7 @@
3456
3581
  variants: function, method
3457
3582
  dispatch:
3458
3583
  SparseCPU, SparseCUDA: neg_sparse_
3584
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_
3459
3585
 
3460
3586
  - func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
3461
3587
  device_check: NoCheck # TensorIterator
@@ -3464,6 +3590,7 @@
3464
3590
  dispatch:
3465
3591
  CPU, CUDA: neg_out
3466
3592
  SparseCPU, SparseCUDA: neg_out_sparse
3593
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
3467
3594
 
3468
3595
  # Alias for neg
3469
3596
  - func: negative(Tensor self) -> Tensor
@@ -3504,7 +3631,7 @@
3504
3631
  device_check: NoCheck
3505
3632
  device_guard: False
3506
3633
  dispatch:
3507
- CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: _reshape_alias
3634
+ CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor: _reshape_alias
3508
3635
  # We don't need to support mkldnn since this is handled explicitly by the reshape operator.
3509
3636
 
3510
3637
  - func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
@@ -3522,11 +3649,17 @@
3522
3649
  device_check: NoCheck # TensorIterator
3523
3650
  structured_delegate: round.out
3524
3651
  variants: function, method
3652
+ dispatch:
3653
+ SparseCPU, SparseCUDA: round_sparse
3654
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
3525
3655
 
3526
3656
  - func: round_(Tensor(a!) self) -> Tensor(a!)
3527
3657
  device_check: NoCheck # TensorIterator
3528
3658
  structured_delegate: round.out
3529
3659
  variants: function, method
3660
+ dispatch:
3661
+ SparseCPU, SparseCUDA: round_sparse_
3662
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_
3530
3663
 
3531
3664
  - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
3532
3665
  device_check: NoCheck # TensorIterator
@@ -3535,6 +3668,26 @@
3535
3668
  dispatch:
3536
3669
  CPU: round_out
3537
3670
  CUDA: round_out
3671
+ SparseCPU, SparseCUDA: round_sparse_out
3672
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
3673
+
3674
+ - func: round.decimals(Tensor self, *, int decimals) -> Tensor
3675
+ device_check: NoCheck # TensorIterator
3676
+ structured_delegate: round.decimals_out
3677
+ variants: function, method
3678
+
3679
+ - func: round_.decimals(Tensor(a!) self, *, int decimals) -> Tensor(a!)
3680
+ device_check: NoCheck # TensorIterator
3681
+ structured_delegate: round.decimals_out
3682
+ variants: function, method
3683
+
3684
+ - func: round.decimals_out(Tensor self, *, int decimals, Tensor(a!) out) -> Tensor(a!)
3685
+ device_check: NoCheck # TensorIterator
3686
+ structured: True
3687
+ structured_inherits: TensorIteratorBase
3688
+ dispatch:
3689
+ CPU: round_decimals_out
3690
+ CUDA: round_decimals_out
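
The new `round.decimals` overloads add a `decimals` keyword, i.e. rounding to a given number of decimal places as in NumPy. A hedged sketch of the upstream Python usage:

    import torch

    x = torch.tensor([3.14159, 2.71828])
    print(torch.round(x, decimals=2))   # tensor([3.1400, 2.7200])
    x.round_(decimals=1)                # in-place variant delegates to the same structured kernel
    print(x)                            # tensor([3.1000, 2.7000])
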
3538
3691
 
3539
3692
  - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
3540
3693
  device_check: NoCheck # TensorIterator
@@ -3591,6 +3744,7 @@
3591
3744
  python_module: nn
3592
3745
  dispatch:
3593
3746
  MkldnnCPU: mkldnn_gelu
3747
+ QuantizedCPU: gelu_quantized_cpu
3594
3748
 
3595
3749
  - func: gelu_backward.grad_input(Tensor grad, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
3596
3750
  structured: True
@@ -3783,11 +3937,17 @@
3783
3937
  device_check: NoCheck # TensorIterator
3784
3938
  structured_delegate: sin.out
3785
3939
  variants: function, method
3940
+ dispatch:
3941
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
3942
+ SparseCPU, SparseCUDA: sin_sparse
3786
3943
 
3787
3944
  - func: sin_(Tensor(a!) self) -> Tensor(a!)
3788
3945
  device_check: NoCheck # TensorIterator
3789
3946
  structured_delegate: sin.out
3790
3947
  variants: function, method
3948
+ dispatch:
3949
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
3950
+ SparseCPU, SparseCUDA: sin_sparse_
3791
3951
 
3792
3952
  - func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
3793
3953
  device_check: NoCheck # TensorIterator
@@ -3795,6 +3955,8 @@
3795
3955
  structured_inherits: TensorIteratorBase
3796
3956
  dispatch:
3797
3957
  CPU, CUDA: sin_out
3958
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
3959
+ SparseCPU, SparseCUDA: sin_sparse_out
3798
3960
 
3799
3961
  - func: sinc(Tensor self) -> Tensor
3800
3962
  structured_delegate: sinc.out
@@ -3814,11 +3976,17 @@
3814
3976
  device_check: NoCheck # TensorIterator
3815
3977
  structured_delegate: sinh.out
3816
3978
  variants: function, method
3979
+ dispatch:
3980
+ SparseCPU, SparseCUDA: sinh_sparse
3981
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr
3817
3982
 
3818
3983
  - func: sinh_(Tensor(a!) self) -> Tensor(a!)
3819
3984
  device_check: NoCheck # TensorIterator
3820
3985
  structured_delegate: sinh.out
3821
3986
  variants: function, method
3987
+ dispatch:
3988
+ SparseCPU, SparseCUDA: sinh_sparse_
3989
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_
3822
3990
 
3823
3991
  - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
3824
3992
  device_check: NoCheck # TensorIterator
@@ -3826,6 +3994,8 @@
3826
3994
  structured_inherits: TensorIteratorBase
3827
3995
  dispatch:
3828
3996
  CPU, CUDA: sinh_out
3997
+ SparseCPU, SparseCUDA: sinh_sparse_out
3998
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out
3829
3999
 
3830
4000
  # Returns a copy of this `Variable` that is detached from its autograd graph.
3831
4001
  # This method is OK to call if the `Variable` is a view.
@@ -3848,6 +4018,7 @@
3848
4018
  # this. If this `Variable` is a view, throws an `std::runtime_error()`.
3849
4019
  - func: detach_(Tensor(a!) self) -> Tensor(a!)
3850
4020
  variants: function, method
4021
+ tags: inplace_view
3851
4022
  dispatch:
3852
4023
  CompositeExplicitAutograd: detach_
3853
4024
 
@@ -3876,6 +4047,27 @@
3876
4047
  dispatch:
3877
4048
  CompositeExplicitAutograd: slice_backward
3878
4049
 
4050
+ - func: slice_scatter(Tensor self, Tensor src, int dim=0, int? start=None, int? end=None, int step=1) -> Tensor
4051
+ variants: function, method
4052
+ device_check: NoCheck
4053
+ device_guard: False
4054
+ dispatch:
4055
+ CompositeExplicitAutograd: slice_scatter
4056
+
4057
+ - func: select_scatter(Tensor self, Tensor src, int dim, int index) -> Tensor
4058
+ variants: function, method
4059
+ device_check: NoCheck
4060
+ device_guard: False
4061
+ dispatch:
4062
+ CompositeExplicitAutograd: select_scatter
4063
+
4064
+ - func: diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor
4065
+ variants: function, method
4066
+ device_check: NoCheck
4067
+ device_guard: False
4068
+ dispatch:
4069
+ CompositeExplicitAutograd: diagonal_scatter
4070
+
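
`slice_scatter`, `select_scatter`, and `diagonal_scatter` are the out-of-place counterparts of writing into a slice, a single index, or a diagonal: each returns a copy of `self` with `src` embedded at the given location. A hedged Python sketch of the upstream semantics:

    import torch

    base = torch.zeros(4, 4)
    out1 = torch.slice_scatter(base, torch.ones(2, 4), dim=0, start=1, end=3)  # rows 1..2 replaced by ones
    out2 = torch.select_scatter(base, torch.full((4,), 5.0), dim=0, index=2)   # row 2 replaced by fives
    out3 = torch.diagonal_scatter(base, torch.arange(4.0))                     # main diagonal replaced by 0..3
    print(out1[1], out2[2], out3.diagonal())
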
3879
4071
  - func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
3880
4072
  variants: function, method
3881
4073
  dispatch:
@@ -3902,10 +4094,10 @@
3902
4094
  CPU: softmax_cpu_out
3903
4095
  CUDA: softmax_cuda_out
3904
4096
 
3905
- - func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
4097
+ - func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
3906
4098
  structured_delegate: _softmax_backward_data.out
3907
4099
 
3908
- - func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
4100
+ - func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
3909
4101
  structured: True
3910
4102
  dispatch:
3911
4103
  CPU: softmax_backward_cpu_out
@@ -3918,7 +4110,7 @@
3918
4110
  dispatch:
3919
4111
  CompositeExplicitAutograd: unsafe_split
3920
4112
 
3921
- - func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[]
4113
+ - func: split.Tensor(Tensor(a -> *) self, int split_size, int dim=0) -> Tensor(a)[]
3922
4114
  variants: function, method
3923
4115
  device_check: NoCheck
3924
4116
  device_guard: False
@@ -3932,29 +4124,29 @@
3932
4124
  dispatch:
3933
4125
  CompositeExplicitAutograd: unsafe_split_with_sizes
3934
4126
 
3935
- - func: split_with_sizes(Tensor(a) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
4127
+ - func: split_with_sizes(Tensor(a -> *) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
3936
4128
  variants: function, method
3937
4129
  device_check: NoCheck
3938
4130
  device_guard: False
3939
4131
  dispatch:
3940
4132
  CompositeExplicitAutograd: split_with_sizes
3941
4133
 
3942
- - func: hsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
4134
+ - func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
3943
4135
  variants: function, method
3944
4136
 
3945
- - func: hsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
4137
+ - func: hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
3946
4138
  variants: function, method
3947
4139
 
3948
- - func: vsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
4140
+ - func: vsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
3949
4141
  variants: function, method
3950
4142
 
3951
- - func: vsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
4143
+ - func: vsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
3952
4144
  variants: function, method
3953
4145
 
3954
- - func: dsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
4146
+ - func: dsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
3955
4147
  variants: function, method
3956
4148
 
3957
- - func: dsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
4149
+ - func: dsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
3958
4150
  variants: function, method
3959
4151
 
3960
4152
  - func: squeeze(Tensor(a) self) -> Tensor(a)
@@ -3962,14 +4154,16 @@
3962
4154
  device_check: NoCheck
3963
4155
  device_guard: False
3964
4156
  dispatch:
3965
- CompositeExplicitAutograd: squeeze
4157
+ CPU, CUDA: squeeze
4158
+ QuantizedCPU, QuantizedCUDA: squeeze_quantized
3966
4159
 
3967
4160
  - func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
3968
4161
  variants: function, method
3969
4162
  device_check: NoCheck
3970
4163
  device_guard: False
3971
4164
  dispatch:
3972
- CompositeExplicitAutograd: squeeze
4165
+ CPU, CUDA: squeeze
4166
+ QuantizedCPU, QuantizedCUDA: squeeze_quantized
3973
4167
 
3974
4168
  - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
3975
4169
  variants: function, method
@@ -3980,6 +4174,7 @@
3980
4174
  variants: method
3981
4175
  device_check: NoCheck
3982
4176
  device_guard: False
4177
+ tags: inplace_view
3983
4178
  dispatch:
3984
4179
  CompositeExplicitAutograd: squeeze_
3985
4180
 
@@ -3987,6 +4182,7 @@
3987
4182
  variants: method
3988
4183
  device_check: NoCheck
3989
4184
  device_guard: False
4185
+ tags: inplace_view
3990
4186
  dispatch:
3991
4187
  CompositeExplicitAutograd: squeeze_
3992
4188
 
@@ -3994,6 +4190,7 @@
3994
4190
  variants: method
3995
4191
  device_check: NoCheck
3996
4192
  device_guard: False
4193
+ tags: inplace_view
3997
4194
 
3998
4195
  - func: sspaddmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
3999
4196
  variants: function, method
@@ -4105,11 +4302,15 @@
4105
4302
  variants: function, method
4106
4303
  dispatch:
4107
4304
  SparseCPU, SparseCUDA: sqrt_sparse
4305
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr
4108
4306
 
4109
4307
  - func: sqrt_(Tensor(a!) self) -> Tensor(a!)
4110
4308
  device_check: NoCheck # TensorIterator
4111
4309
  structured_delegate: sqrt.out
4112
4310
  variants: function, method
4311
+ dispatch:
4312
+ SparseCPU, SparseCUDA: sqrt_sparse_
4313
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_
4113
4314
 
4114
4315
  - func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4115
4316
  device_check: NoCheck # TensorIterator
@@ -4117,7 +4318,8 @@
4117
4318
  structured_inherits: TensorIteratorBase
4118
4319
  dispatch:
4119
4320
  CPU, CUDA: sqrt_out
4120
- SparseCPU, SparseCUDA: sqrt_out_sparse
4321
+ SparseCPU, SparseCUDA: sqrt_sparse_out
4322
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out
4121
4323
 
4122
4324
  - func: square(Tensor self) -> Tensor
4123
4325
  device_check: NoCheck # TensorIterator
@@ -4225,6 +4427,7 @@
4225
4427
  device_check: NoCheck
4226
4428
  device_guard: False
4227
4429
  variants: method
4430
+ tags: inplace_view
4228
4431
  dispatch:
4229
4432
  CompositeExplicitAutograd: t_
4230
4433
 
@@ -4232,11 +4435,17 @@
4232
4435
  device_check: NoCheck # TensorIterator
4233
4436
  structured_delegate: tan.out
4234
4437
  variants: function, method
4438
+ dispatch:
4439
+ SparseCPU, SparseCUDA: tan_sparse
4440
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
4235
4441
 
4236
4442
  - func: tan_(Tensor(a!) self) -> Tensor(a!)
4237
4443
  device_check: NoCheck # TensorIterator
4238
4444
  structured_delegate: tan.out
4239
4445
  variants: function, method
4446
+ dispatch:
4447
+ SparseCPU, SparseCUDA: tan_sparse_
4448
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_
4240
4449
 
4241
4450
  - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4242
4451
  device_check: NoCheck # TensorIterator
@@ -4244,6 +4453,8 @@
4244
4453
  structured_inherits: TensorIteratorBase
4245
4454
  dispatch:
4246
4455
  CPU, CUDA: tan_out
4456
+ SparseCPU, SparseCUDA: tan_sparse_out
4457
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out
4247
4458
 
4248
4459
  - func: tanh(Tensor self) -> Tensor
4249
4460
  device_check: NoCheck # TensorIterator
@@ -4252,6 +4463,8 @@
4252
4463
  dispatch:
4253
4464
  QuantizedCPU: tanh_quantized_cpu
4254
4465
  MkldnnCPU: mkldnn_tanh
4466
+ SparseCPU, SparseCUDA: tanh_sparse
4467
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr
4255
4468
 
4256
4469
  - func: tanh_(Tensor(a!) self) -> Tensor(a!)
4257
4470
  device_check: NoCheck # TensorIterator
@@ -4259,12 +4472,17 @@
4259
4472
  variants: function, method
4260
4473
  dispatch:
4261
4474
  MkldnnCPU: mkldnn_tanh_
4475
+ SparseCPU, SparseCUDA: tanh_sparse_
4476
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_
4477
+
4262
4478
  - func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4263
4479
  device_check: NoCheck # TensorIterator
4264
4480
  structured: True
4265
4481
  structured_inherits: TensorIteratorBase
4266
4482
  dispatch:
4267
4483
  CPU, CUDA: tanh_out
4484
+ SparseCPU, SparseCUDA: tanh_sparse_out
4485
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out
4268
4486
 
4269
4487
  - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
4270
4488
  variants: function
@@ -4331,6 +4549,7 @@
4331
4549
  variants: method
4332
4550
  device_check: NoCheck
4333
4551
  device_guard: False
4552
+ tags: inplace_view
4334
4553
  dispatch:
4335
4554
  CompositeExplicitAutograd: transpose_
4336
4555
 
@@ -4388,6 +4607,8 @@
4388
4607
  variants: function, method
4389
4608
  dispatch:
4390
4609
  CompositeExplicitAutograd: trunc
4610
+ SparseCPU, SparseCUDA: trunc_sparse
4611
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
4391
4612
 
4392
4613
  - func: trunc_(Tensor(a!) self) -> Tensor(a!)
4393
4614
  structured_delegate: trunc.out
@@ -4395,6 +4616,8 @@
4395
4616
  variants: function, method
4396
4617
  dispatch:
4397
4618
  CompositeExplicitAutograd: trunc_
4619
+ SparseCPU, SparseCUDA: trunc_sparse_
4620
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_
4398
4621
 
4399
4622
  - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4400
4623
  structured: True
@@ -4402,6 +4625,8 @@
4402
4625
  device_check: NoCheck # TensorIterator
4403
4626
  dispatch:
4404
4627
  CPU, CUDA: trunc_out
4628
+ SparseCPU, SparseCUDA: trunc_sparse_out
4629
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out
4405
4630
 
4406
4631
  # Alias for trunc
4407
4632
  - func: fix(Tensor self) -> Tensor
@@ -4461,12 +4686,15 @@
4461
4686
  device_check: NoCheck
4462
4687
  device_guard: False
4463
4688
  dispatch:
4464
- CompositeExplicitAutograd: unsqueeze
4689
+ CPU, CUDA: unsqueeze
4690
+ SparseCPU, SparseCUDA: unsqueeze_sparse
4691
+ QuantizedCPU, QuantizedCUDA: unsqueeze_quantized
4465
4692
 
4466
4693
  - func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
4467
4694
  variants: method
4468
4695
  device_check: NoCheck
4469
4696
  device_guard: False
4697
+ tags: inplace_view
4470
4698
  dispatch:
4471
4699
  CompositeExplicitAutograd: unsqueeze_
4472
4700
 
@@ -4586,6 +4814,11 @@
4586
4814
  device_check: NoCheck
4587
4815
  device_guard: False
4588
4816
 
4817
+ - func: _efficientzerotensor(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
4818
+ dispatch:
4819
+ CPU: _efficientzerotensor
4820
+ CUDA: _efficientzerotensor_cuda
4821
+
4589
4822
  - func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
4590
4823
 
4591
4824
  - func: zeros.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
@@ -4655,12 +4888,15 @@
4655
4888
  SparseCUDA: _sparse_sum_backward_cuda
4656
4889
 
4657
4890
  - func: _sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
4891
+ python_module: sparse
4658
4892
  variants: function
4659
4893
 
4660
4894
  - func: _sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
4895
+ python_module: sparse
4661
4896
  variants: function
4662
4897
 
4663
4898
  - func: _sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
4899
+ python_module: sparse
4664
4900
  dispatch:
4665
4901
  SparseCPU: softmax_sparse_cpu
4666
4902
  SparseCUDA: softmax_sparse_cuda
@@ -4671,12 +4907,15 @@
4671
4907
  SparseCUDA: softmax_backward_sparse_cuda
4672
4908
 
4673
4909
  - func: _sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
4910
+ python_module: sparse
4674
4911
  variants: function
4675
4912
 
4676
4913
  - func: _sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
4914
+ python_module: sparse
4677
4915
  variants: function
4678
4916
 
4679
4917
  - func: _sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
4918
+ python_module: sparse
4680
4919
  dispatch:
4681
4920
  SparseCPU: log_softmax_sparse_cpu
4682
4921
  SparseCUDA: log_softmax_sparse_cuda
@@ -4774,6 +5013,7 @@
4774
5013
  dispatch:
4775
5014
  CompositeExplicitAutograd: clone
4776
5015
  SparseCPU, SparseCUDA: clone_sparse
5016
+ SparseCsrCPU, SparseCsrCUDA: clone_sparse_csr
4777
5017
  MkldnnCPU: mkldnn_clone
4778
5018
  QuantizedCPU, QuantizedCUDA: quantized_clone
4779
5019
 
@@ -4886,9 +5126,20 @@
4886
5126
  # Functionally the same as addmm, but we give it a different derivative formula
4887
5127
  # that doesn't propagate gradients to non-present entries on sparse.
4888
5128
  - func: _sparse_addmm(Tensor self, Tensor sparse, Tensor dense, *, Scalar beta=1, Scalar alpha=1) -> Tensor
5129
+ python_module: sparse
4889
5130
  dispatch:
4890
5131
  CompositeExplicitAutograd: _sparse_addmm
4891
5132
 
5133
+ - func: sparse_sampled_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
5134
+ python_module: sparse
5135
+ dispatch:
5136
+ SparseCsrCUDA: sparse_sampled_addmm_out_sparse_csr_cuda
5137
+
5138
+ - func: sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
5139
+ python_module: sparse
5140
+ dispatch:
5141
+ SparseCsrCUDA: sparse_sampled_addmm_sparse_csr_cuda
5142
+
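
Sampled addmm computes beta * self + alpha * (mat1 @ mat2) only at the nonzero locations of the sparse CSR tensor `self`; at this point only a CUDA kernel is registered. A rough Python sketch, assuming the op is reachable as `torch.sparse.sampled_addmm` (the exact public binding may differ by version):

    import torch

    # 2x2 CSR pattern tensor with nonzeros at (0, 0) and (1, 1)  (illustrative values)
    a = torch.sparse_csr_tensor(torch.tensor([0, 1, 2]), torch.tensor([0, 1]),
                                torch.tensor([1.0, 1.0]), size=(2, 2), device='cuda')
    m1 = torch.randn(2, 3, device='cuda')
    m2 = torch.randn(3, 2, device='cuda')
    out = torch.sparse.sampled_addmm(a, m1, m2)   # result keeps a's sparsity pattern
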
4892
5143
  - func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
4893
5144
  structured: True
4894
5145
  dispatch:
@@ -4896,8 +5147,8 @@
4896
5147
  CUDA: addmm_out_cuda
4897
5148
  SparseCPU: addmm_out_sparse_dense_cpu
4898
5149
  SparseCUDA: addmm_out_sparse_dense_cuda
4899
- SparseCsrCPU: addmm_out_sparse_csr_dense_cpu
4900
- SparseCsrCUDA: addmm_out_sparse_csr_dense_cuda
5150
+ SparseCsrCPU: addmm_out_sparse_csr_cpu
5151
+ SparseCsrCUDA: addmm_out_sparse_csr_cuda
4901
5152
 
4902
5153
  - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
4903
5154
  structured_delegate: addmm.out
@@ -5209,12 +5460,12 @@
5209
5460
  dispatch:
5210
5461
  SparseCPU, SparseCUDA: copy_sparse_
5211
5462
 
5212
- - func: unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]
5463
+ - func: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]
5213
5464
  variants: function, method
5214
5465
  dispatch:
5215
5466
  CompositeExplicitAutograd: unbind
5216
5467
 
5217
- - func: unbind.Dimname(Tensor(a) self, Dimname dim) -> Tensor(a)[]
5468
+ - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
5218
5469
  variants: function, method
5219
5470
 
5220
5471
  - func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
@@ -5246,6 +5497,11 @@
5246
5497
 
5247
5498
  - func: to_mkldnn_backward(Tensor grad, Tensor input) -> Tensor
5248
5499
 
5500
+ - func: quantize_per_tensor_dynamic(Tensor self, ScalarType dtype, bool reduce_range) -> Tensor
5501
+ variants: function
5502
+ dispatch:
5503
+ CPU, CUDA: quantize_per_tensor_dynamic
5504
+
5249
5505
  - func: quantize_per_tensor(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor
5250
5506
  variants: function
5251
5507
  dispatch:
@@ -5269,7 +5525,7 @@
5269
5525
  - func: dequantize.self(Tensor self) -> Tensor
5270
5526
  variants: function, method
5271
5527
  dispatch:
5272
- CPU: dequantize_cpu
5528
+ CPU, CUDA: dequantize_cpu_or_cuda
5273
5529
  QuantizedCPU, QuantizedCUDA: dequantize_quantized
5274
5530
 
5275
5531
  - func: dequantize.tensors(Tensor[] tensors) -> Tensor[]
@@ -5391,6 +5647,14 @@
5391
5647
  - func: choose_qparams_optimized(Tensor input, int numel, int n_bins, float ratio, int bit_width) -> (Tensor, Tensor)
5392
5648
  variants: function
5393
5649
 
5650
+ - func: _autocast_to_reduced_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype) -> Tensor(a)
5651
+ variants: method
5652
+ device_guard: False
5653
+
5654
+ - func: _autocast_to_full_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled) -> Tensor(a)
5655
+ variants: method
5656
+ device_guard: False
5657
+
5394
5658
  - func: _to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor
5395
5659
  device_check: NoCheck
5396
5660
  device_guard: False
@@ -5589,6 +5853,8 @@
5589
5853
  - func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
5590
5854
  device_check: NoCheck # TensorIterator
5591
5855
  variants: function, method
5856
+ dispatch:
5857
+ CompositeExplicitAutograd: masked_fill
5592
5858
 
5593
5859
  - func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
5594
5860
  device_check: NoCheck # TensorIterator
@@ -5600,6 +5866,8 @@
5600
5866
  - func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
5601
5867
  device_check: NoCheck # TensorIterator
5602
5868
  variants: function, method
5869
+ dispatch:
5870
+ CompositeExplicitAutograd: masked_fill
5603
5871
 
5604
5872
  - func: masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!)
5605
5873
  variants: method
@@ -5609,13 +5877,20 @@
5609
5877
 
5610
5878
  - func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
5611
5879
  variants: function, method
5880
+ dispatch:
5881
+ CompositeExplicitAutograd: masked_scatter
5882
+
5883
+ - func: _masked_softmax(Tensor self, Tensor mask) -> Tensor
5884
+ dispatch:
5885
+ CUDA: masked_softmax_cuda
5886
+ CPU: masked_softmax_cpu
5612
5887
 
5613
5888
  - func: view(Tensor(a) self, int[] size) -> Tensor(a)
5614
5889
  variants: method
5615
5890
  device_check: NoCheck
5616
5891
  device_guard: False
5617
5892
  dispatch:
5618
- CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
5893
+ ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
5619
5894
  MkldnnCPU: mkldnn_view
5620
5895
 
5621
5896
  # Warning: If you want to change the name or overload name of this
@@ -5639,19 +5914,21 @@
5639
5914
  - func: put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor
5640
5915
  variants: function, method
5641
5916
 
5642
- - func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
5643
- variants: method
5644
-
5645
- - func: index_add_.alpha(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha) -> Tensor(a!)
5646
- variants: method
5917
+ - func: index_add.out(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
5918
+ structured: True
5919
+ variants: function
5920
+ precomputed:
5921
+ - dim -> int dim
5647
5922
  dispatch:
5648
- CPU: index_add_cpu_
5649
- CUDA: index_add_cuda_
5923
+ CPU: index_add_cpu_out
5924
+ CUDA: index_add_cuda_out
5650
5925
 
5651
- - func: index_add(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
5652
- variants: function, method
5926
+ - func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!)
5927
+ structured_delegate: index_add.out
5928
+ variants: method
5653
5929
 
5654
- - func: index_add.alpha(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha) -> Tensor
5930
+ - func: index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
5931
+ structured_delegate: index_add.out
5655
5932
  variants: function, method
5656
5933
 
5657
5934
  - func: index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
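
`index_add_`/`index_add` become structured ops here, and the old `.alpha` overloads fold into a single signature with an `alpha=1` keyword default. Hedged upstream Python usage:

    import torch

    t = torch.zeros(5, 3)
    idx = torch.tensor([0, 2, 4])
    src = torch.ones(3, 3)
    t.index_add_(0, idx, src, alpha=2.0)     # rows 0, 2, 4 each receive 2 * the matching src row
    out = torch.index_add(t, 0, idx, src)    # out-of-place form; alpha defaults to 1
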
@@ -5667,6 +5944,8 @@
5667
5944
  - func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
5668
5945
  device_check: NoCheck # TensorIterator
5669
5946
  variants: function, method
5947
+ dispatch:
5948
+ CompositeExplicitAutograd: index_fill
5670
5949
 
5671
5950
  - func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!)
5672
5951
  device_check: NoCheck # TensorIterator
@@ -5677,6 +5956,8 @@
5677
5956
  - func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
5678
5957
  device_check: NoCheck # TensorIterator
5679
5958
  variants: function, method
5959
+ dispatch:
5960
+ CompositeExplicitAutograd: index_fill
5680
5961
 
5681
5962
  - func: index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!)
5682
5963
  device_check: NoCheck # TensorIterator
@@ -5773,6 +6054,11 @@
5773
6054
  - func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
5774
6055
  variants: function, method
5775
6056
 
6057
+ - func: scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor
6058
+ variants: function, method
6059
+ dispatch:
6060
+ CPU: scatter_reduce_two_cpu
6061
+
5776
6062
  - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
5777
6063
  structured_delegate: eq.Scalar_out
5778
6064
  device_check: NoCheck # TensorIterator
@@ -6064,16 +6350,12 @@
6064
6350
  CPU, CUDA: bitwise_right_shift
6065
6351
 
6066
6352
  - func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
6353
+ structured_delegate: tril.out
6067
6354
  variants: method
6068
- dispatch:
6069
- CPU: tril_cpu_
6070
- CUDA: tril_cuda_
6071
6355
 
6072
6356
  - func: triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
6357
+ structured_delegate: triu.out
6073
6358
  variants: method
6074
- dispatch:
6075
- CPU: triu_cpu_
6076
- CUDA: triu_cuda_
6077
6359
 
6078
6360
  - func: digamma_(Tensor(a!) self) -> Tensor(a!)
6079
6361
  device_check: NoCheck # TensorIterator
@@ -6083,16 +6365,12 @@
6083
6365
  - func: lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!)
6084
6366
  device_check: NoCheck # TensorIterator
6085
6367
  variants: method
6086
- dispatch:
6087
- CPU: lerp_cpu_scalar_
6088
- CUDA: lerp_cuda_scalar_
6368
+ structured_delegate: lerp.Scalar_out
6089
6369
 
6090
6370
  - func: lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!)
6091
6371
  device_check: NoCheck # TensorIterator
6092
6372
  variants: method
6093
- dispatch:
6094
- CPU: lerp_cpu_tensor_
6095
- CUDA: lerp_cuda_tensor_
6373
+ structured_delegate: lerp.Tensor_out
6096
6374
 
6097
6375
  - func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
6098
6376
  variants: method
@@ -6178,33 +6456,29 @@
6178
6456
  device_guard: False
6179
6457
 
6180
6458
  - func: cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)
6181
- dispatch:
6182
- CPU, CUDA: cross_out
6183
6459
 
6184
6460
  - func: cross(Tensor self, Tensor other, int? dim=None) -> Tensor
6185
6461
  variants: method, function
6186
- dispatch:
6187
- CPU, CUDA: cross
6188
6462
 
6189
6463
  - func: triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
6464
+ structured: True
6190
6465
  dispatch:
6191
- CPU: triu_cpu_out
6192
- CUDA: triu_cuda_out
6466
+ CPU: triu_cpu
6467
+ CUDA: triu_cuda
6193
6468
 
6194
6469
  - func: triu(Tensor self, int diagonal=0) -> Tensor
6470
+ structured_delegate: triu.out
6195
6471
  variants: method, function
6196
- dispatch:
6197
- CompositeExplicitAutograd: triu
6198
6472
 
6199
6473
  - func: tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
6474
+ structured: True
6200
6475
  dispatch:
6201
- CPU: tril_cpu_out
6202
- CUDA: tril_cuda_out
6476
+ CPU: tril_cpu
6477
+ CUDA: tril_cuda
6203
6478
 
6204
6479
  - func: tril(Tensor self, int diagonal=0) -> Tensor
6480
+ structured_delegate: tril.out
6205
6481
  variants: method, function
6206
- dispatch:
6207
- CompositeExplicitAutograd: tril
6208
6482
 
6209
6483
  - func: tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
6210
6484
  dispatch:
@@ -6584,7 +6858,8 @@
6584
6858
  - func: index_select(Tensor self, int dim, Tensor index) -> Tensor
6585
6859
  variants: method, function
6586
6860
  dispatch:
6587
- CPU, QuantizedCPU: index_select_cpu_
6861
+ CPU: index_select_cpu_
6862
+ QuantizedCPU: index_select_quantized_cpu_
6588
6863
  CUDA, QuantizedCUDA: index_select_cuda
6589
6864
  SparseCPU: index_select_sparse
6590
6865
  SparseCUDA: index_select_sparse
@@ -6629,6 +6904,9 @@
6629
6904
  - func: nonzero_numpy(Tensor self) -> Tensor[]
6630
6905
  variants: method, function
6631
6906
 
6907
+ - func: argwhere(Tensor self) -> Tensor
6908
+ variants: method, function
6909
+
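
`argwhere` mirrors `numpy.argwhere` and is equivalent to `torch.nonzero`: it returns an (N, ndim) tensor holding the indices of the nonzero elements. Sketch:

    import torch

    t = torch.tensor([[0, 1], [2, 0]])
    print(torch.argwhere(t))                            # tensor([[0, 1], [1, 0]])
    assert torch.equal(torch.argwhere(t), torch.nonzero(t))
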
6632
6910
  - func: gather.out(Tensor self, int dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)
6633
6911
  structured: True
6634
6912
  dispatch:
@@ -6699,13 +6977,30 @@
6699
6977
  CUDA: legacy_lstsq_cuda
6700
6978
 
6701
6979
  - func: triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) M) -> (Tensor(a!) solution, Tensor(b!) cloned_coefficient)
6980
+ structured: True
6702
6981
  dispatch:
6703
6982
  CPU, CUDA: triangular_solve_out
6983
+ SparseCsrCPU: triangular_solve_out_sparse_csr_cpu
6984
+ SparseCsrCUDA: triangular_solve_out_sparse_csr_cuda
6704
6985
 
6705
6986
  - func: triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient)
6987
+ structured_delegate: triangular_solve.X
6988
+ variants: method, function
6989
+
6990
+ - func: _linalg_check_errors(Tensor info, str api_name, *, bool is_matrix) -> ()
6991
+ dispatch:
6992
+ CompositeExplicitAutograd: _linalg_check_errors
6993
+
6994
+ - func: linalg_solve_triangular.out(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False, Tensor(a!) out) -> Tensor(a!)
6995
+ python_module: linalg
6996
+ dispatch:
6997
+ CPU, CUDA: linalg_solve_triangular_out
6998
+
6999
+ - func: linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor
7000
+ python_module: linalg
6706
7001
  variants: method, function
6707
7002
  dispatch:
6708
- CPU, CUDA: triangular_solve
7003
+ CPU, CUDA: linalg_solve_triangular
6709
7004
 
6710
7005
  - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
6711
7006
  dispatch:
@@ -6736,12 +7031,6 @@
6736
7031
  - func: svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V)
6737
7032
  variants: method, function
6738
7033
 
6739
- - func: _svd_helper(Tensor self, bool some, bool compute_uv) -> (Tensor U, Tensor S, Tensor V)
6740
- variants: function
6741
- dispatch:
6742
- CPU: _svd_helper_cpu
6743
- CUDA: _svd_helper_cuda
6744
-
6745
7034
  # swapaxes, alias for transpose
6746
7035
  - func: swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a)
6747
7036
  variants: function, method
@@ -6752,6 +7041,7 @@
6752
7041
  variants: method
6753
7042
  device_check: NoCheck
6754
7043
  device_guard: False
7044
+ tags: inplace_view
6755
7045
 
6756
7046
  # swapdims, alias for transpose
6757
7047
  - func: swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
@@ -6763,6 +7053,7 @@
6763
7053
  variants: method
6764
7054
  device_check: NoCheck
6765
7055
  device_guard: False
7056
+ tags: inplace_view
6766
7057
 
6767
7058
  - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
6768
7059
  dispatch:
@@ -6843,8 +7134,6 @@
6843
7134
 
6844
7135
  - func: _lu_with_info(Tensor self, bool pivot=True, bool check_errors=True) -> (Tensor LU, Tensor pivots, Tensor info)
6845
7136
  variants: function
6846
- dispatch:
6847
- CPU, CUDA: _lu_with_info
6848
7137
 
6849
7138
  - func: lu_solve.out(Tensor self, Tensor LU_data, Tensor LU_pivots, *, Tensor(a!) out) -> Tensor(a!)
6850
7139
  dispatch:
@@ -6926,11 +7215,17 @@
6926
7215
  device_check: NoCheck # TensorIterator
6927
7216
  structured_delegate: erfinv.out
6928
7217
  variants: method, function
7218
+ dispatch:
7219
+ SparseCPU, SparseCUDA: erfinv_sparse
7220
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr
6929
7221
 
6930
7222
  - func: erfinv_(Tensor(a!) self) -> Tensor(a!)
6931
7223
  device_check: NoCheck # TensorIterator
6932
7224
  structured_delegate: erfinv.out
6933
7225
  variants: method
7226
+ dispatch:
7227
+ SparseCPU, SparseCUDA: erfinv_sparse_
7228
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_
6934
7229
 
6935
7230
  - func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
6936
7231
  device_check: NoCheck # TensorIterator
@@ -6938,6 +7233,8 @@
6938
7233
  structured_inherits: TensorIteratorBase
6939
7234
  dispatch:
6940
7235
  CPU, CUDA: erfinv_out
7236
+ SparseCPU, SparseCUDA: erfinv_sparse_out
7237
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
6941
7238
 
6942
7239
  - func: i0(Tensor self) -> Tensor
6943
7240
  structured_delegate: i0.out
@@ -6959,6 +7256,8 @@
6959
7256
  variants: function, method
6960
7257
  dispatch:
6961
7258
  CompositeExplicitAutograd: sign
7259
+ SparseCPU, SparseCUDA: sign_sparse
7260
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr
6962
7261
 
6963
7262
  - func: sign_(Tensor(a!) self) -> Tensor(a!)
6964
7263
  device_check: NoCheck # TensorIterator
@@ -6966,6 +7265,8 @@
6966
7265
  variants: method
6967
7266
  dispatch:
6968
7267
  CompositeExplicitAutograd: sign_
7268
+ SparseCPU, SparseCUDA: sign_sparse_
7269
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_
6969
7270
 
6970
7271
  - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
6971
7272
  device_check: NoCheck # TensorIterator
@@ -6973,10 +7274,15 @@
6973
7274
  structured_inherits: TensorIteratorBase
6974
7275
  dispatch:
6975
7276
  CPU, CUDA: sign_out
7277
+ SparseCPU, SparseCUDA: sign_sparse_out
7278
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
6976
7279
 
6977
7280
  - func: signbit(Tensor self) -> Tensor
6978
7281
  variants: function, method
6979
7282
  structured_delegate: signbit.out
7283
+ dispatch:
7284
+ SparseCPU, SparseCUDA: signbit_sparse
7285
+ SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr
6980
7286
 
6981
7287
  - func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
6982
7288
  structured: True
@@ -6984,6 +7290,8 @@
6984
7290
  dispatch:
6985
7291
  CPU: signbit_out
6986
7292
  CUDA: signbit_out
7293
+ SparseCPU, SparseCUDA: signbit_sparse_out
7294
+ SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out
6987
7295
 
6988
7296
  - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
6989
7297
  device_check: NoCheck # TensorIterator
@@ -7008,31 +7316,39 @@
7008
7316
  structured_delegate: atan2.out
7009
7317
  variants: method, function
7010
7318
 
7319
+ # arctan2, alias of atan2
7320
+ - func: arctan2(Tensor self, Tensor other) -> Tensor
7321
+ variants: method, function
7322
+
7323
+ - func: arctan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
7324
+ device_check: NoCheck # TensorIterator
7325
+
7326
+ - func: arctan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
7327
+ variants: method
7328
+
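
`arctan2` is registered purely as an alias of `atan2` (the NumPy spelling); all three variants forward to the existing kernels. Sketch:

    import torch

    y = torch.tensor([1.0, -1.0])
    x = torch.tensor([-1.0, 1.0])
    assert torch.equal(torch.arctan2(y, x), torch.atan2(y, x))   # element-wise angle of the point (x, y)
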
7011
7329
  - func: lerp.Scalar_out(Tensor self, Tensor end, Scalar weight, *, Tensor(a!) out) -> Tensor(a!)
7012
7330
  device_check: NoCheck # TensorIterator
7331
+ structured: True
7332
+ structured_inherits: TensorIteratorBase
7013
7333
  dispatch:
7014
- CPU: lerp_cpu_scalar_out
7015
- CUDA: lerp_cuda_scalar_out
7334
+ CPU, CUDA: lerp_Scalar
7016
7335
 
7017
7336
  - func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)
7018
7337
  device_check: NoCheck # TensorIterator
7338
+ structured: True
7339
+ structured_inherits: TensorIteratorBase
7019
7340
  dispatch:
7020
- CPU: lerp_cpu_tensor_out
7021
- CUDA: lerp_cuda_tensor_out
7341
+ CPU, CUDA: lerp_Tensor
7022
7342
 
7023
7343
  - func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor
7024
7344
  device_check: NoCheck # TensorIterator
7025
7345
  variants: method, function
7026
- dispatch:
7027
- CPU: lerp_cpu_scalar
7028
- CUDA: lerp_cuda_scalar
7346
+ structured_delegate: lerp.Scalar_out
7029
7347
 
7030
7348
  - func: lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor
7031
7349
  device_check: NoCheck # TensorIterator
7032
7350
  variants: method, function
7033
- dispatch:
7034
- CPU: lerp_cpu_tensor
7035
- CUDA: lerp_cuda_tensor
7351
+ structured_delegate: lerp.Tensor_out
7036
7352
 
7037
7353
  - func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!)
7038
7354
  dispatch:
@@ -7063,6 +7379,18 @@
7063
7379
  dispatch:
7064
7380
  CPU: histogram_cpu
7065
7381
 
7382
+ - func: _histogramdd_bin_edges(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor[]
7383
+ dispatch:
7384
+ CPU: histogramdd_bin_edges_cpu
7385
+
7386
+ - func: _histogramdd_from_bin_cts(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor
7387
+ dispatch:
7388
+ CPU: histogramdd_cpu
7389
+
7390
+ - func: _histogramdd_from_bin_tensors(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False) -> Tensor
7391
+ dispatch:
7392
+ CPU: histogramdd_cpu
7393
+
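
These private helpers back multi-dimensional histogram support (CPU-only here); upstream it is surfaced in Python as `torch.histogramdd`, assuming a build that exposes it. Hedged sketch:

    import torch

    x = torch.randn(100, 2)                        # 100 samples in 2-D
    hist, edges = torch.histogramdd(x, bins=[5, 4])
    print(hist.shape)                              # torch.Size([5, 4])
    print(len(edges))                              # one bin-edge tensor per dimension -> 2
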
7066
7394
  - func: fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
7067
7395
  device_check: NoCheck # TensorIterator
7068
7396
  dispatch:
@@ -7275,48 +7603,25 @@
7275
7603
  device_check: NoCheck # TensorIterator
7276
7604
  variants: method, function
7277
7605
 
7278
- # The following quantile signatures are DEPRECATED in favor of the new ones with the interpolation kwarg.
7279
- - func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
7280
-
7281
- - func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor
7606
+ - func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
7282
7607
  variants: method, function
7283
7608
 
7284
- - func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
7609
+ - func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
7285
7610
 
7286
- - func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor
7611
+ - func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
7287
7612
  variants: method, function
7288
7613
 
7289
- - func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
7614
+ - func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
7290
7615
 
7291
- - func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor
7616
+ - func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
7292
7617
  variants: method, function
7293
7618
 
7294
- - func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
7619
+ - func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
7295
7620
 
7296
- - func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor
7621
+ - func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
7297
7622
  variants: method, function
7298
7623
 
7299
- # To keep backward and forward compatibility, and to avoid ambiguity with the original signatures, dim, keepdim and interpolation
7300
- # parameters are required for now. Once the deprecated signatures are removed they will be made optional.
7301
- - func: quantile.new_scalar_out(Tensor self, float q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
7302
-
7303
- - func: quantile.new_scalar(Tensor self, float q, int? dim, bool keepdim, *, str interpolation) -> Tensor
7304
- variants: method, function
7305
-
7306
- - func: quantile.new_out(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
7307
-
7308
- - func: quantile.new(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation) -> Tensor
7309
- variants: method, function
7310
-
7311
- - func: nanquantile.new_scalar_out(Tensor self, float q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
7312
-
7313
- - func: nanquantile.new_scalar(Tensor self, float q, int? dim, bool keepdim, *, str interpolation) -> Tensor
7314
- variants: method, function
7315
-
7316
- - func: nanquantile.new_out(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
7317
-
7318
- - func: nanquantile.new(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation) -> Tensor
7319
- variants: method, function
7624
+ - func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
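
The deprecated `.new_*` quantile overloads are removed and the primary `quantile`/`nanquantile` signatures gain an `interpolation` keyword ('linear', 'lower', 'higher', 'nearest', 'midpoint'), matching NumPy. Hedged upstream Python usage:

    import torch

    x = torch.tensor([1.0, 2.0, 3.0, 10.0])
    print(torch.quantile(x, 0.5))                             # 2.5 with the default 'linear'
    print(torch.quantile(x, 0.5, interpolation='lower'))      # 2.0: take the lower of the two ranks
    print(torch.nanquantile(torch.tensor([1.0, float('nan'), 3.0]), 0.5))  # 2.0, NaNs ignored
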
7320
7625
 
7321
7626
  - func: sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
7322
7627
  device_check: NoCheck # TensorIterator
@@ -7511,6 +7816,7 @@
7511
7816
  dispatch:
7512
7817
  CPU, CUDA: normal_
7513
7818
  Meta: normal_meta_
7819
+ SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
7514
7820
 
7515
7821
  - func: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
7516
7822
  dispatch:
@@ -8209,6 +8515,13 @@
8209
8515
  CPU: foreach_tensor_minimum_slow
8210
8516
  CUDA: foreach_tensor_minimum_cuda
8211
8517
 
8518
+ - func: _foreach_norm.Scalar(Tensor[] tensors, Scalar ord=2) -> Tensor[]
8519
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
8520
+ variants: function
8521
+ dispatch:
8522
+ CPU: foreach_tensor_norm_slow
8523
+ CUDA: foreach_tensor_norm_cuda
8524
+
8212
8525
  - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
8213
8526
  dispatch:
8214
8527
  CPU: bucketize_cpu
@@ -8224,17 +8537,27 @@
8224
8537
  CPU: bucketize_cpu
8225
8538
  CUDA: bucketize_cuda
8226
8539
 
8227
- - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False) -> Tensor
8540
+ - func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
8228
8541
  dispatch:
8229
8542
  CPU: searchsorted_cpu
8230
8543
  CUDA: searchsorted_cuda
8231
8544
 
8232
- - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
8545
+ # [Note about _torch_cuda_cu_linker_symbol_op and torch_cuda_cu]
8546
+ # This is a DUMMY function to force linking against torch_cuda_cu on Windows.
8547
+ # Otherwise, the Windows linker will optimize it away and not include torch_cuda_cu even when we
8548
+ # want it to be included. This is similar to what we do with warp_size for torch_cuda_cpp,
8549
+ # described as the solution to this issue: https://github.com/pytorch/pytorch/issues/31611
8550
+ # This op should NOT be used or exposed or edited or else Windows builds (with BUILD_SPLIT_CUDA) will break.
8551
+ - func: _torch_cuda_cu_linker_symbol_op(Tensor self) -> Tensor
8552
+ dispatch:
8553
+ CUDA: _torch_cuda_cu_linker_symbol_op_cuda
8554
+
8555
+ - func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
8233
8556
  dispatch:
8234
8557
  CPU: searchsorted_out_cpu
8235
8558
  CUDA: searchsorted_out_cuda
8236
8559
 
8237
- - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False) -> Tensor
8560
+ - func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
8238
8561
  dispatch:
8239
8562
  CPU: searchsorted_cpu
8240
8563
  CUDA: searchsorted_cuda
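
`searchsorted` gains `side` and `sorter` keywords: `side='right'` matches `right=True`, and `sorter` passes argsort indices so an unsorted sequence can be searched without building a sorted copy. Hedged Python sketch:

    import torch

    seq = torch.tensor([1, 3, 5, 7, 9])
    vals = torch.tensor([3, 6])
    print(torch.searchsorted(seq, vals))                       # tensor([1, 3]), left insertion points
    print(torch.searchsorted(seq, vals, side='right'))         # tensor([2, 3])
    shuffled = torch.tensor([9, 1, 5, 3, 7])
    order = torch.argsort(shuffled)
    print(torch.searchsorted(shuffled, vals, sorter=order))    # same result as searching the sorted sequence
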
@@ -8248,6 +8571,15 @@
8248
8571
  CPU: _convert_indices_from_coo_to_csr_structured_cpu
8249
8572
  CUDA: _convert_indices_from_coo_to_csr_structured_cuda
8250
8573
 
8574
+ - func: _convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor
8575
+ structured_delegate: _convert_indices_from_csr_to_coo.out
8576
+
8577
+ - func: _convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!)
8578
+ structured: True
8579
+ dispatch:
8580
+ CPU: _convert_indices_from_csr_to_coo_structured_cpu
8581
+ CUDA: _convert_indices_from_csr_to_coo_structured_cuda
8582
+
8251
8583
  ## NN wrappers
8252
8584
 
8253
8585
  - func: mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
@@ -8409,16 +8741,16 @@
8409
8741
 
8410
8742
  - func: smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!)
8411
8743
  device_check: NoCheck # TensorIterator
8744
+ structured: True
8745
+ structured_inherits: TensorIteratorBase
8412
8746
  python_module: nn
8413
8747
  dispatch:
8414
- CPU: smooth_l1_loss_out
8415
- CUDA: smooth_l1_loss_out
8748
+ CPU, CUDA: smooth_l1_loss_out
8416
8749
 
8417
8750
  - func: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor
8418
8751
  device_check: NoCheck # TensorIterator
8752
+ structured_delegate: smooth_l1_loss.out
8419
8753
  python_module: nn
8420
- dispatch:
8421
- CPU, CUDA: smooth_l1_loss
8422
8754
 
8423
8755
  - func: smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!)
8424
8756
  python_module: nn
@@ -8533,6 +8865,7 @@
8533
8865
  python_module: nn
8534
8866
  dispatch:
8535
8867
  CPU, CUDA: hardsigmoid_out
8868
+ QuantizedCPU: hardsigmoid_out_quantized_cpu
8536
8869
 
8537
8870
  - func: hardsigmoid(Tensor self) -> Tensor
8538
8871
  structured_delegate: hardsigmoid.out
@@ -8715,14 +9048,14 @@
8715
9048
  device_check: NoCheck # TensorIterator
8716
9049
  python_module: nn
8717
9050
 
8718
- - func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
9051
+ - func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)
8719
9052
  structured: True
8720
9053
  structured_inherits: TensorIteratorBase
8721
9054
  python_module: nn
8722
9055
  dispatch:
8723
9056
  CPU, CUDA: softplus_backward_out
8724
9057
 
8725
- - func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output) -> Tensor
9058
+ - func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold) -> Tensor
8726
9059
  structured_delegate: softplus_backward.grad_input
8727
9060
  python_module: nn
8728
9061
 
@@ -8933,19 +9266,22 @@
8933
9266
 
8934
9267
  - func: fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
8935
9268
  python_module: nn
9269
+ structured: True
8936
9270
  dispatch:
8937
- CPU: fractional_max_pool2d_backward_out_cpu
8938
- CUDA: fractional_max_pool2d_backward_out_cuda
9271
+ CPU: fractional_max_pool2d_backward_cpu
9272
+ CUDA: fractional_max_pool2d_backward_cuda
8939
9273
 
8940
9274
  - func: fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor
8941
9275
  python_module: nn
8942
- dispatch:
8943
- CPU: fractional_max_pool2d_backward_cpu
8944
- CUDA: fractional_max_pool2d_backward_cuda
9276
+ structured_delegate: fractional_max_pool2d_backward.grad_input
8945
9277
 
8946
9278
  # Return: (Tensor output, Tensor indices)
8947
9279
  - func: fractional_max_pool3d.output(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))
8948
9280
  python_module: nn
9281
+ structured: True
9282
+ precomputed:
9283
+ - kernel_size -> int poolSizeT, int poolSizeH, int poolSizeW
9284
+ - output_size -> int outputT, int outputH, int outputW
8949
9285
  dispatch:
8950
9286
  CPU: fractional_max_pool3d_out_cpu
8951
9287
  CUDA: fractional_max_pool3d_out_cuda
@@ -8953,9 +9289,7 @@
8953
9289
  # Return: (Tensor output, Tensor indices)
8954
9290
  - func: fractional_max_pool3d(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples) -> (Tensor, Tensor)
8955
9291
  python_module: nn
8956
- dispatch:
8957
- CPU: fractional_max_pool3d_cpu
8958
- CUDA: fractional_max_pool3d_cuda
9292
+ structured_delegate: fractional_max_pool3d.output
8959
9293
 
8960
9294
  - func: fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
8961
9295
  python_module: nn
@@ -9225,6 +9559,16 @@
9225
9559
  dispatch:
9226
9560
  CompositeExplicitAutograd: upsample_bilinear2d_backward
9227
9561
 
9562
+ - func: _upsample_bilinear2d_aa.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
9563
+ python_module: nn
9564
+ dispatch:
9565
+ CompositeExplicitAutograd: _upsample_bilinear2d_aa
9566
+
9567
+ - func: _upsample_bilinear2d_aa_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
9568
+ python_module: nn
9569
+ dispatch:
9570
+ CompositeExplicitAutograd: _upsample_bilinear2d_aa_backward
9571
+
9228
9572
  - func: upsample_trilinear3d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
9229
9573
  python_module: nn
9230
9574
  dispatch:
@@ -9245,26 +9589,56 @@
9245
9589
  dispatch:
9246
9590
  CompositeExplicitAutograd: upsample_bicubic2d_backward
9247
9591
 
9592
+ - func: _upsample_bicubic2d_aa.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
9593
+ python_module: nn
9594
+ dispatch:
9595
+ CompositeExplicitAutograd: _upsample_bicubic2d_aa
9596
+
9597
+ - func: _upsample_bicubic2d_aa_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
9598
+ python_module: nn
9599
+ dispatch:
9600
+ CompositeExplicitAutograd: _upsample_bicubic2d_aa_backward
9601
+
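
The `_upsample_bilinear2d_aa` and `_upsample_bicubic2d_aa` entries add anti-aliased resizing; upstream they are reached from Python through `torch.nn.functional.interpolate(..., antialias=True)`, assuming a PyTorch build that exposes the flag. Sketch:

    import torch
    import torch.nn.functional as F

    img = torch.rand(1, 3, 64, 64)
    small = F.interpolate(img, size=(32, 32), mode='bilinear',
                          align_corners=False, antialias=True)   # downscale with anti-aliasing
    print(small.shape)                                           # torch.Size([1, 3, 32, 32])
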
9248
9602
  - func: upsample_nearest1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9249
9603
  python_module: nn
9250
9604
  dispatch:
9251
9605
  CompositeExplicitAutograd: upsample_nearest1d
9252
9606
 
9607
+ - func: _upsample_nearest_exact1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9608
+ python_module: nn
9609
+ dispatch:
9610
+ CompositeExplicitAutograd: _upsample_nearest_exact1d
9611
+
9253
9612
  - func: upsample_nearest1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9254
9613
  python_module: nn
9255
9614
  dispatch:
9256
9615
  CompositeExplicitAutograd: upsample_nearest1d_backward
9257
9616
 
9617
+ - func: _upsample_nearest_exact1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9618
+ python_module: nn
9619
+ dispatch:
9620
+ CompositeExplicitAutograd: _upsample_nearest_exact1d_backward
9621
+
9258
9622
  - func: upsample_nearest2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9259
9623
  python_module: nn
9260
9624
  dispatch:
9261
9625
  CompositeExplicitAutograd: upsample_nearest2d
9262
9626
 
9627
+ - func: _upsample_nearest_exact2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9628
+ python_module: nn
9629
+ dispatch:
9630
+ CompositeExplicitAutograd: _upsample_nearest_exact2d
9631
+
9263
9632
  - func: upsample_nearest2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9264
9633
  python_module: nn
9265
9634
  dispatch:
9266
9635
  CompositeExplicitAutograd: upsample_nearest2d_backward
9267
9636
 
9637
+ - func: _upsample_nearest_exact2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9638
+ python_module: nn
9639
+ dispatch:
9640
+ CompositeExplicitAutograd: _upsample_nearest_exact2d_backward
9641
+
9268
9642
  - func: upsample_nearest3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9269
9643
  python_module: nn
9270
9644
  dispatch:
@@ -9272,12 +9646,25 @@
9272
9646
  CUDA: upsample_nearest3d_cuda
9273
9647
  QuantizedCPU: upsample_nearest3d_quantized_cpu
9274
9648
 
9649
+ - func: _upsample_nearest_exact3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
9650
+ python_module: nn
9651
+ dispatch:
9652
+ CPU: _upsample_nearest_exact3d_cpu
9653
+ CUDA: _upsample_nearest_exact3d_cuda
9654
+ QuantizedCPU: _upsample_nearest_exact3d_quantized_cpu
9655
+
9275
9656
  - func: upsample_nearest3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9276
9657
  python_module: nn
9277
9658
  dispatch:
9278
9659
  CPU: upsample_nearest3d_backward_cpu
9279
9660
  CUDA: upsample_nearest3d_backward_cuda
9280
9661
 
9662
+ - func: _upsample_nearest_exact3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
9663
+ python_module: nn
9664
+ dispatch:
9665
+ CPU: _upsample_nearest_exact3d_backward_cpu
9666
+ CUDA: _upsample_nearest_exact3d_backward_cuda
9667
+
9281
9668
  # NOTE: all of the non-"vec" upsample overloads are only kept for backward compatibility.
9282
9669
  - func: upsample_linear1d.out(Tensor self, int[1] output_size, bool align_corners, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
9283
9670
  python_module: nn
@@ -9325,6 +9712,28 @@
9325
9712
  python_module: nn
9326
9713
  structured_delegate: upsample_bilinear2d_backward.grad_input
9327
9714
 
9715
+ - func: _upsample_bilinear2d_aa.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9716
+ python_module: nn
9717
+ structured: True
9718
+ dispatch:
9719
+ CPU: _upsample_bilinear2d_aa_out_cpu
9720
+ CUDA: _upsample_bilinear2d_aa_out_cuda
9721
+
9722
+ - func: _upsample_bilinear2d_aa(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
9723
+ python_module: nn
9724
+ structured_delegate: _upsample_bilinear2d_aa.out
9725
+
9726
+ - func: _upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9727
+ python_module: nn
9728
+ structured: True
9729
+ dispatch:
9730
+ CPU: _upsample_bilinear2d_aa_backward_out_cpu
9731
+ CUDA: _upsample_bilinear2d_aa_backward_out_cuda
9732
+
9733
+ - func: _upsample_bilinear2d_aa_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
9734
+ python_module: nn
9735
+ structured_delegate: _upsample_bilinear2d_aa_backward.grad_input
9736
+
9328
9737
  - func: upsample_bicubic2d.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9329
9738
  python_module: nn
9330
9739
  structured: True
@@ -9347,6 +9756,28 @@
9347
9756
  python_module: nn
9348
9757
  structured_delegate: upsample_bicubic2d_backward.grad_input
9349
9758
 
9759
+ - func: _upsample_bicubic2d_aa.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9760
+ python_module: nn
9761
+ structured: True
9762
+ dispatch:
9763
+ CPU: _upsample_bicubic2d_aa_out_cpu
9764
+ CUDA: _upsample_bicubic2d_aa_out_cuda
9765
+
9766
+ - func: _upsample_bicubic2d_aa(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
9767
+ python_module: nn
9768
+ structured_delegate: _upsample_bicubic2d_aa.out
9769
+
9770
+ - func: _upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9771
+ python_module: nn
9772
+ structured: True
9773
+ dispatch:
9774
+ CPU: _upsample_bicubic2d_aa_backward_out_cpu
9775
+ CUDA: _upsample_bicubic2d_aa_backward_out_cuda
9776
+
9777
+ - func: _upsample_bicubic2d_aa_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
9778
+ python_module: nn
9779
+ structured_delegate: _upsample_bicubic2d_aa_backward.grad_input
9780
+
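The `_upsample_bilinear2d_aa` and `_upsample_bicubic2d_aa` kernels are the anti-aliased resampling paths; upstream they are reached through the `antialias=True` flag of `torch.nn.functional.interpolate`. A sketch, assuming that flag is available for the bilinear and bicubic modes in this release:

    import torch
    import torch.nn.functional as F

    img = torch.rand(1, 3, 256, 256)

    # Anti-aliasing matters mainly when downsampling: the input is low-pass
    # filtered first, so the result is much closer to PIL's resize.
    plain = F.interpolate(img, size=(64, 64), mode="bilinear", align_corners=False)
    aa    = F.interpolate(img, size=(64, 64), mode="bilinear", align_corners=False, antialias=True)
    aa_bc = F.interpolate(img, size=(64, 64), mode="bicubic",  align_corners=False, antialias=True)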
9350
9781
  - func: upsample_trilinear3d.out(Tensor self, int[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9351
9782
  python_module: nn
9352
9783
  structured: True
@@ -9376,10 +9807,21 @@
9376
9807
  CPU: upsample_nearest1d_out_cpu
9377
9808
  CUDA: upsample_nearest1d_out_cuda
9378
9809
 
9810
+ - func: _upsample_nearest_exact1d.out(Tensor self, int[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
9811
+ python_module: nn
9812
+ structured: True
9813
+ dispatch:
9814
+ CPU: _upsample_nearest_exact1d_out_cpu
9815
+ CUDA: _upsample_nearest_exact1d_out_cuda
9816
+
9379
9817
  - func: upsample_nearest1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor
9380
9818
  python_module: nn
9381
9819
  structured_delegate: upsample_nearest1d.out
9382
9820
 
9821
+ - func: _upsample_nearest_exact1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor
9822
+ python_module: nn
9823
+ structured_delegate: _upsample_nearest_exact1d.out
9824
+
9383
9825
  - func: upsample_nearest1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9384
9826
  python_module: nn
9385
9827
  structured: True
@@ -9387,10 +9829,21 @@
9387
9829
  CPU: upsample_nearest1d_backward_out_cpu
9388
9830
  CUDA: upsample_nearest1d_backward_out_cuda
9389
9831
 
9832
+ - func: _upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9833
+ python_module: nn
9834
+ structured: True
9835
+ dispatch:
9836
+ CPU: _upsample_nearest_exact1d_backward_out_cpu
9837
+ CUDA: _upsample_nearest_exact1d_backward_out_cuda
9838
+
9390
9839
  - func: upsample_nearest1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor
9391
9840
  python_module: nn
9392
9841
  structured_delegate: upsample_nearest1d_backward.grad_input
9393
9842
 
9843
+ - func: _upsample_nearest_exact1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor
9844
+ python_module: nn
9845
+ structured_delegate: _upsample_nearest_exact1d_backward.grad_input
9846
+
9394
9847
  - func: upsample_nearest2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9395
9848
  python_module: nn
9396
9849
  structured: True
@@ -9398,12 +9851,25 @@
9398
9851
  CPU: upsample_nearest2d_out_cpu
9399
9852
  CUDA: upsample_nearest2d_out_cuda
9400
9853
 
9854
+ - func: _upsample_nearest_exact2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9855
+ python_module: nn
9856
+ structured: True
9857
+ dispatch:
9858
+ CPU: _upsample_nearest_exact2d_out_cpu
9859
+ CUDA: _upsample_nearest_exact2d_out_cuda
9860
+
9401
9861
  - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
9402
9862
  python_module: nn
9403
9863
  structured_delegate: upsample_nearest2d.out
9404
9864
  dispatch:
9405
9865
  QuantizedCPU: upsample_nearest2d_quantized_cpu
9406
9866
 
9867
+ - func: _upsample_nearest_exact2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
9868
+ python_module: nn
9869
+ structured_delegate: _upsample_nearest_exact2d.out
9870
+ dispatch:
9871
+ QuantizedCPU: _upsample_nearest_exact2d_quantized_cpu
9872
+
9407
9873
  - func: upsample_nearest2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9408
9874
  python_module: nn
9409
9875
  structured: True
@@ -9411,10 +9877,21 @@
9411
9877
  CPU: upsample_nearest2d_backward_out_cpu
9412
9878
  CUDA: upsample_nearest2d_backward_out_cuda
9413
9879
 
9880
+ - func: _upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9881
+ python_module: nn
9882
+ structured: True
9883
+ dispatch:
9884
+ CPU: _upsample_nearest_exact2d_backward_out_cpu
9885
+ CUDA: _upsample_nearest_exact2d_backward_out_cuda
9886
+
9414
9887
  - func: upsample_nearest2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
9415
9888
  python_module: nn
9416
9889
  structured_delegate: upsample_nearest2d_backward.grad_input
9417
9890
 
9891
+ - func: _upsample_nearest_exact2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
9892
+ python_module: nn
9893
+ structured_delegate: _upsample_nearest_exact2d_backward.grad_input
9894
+
9418
9895
  - func: upsample_nearest3d.out(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9419
9896
  python_module: nn
9420
9897
  structured: True
@@ -9422,12 +9899,25 @@
9422
9899
  CPU: upsample_nearest3d_out_cpu
9423
9900
  CUDA: upsample_nearest3d_out_cuda
9424
9901
 
9902
+ - func: _upsample_nearest_exact3d.out(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
9903
+ python_module: nn
9904
+ structured: True
9905
+ dispatch:
9906
+ CPU: _upsample_nearest_exact3d_out_cpu
9907
+ CUDA: _upsample_nearest_exact3d_out_cuda
9908
+
9425
9909
  - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
9426
9910
  python_module: nn
9427
9911
  structured_delegate: upsample_nearest3d.out
9428
9912
  dispatch:
9429
9913
  QuantizedCPU: upsample_nearest3d_quantized_cpu
9430
9914
 
9915
+ - func: _upsample_nearest_exact3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
9916
+ python_module: nn
9917
+ structured_delegate: _upsample_nearest_exact3d.out
9918
+ dispatch:
9919
+ QuantizedCPU: _upsample_nearest_exact3d_quantized_cpu
9920
+
9431
9921
  - func: upsample_nearest3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9432
9922
  python_module: nn
9433
9923
  structured: True
@@ -9435,10 +9925,21 @@
9435
9925
  CPU: upsample_nearest3d_backward_out_cpu
9436
9926
  CUDA: upsample_nearest3d_backward_out_cuda
9437
9927
 
9928
+ - func: _upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9929
+ python_module: nn
9930
+ structured: True
9931
+ dispatch:
9932
+ CPU: _upsample_nearest_exact3d_backward_out_cpu
9933
+ CUDA: _upsample_nearest_exact3d_backward_out_cuda
9934
+
9438
9935
  - func: upsample_nearest3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
9439
9936
  python_module: nn
9440
9937
  structured_delegate: upsample_nearest3d_backward.grad_input
9441
9938
 
9939
+ - func: _upsample_nearest_exact3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
9940
+ python_module: nn
9941
+ structured_delegate: _upsample_nearest_exact3d_backward.grad_input
9942
+
9442
9943
  - func: sigmoid_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
9443
9944
  python_module: nn
9444
9945
  structured: True
@@ -9501,18 +10002,6 @@
9501
10002
  python_module: nn
9502
10003
  structured_delegate: slow_conv_transpose2d.out
9503
10004
 
9504
- - func: slow_conv_transpose2d_backward.grad_output(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9505
- python_module: nn
9506
- dispatch:
9507
- CPU: slow_conv_transpose2d_backward_out_cpu
9508
- CUDA: slow_conv_transpose2d_backward_out_cuda
9509
-
9510
- - func: slow_conv_transpose2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9511
- python_module: nn
9512
- dispatch:
9513
- CPU: slow_conv_transpose2d_backward_cpu
9514
- CUDA: slow_conv_transpose2d_backward_cuda
9515
-
9516
10005
  - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
9517
10006
  python_module: nn
9518
10007
  dispatch:
@@ -9525,43 +10014,31 @@
9525
10014
  CPU: slow_conv_transpose3d_cpu
9526
10015
  CUDA: slow_conv_transpose3d_cuda
9527
10016
 
9528
- - func: slow_conv_transpose3d_backward.grad_output(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9529
- python_module: nn
9530
- dispatch:
9531
- CPU: slow_conv_transpose3d_backward_out_cpu
9532
- CUDA: slow_conv_transpose3d_backward_out_cuda
9533
-
9534
- - func: slow_conv_transpose3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9535
- python_module: nn
9536
- dispatch:
9537
- CPU: slow_conv_transpose3d_backward_cpu
9538
- CUDA: slow_conv_transpose3d_backward_cuda
9539
-
9540
10017
  - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
9541
10018
  python_module: nn
9542
10019
 
9543
10020
  - func: thnn_conv2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0) -> Tensor
9544
10021
  python_module: nn
9545
10022
 
9546
- - func: thnn_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!))
10023
+ - func: _slow_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output) -> Tensor(a!)
9547
10024
  python_module: nn
9548
10025
  dispatch:
9549
10026
  CPU: slow_conv2d_forward_out_cpu
9550
10027
  CUDA: slow_conv2d_forward_out_cuda
9551
10028
 
9552
- - func: thnn_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input)
10029
+ - func: _slow_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> Tensor
9553
10030
  python_module: nn
9554
10031
  dispatch:
9555
10032
  CPU: slow_conv2d_forward_cpu
9556
10033
  CUDA: slow_conv2d_forward_cuda
9557
10034
 
9558
- - func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
10035
+ - func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9559
10036
  python_module: nn
9560
10037
  dispatch:
9561
10038
  CPU: slow_conv2d_backward_out_cpu
9562
10039
  CUDA: slow_conv2d_backward_out_cuda
9563
10040
 
9564
- - func: thnn_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
10041
+ - func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9565
10042
  python_module: nn
9566
10043
  dispatch:
9567
10044
  CPU: slow_conv2d_backward_cpu
@@ -9578,81 +10055,39 @@
9578
10055
  dispatch:
9579
10056
  CUDA: conv_depthwise2d_cuda
9580
10057
 
9581
- - func: _conv_depthwise2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, *, Tensor(a!) grad_input, Tensor(b!) grad_weight) -> (Tensor(a!), Tensor(b!))
9582
- python_module: nn
9583
- dispatch:
9584
- CUDA: conv_depthwise2d_backward_cuda_out
9585
-
9586
- - func: _conv_depthwise2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[2] output_mask) -> (Tensor grad_input, Tensor grad_weight)
9587
- python_module: nn
9588
- dispatch:
9589
- CUDA: conv_depthwise2d_backward_cuda
9590
-
9591
10058
  - func: conv_depthwise3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, int[3] dilation) -> Tensor
9592
10059
  python_module: nn
9593
10060
  dispatch:
9594
10061
  CUDA: conv_depthwise3d_cuda
9595
10062
 
9596
- - func: conv_depthwise3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9597
- python_module: nn
9598
- dispatch:
9599
- CUDA: conv_depthwise3d_backward_cuda_out
9600
-
9601
- - func: conv_depthwise3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9602
- python_module: nn
9603
- dispatch:
9604
- CUDA: conv_depthwise3d_backward_cuda
9605
-
9606
10063
  - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
9607
10064
  python_module: nn
9608
10065
 
9609
10066
  - func: slow_conv3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0) -> Tensor
9610
10067
  python_module: nn
9611
10068
 
9612
- - func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!))
10069
+ - func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output) -> Tensor(a!)
9613
10070
  python_module: nn
9614
10071
  dispatch:
9615
10072
  CPU: slow_conv3d_forward_out_cpu
9616
10073
 
9617
- - func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input)
10074
+ - func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> Tensor
9618
10075
  python_module: nn
9619
10076
  dispatch:
9620
10077
  CPU: slow_conv3d_forward_cpu
9621
10078
 
9622
- - func: slow_conv3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
9623
- python_module: nn
9624
- dispatch:
9625
- CPU: slow_conv3d_backward_out_cpu
9626
-
9627
- - func: slow_conv3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9628
- python_module: nn
9629
- dispatch:
9630
- CPU: slow_conv3d_backward_cpu
9631
-
9632
10079
  - func: slow_conv_dilated2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1) -> Tensor
9633
10080
  python_module: nn
9634
10081
  dispatch:
9635
10082
  CPU: slow_conv_dilated2d_cpu
9636
10083
  CUDA: slow_conv_dilated2d_cuda
9637
10084
 
9638
- - func: slow_conv_dilated2d_backward(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9639
- python_module: nn
9640
- dispatch:
9641
- CPU: slow_conv_dilated2d_backward_cpu
9642
- CUDA: slow_conv_dilated2d_backward_cuda
9643
-
9644
10085
  - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor
9645
10086
  python_module: nn
9646
10087
  dispatch:
9647
10088
  CPU: slow_conv_dilated3d_cpu
9648
10089
  CUDA: slow_conv_dilated3d_cuda
9649
10090
 
9650
- - func: slow_conv_dilated3d_backward(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
9651
- python_module: nn
9652
- dispatch:
9653
- CPU: slow_conv_dilated3d_backward_cpu
9654
- CUDA: slow_conv_dilated3d_backward_cuda
9655
-
9656
10091
  - func: col2im.out(Tensor self, int[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)
9657
10092
  python_module: nn
9658
10093
  dispatch:
@@ -9714,6 +10149,10 @@
9714
10149
  variants: function, method
9715
10150
  device_check: NoCheck
9716
10151
  device_guard: False
10152
+ dispatch:
10153
+ CompositeExplicitAutograd: isinf
10154
+ SparseCPU, SparseCUDA: isinf_sparse
10155
+ SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
9717
10156
 
9718
10157
  - func: record_stream(Tensor(a!) self, Stream s) -> ()
9719
10158
  variants: method
@@ -9723,22 +10162,32 @@
9723
10162
  - func: isposinf(Tensor self) -> Tensor
9724
10163
  variants: function, method
9725
10164
  structured_delegate: isposinf.out
10165
+ dispatch:
10166
+ SparseCPU, SparseCUDA: isposinf_sparse
10167
+ SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr
9726
10168
 
9727
10169
  - func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
9728
10170
  structured: True
9729
10171
  structured_inherits: TensorIteratorBase
9730
10172
  dispatch:
9731
10173
  CPU, CUDA: isposinf_out
10174
+ SparseCPU, SparseCUDA: isposinf_sparse_out
10175
+ SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out
9732
10176
 
9733
10177
  - func: isneginf(Tensor self) -> Tensor
9734
10178
  variants: function, method
9735
10179
  structured_delegate: isneginf.out
10180
+ dispatch:
10181
+ SparseCPU, SparseCUDA: isneginf_sparse
10182
+ SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr
9736
10183
 
9737
10184
  - func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
9738
10185
  structured: True
9739
10186
  structured_inherits: TensorIteratorBase
9740
10187
  dispatch:
9741
10188
  CPU, CUDA: isneginf_out
10189
+ SparseCPU, SparseCUDA: isneginf_sparse_out
10190
+ SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out
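The added Sparse/SparseCsr dispatch keys indicate that `isinf`, `isposinf`, and `isneginf` now handle sparse inputs without densifying. A small illustration using COO sparse tensors (upstream Python API):

    import torch

    dense = torch.tensor([[float("inf"), 0.0],
                          [0.0, float("-inf")]])
    sp = dense.to_sparse()  # only the two inf entries are stored

    print(torch.isinf(sp).to_dense())     # [[True, False], [False, True]]
    print(torch.isposinf(sp).to_dense())  # [[True, False], [False, False]]
    print(torch.isneginf(sp).to_dense())  # [[False, False], [False, True]]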
9742
10191
 
9743
10192
  # NOTE [_add_batch_dim and _remove_batch_dim]
9744
10193
  # _add_batch_dim and _remove_batch_dim are meant to be used in the implementation
@@ -10065,11 +10514,11 @@
10065
10514
  python_module: special
10066
10515
  variants: function
10067
10516
 
10068
- - func: special_round(Tensor self) -> Tensor
10517
+ - func: special_round(Tensor self, *, int decimals=0) -> Tensor
10069
10518
  python_module: special
10070
10519
  variants: function
10071
10520
 
10072
- - func: special_round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
10521
+ - func: special_round.out(Tensor self, *, int decimals=0, Tensor(a!) out) -> Tensor(a!)
10073
10522
  python_module: special
10074
10523
  variants: function
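`special_round` gains a keyword-only `decimals` argument, matching the schema change to `round`. Assuming the Python alias forwards it unchanged, usage looks like:

    import torch

    x = torch.tensor([0.1234, 5.6789])
    print(torch.special.round(x))              # tensor([0., 6.])
    print(torch.special.round(x, decimals=2))  # tensor([0.1200, 5.6800])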
10075
10524
 
@@ -10109,6 +10558,10 @@
10109
10558
  python_module: special
10110
10559
  variants: function
10111
10560
 
10561
+ - func: special_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
10562
+ python_module: special
10563
+ variants: function
10564
+
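`special_softmax` appears to be a convenience entry mirroring the regular softmax under the `special` namespace (presumably for `scipy.special.softmax` parity); `dim` is required. Illustrative Python usage, assuming it behaves like `torch.softmax`:

    import torch

    logits = torch.tensor([[1.0, 2.0, 3.0]])
    probs = torch.special.softmax(logits, dim=-1)
    print(probs.sum(dim=-1))  # ~1.0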
10112
10565
  ## Functions related to the fast Fourier transform and the torch.fft namespace
10113
10566
  # Note [FFT namespace binding]
10114
10567
  # Functions in the fft python module should have their names start with
@@ -10200,6 +10653,26 @@
10200
10653
  python_module: fft
10201
10654
  variants: function
10202
10655
 
10656
+ - func: fft_hfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
10657
+ use_const_ref_for_mutable_tensors: True
10658
+ python_module: fft
10659
+ variants: function
10660
+
10661
+ - func: fft_hfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
10662
+ use_const_ref_for_mutable_tensors: True
10663
+ python_module: fft
10664
+ variants: function
10665
+
10666
+ - func: fft_ihfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
10667
+ use_const_ref_for_mutable_tensors: True
10668
+ python_module: fft
10669
+ variants: function
10670
+
10671
+ - func: fft_ihfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
10672
+ use_const_ref_for_mutable_tensors: True
10673
+ python_module: fft
10674
+ variants: function
10675
+
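`fft_hfft2`/`fft_ihfft2` extend the Hermitian FFT helpers to two dimensions. A sketch of the round trip through the upstream Python API, assuming the same conventions as the 1-D `hfft`/`ihfft` pair:

    import torch

    x = torch.randn(4, 6)              # real signal
    X = torch.fft.ihfft2(x)            # complex, last dim halved: shape (4, 4)
    y = torch.fft.hfft2(X, s=x.shape)  # back to a real (4, 6) tensor
    print(torch.allclose(y, x, atol=1e-6))  # expected: True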
10203
10676
  - func: fft_fftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
10204
10677
  python_module: fft
10205
10678
  variants: function
@@ -10232,6 +10705,26 @@
10232
10705
  python_module: fft
10233
10706
  variants: function
10234
10707
 
10708
+ - func: fft_hfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
10709
+ use_const_ref_for_mutable_tensors: True
10710
+ python_module: fft
10711
+ variants: function
10712
+
10713
+ - func: fft_hfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
10714
+ use_const_ref_for_mutable_tensors: True
10715
+ python_module: fft
10716
+ variants: function
10717
+
10718
+ - func: fft_ihfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
10719
+ use_const_ref_for_mutable_tensors: True
10720
+ python_module: fft
10721
+ variants: function
10722
+
10723
+ - func: fft_ihfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
10724
+ use_const_ref_for_mutable_tensors: True
10725
+ python_module: fft
10726
+ variants: function
10727
+
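`fft_hfftn`/`fft_ihfftn` are the N-dimensional counterparts; `s` and `dim` pick the transformed sizes and axes. A minimal sketch (upstream Python API):

    import torch

    vol = torch.randn(2, 8, 8)                  # real 3-D signal
    spec = torch.fft.ihfftn(vol, dim=(-2, -1))  # transform only the last two dims
    out = torch.fft.hfftn(spec, s=vol.shape[-2:], dim=(-2, -1))
    print(out.shape)  # torch.Size([2, 8, 8])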
10235
10728
  - func: fft_fftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
10236
10729
  python_module: fft
10237
10730
  variants: function
@@ -10286,6 +10779,38 @@
10286
10779
  python_module: linalg
10287
10780
  variants: function
10288
10781
 
10782
+ - func: linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor
10783
+ python_module: linalg
10784
+ variants: function
10785
+ dispatch:
10786
+ CPU, CUDA: linalg_cross
10787
+
10788
+ - func: linalg_cross.out(Tensor self, Tensor other, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)
10789
+ python_module: linalg
10790
+ dispatch:
10791
+ CPU, CUDA: linalg_cross_out
10792
+
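`linalg_cross` is the batched cross product with a keyword-only `dim` that defaults to the last dimension (unlike the older `torch.cross`, which searches for a size-3 dimension). Sketch:

    import torch

    a = torch.randn(4, 3)
    b = torch.randn(4, 3)
    c = torch.linalg.cross(a, b)  # cross product along dim=-1, shape (4, 3)

    # each result vector is orthogonal to both inputs
    print(torch.allclose((c * a).sum(-1), torch.zeros(4), atol=1e-5))  # True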
10793
+ # linalg.lu_factor
10794
+ - func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
10795
+ python_module: linalg
10796
+ variants: function
10797
+
10798
+ - func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
10799
+ python_module: linalg
10800
+ variants: function
10801
+
10802
+ - func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
10803
+ python_module: linalg
10804
+ structured_delegate: linalg_lu_factor_ex.out
10805
+ variants: function
10806
+
10807
+ - func: linalg_lu_factor_ex.out(Tensor A, *, bool pivot=True, bool check_errors=False, Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info)
10808
+ python_module: linalg
10809
+ variants: function
10810
+ structured: True
10811
+ dispatch:
10812
+ CPU, CUDA: linalg_lu_factor_ex_out
10813
+
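`linalg_lu_factor` returns the packed LU factorization plus pivots, while `linalg_lu_factor_ex` also returns an `info` tensor instead of raising on singular input. A sketch that reuses the factorization for a solve via the pre-existing `torch.lu_solve` (illustrative only):

    import torch

    A = torch.randn(3, 3)
    b = torch.randn(3, 2)

    LU, pivots = torch.linalg.lu_factor(A)
    x = torch.lu_solve(b, LU, pivots)            # solves A @ x = b
    print(torch.allclose(A @ x, b, atol=1e-4))   # True for well-conditioned A

    LU, pivots, info = torch.linalg.lu_factor_ex(A, check_errors=False)
    print(info)                                  # tensor(0) on success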
10289
10814
  - func: linalg_det(Tensor self) -> Tensor
10290
10815
  python_module: linalg
10291
10816
  variants: function
@@ -10327,6 +10852,12 @@
10327
10852
  - func: linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
10328
10853
  python_module: linalg
10329
10854
 
10855
+ - func: linalg_matrix_exp(Tensor self) -> Tensor
10856
+ python_module: linalg
10857
+ variants: function
10858
+ dispatch:
10859
+ CPU, CUDA: linalg_matrix_exp
10860
+
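`linalg_matrix_exp` moves the matrix exponential into the linalg namespace. For a diagonal matrix it reduces to an elementwise exp on the diagonal, which makes a quick sanity check (sketch):

    import torch

    d = torch.tensor([0.0, 1.0, 2.0])
    E = torch.linalg.matrix_exp(torch.diag(d))
    print(torch.allclose(E, torch.diag(torch.exp(d))))  # True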
10330
10861
  - func: linalg_slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
10331
10862
  python_module: linalg
10332
10863
  variants: function
@@ -10467,18 +10998,30 @@
10467
10998
  - func: linalg_matrix_norm.str_ord_out(Tensor self, str ord='fro', int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
10468
10999
  python_module: linalg
10469
11000
 
10470
- - func: linalg_svd.U(Tensor self, bool full_matrices=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
11001
+ # This function exposes the `compute_uv` flag, which is then used to implement `linalg.svd` and
11002
+ # `linalg.svdvals` as composite functions that call this one.
11003
+ - func: _linalg_svd(Tensor A, bool full_matrices=False, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor Vh)
11004
+ variants: function
11005
+ structured_delegate: _linalg_svd.U
11006
+
11007
+ - func: _linalg_svd.U(Tensor A, bool full_matrices=False, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
11008
+ structured: True
11009
+ dispatch:
11010
+ CPU, CUDA: _linalg_svd_out
11011
+
11012
+ - func: linalg_svd(Tensor A, bool full_matrices=True) -> (Tensor U, Tensor S, Tensor Vh)
10471
11013
  python_module: linalg
11014
+ variants: function
10472
11015
 
10473
- - func: linalg_svd(Tensor self, bool full_matrices=True) -> (Tensor U, Tensor S, Tensor Vh)
11016
+ - func: linalg_svd.U(Tensor A, bool full_matrices=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
10474
11017
  python_module: linalg
10475
11018
  variants: function
10476
11019
 
10477
- - func: linalg_svdvals(Tensor input) -> Tensor
11020
+ - func: linalg_svdvals(Tensor A) -> Tensor
10478
11021
  python_module: linalg
10479
11022
  variants: function
10480
11023
 
10481
- - func: linalg_svdvals.out(Tensor input, *, Tensor(a!) out) -> Tensor(a!)
11024
+ - func: linalg_svdvals.out(Tensor A, *, Tensor(a!) out) -> Tensor(a!)
10482
11025
  python_module: linalg
10483
11026
  variants: function
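With `_linalg_svd` carrying the `compute_uv` flag, the public `linalg.svd` and `linalg.svdvals` become thin wrappers over it. Typical upstream usage (sketch):

    import torch

    A = torch.randn(5, 3)

    U, S, Vh = torch.linalg.svd(A, full_matrices=False)          # reduced SVD
    print(torch.allclose(U @ torch.diag(S) @ Vh, A, atol=1e-5))  # True

    print(torch.allclose(torch.linalg.svdvals(A), S))            # singular values only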
10484
11027
 
@@ -10498,7 +11041,29 @@
10498
11041
  python_module: linalg
10499
11042
  variants: function
10500
11043
 
10501
- - func: linalg_pinv(Tensor self, float rcond=1e-15, bool hermitian=False) -> Tensor
11044
+ - func: linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
11045
+ python_module: linalg
11046
+ variants: function
11047
+ dispatch:
11048
+ CompositeExplicitAutograd: linalg_pinv
11049
+
11050
+ - func: linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
11051
+ python_module: linalg
11052
+ variants: function
11053
+ dispatch:
11054
+ CompositeExplicitAutograd: linalg_pinv_out
11055
+
11056
+ - func: linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor
11057
+ cpp_no_default_args: ['atol', 'rtol']
11058
+ python_module: linalg
11059
+ variants: function
11060
+
11061
+ - func: linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
11062
+ cpp_no_default_args: ['atol', 'rtol']
11063
+ python_module: linalg
11064
+ variants: function
11065
+
11066
+ - func: linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor
10502
11067
  python_module: linalg
10503
11068
  variants: function
10504
11069
 
@@ -10506,7 +11071,7 @@
10506
11071
  python_module: linalg
10507
11072
  variants: function
10508
11073
 
10509
- - func: linalg_pinv.out(Tensor self, float rcond=1e-15, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
11074
+ - func: linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
10510
11075
  python_module: linalg
10511
11076
  variants: function
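`linalg_pinv` now takes `atol`/`rtol` cutoffs, either as floats or as tensors (the `atol_rtol_tensor` overload) for per-matrix thresholds in a batch, while the positional `rcond` overload loses its default. Sketch with a relative cutoff:

    import torch

    A = torch.randn(4, 6)
    P = torch.linalg.pinv(A, rtol=1e-5)             # cutoff relative to the largest singular value
    print(torch.allclose(A @ P @ A, A, atol=1e-4))  # Moore-Penrose identity, expected True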
10512
11077
 
@@ -10565,11 +11130,29 @@
10565
11130
  - func: linalg_matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)
10566
11131
  python_module: linalg
10567
11132
 
10568
- - func: linalg_matrix_rank(Tensor self, float? tol=None, bool hermitian=False) -> Tensor
11133
+ - func: linalg_matrix_rank.atol_rtol_tensor(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
10569
11134
  python_module: linalg
10570
11135
  variants: function
10571
11136
 
10572
- - func: linalg_matrix_rank.out(Tensor self, float? tol=None, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
11137
+ - func: linalg_matrix_rank.atol_rtol_tensor_out(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
11138
+ python_module: linalg
11139
+ variants: function
11140
+
11141
+ - func: linalg_matrix_rank.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor
11142
+ cpp_no_default_args: ['atol', 'rtol']
11143
+ python_module: linalg
11144
+ variants: function
11145
+
11146
+ - func: linalg_matrix_rank.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
11147
+ cpp_no_default_args: ['atol', 'rtol']
11148
+ python_module: linalg
11149
+ variants: function
11150
+
11151
+ - func: linalg_matrix_rank(Tensor self, float tol, bool hermitian=False) -> Tensor
11152
+ python_module: linalg
11153
+ variants: function
11154
+
11155
+ - func: linalg_matrix_rank.out(Tensor self, float tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
10573
11156
  python_module: linalg
10574
11157
  variants: function
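`linalg_matrix_rank` gets the same `atol`/`rtol` treatment, and its legacy `tol` overload likewise loses the default. Sketch:

    import torch

    # rank-2 matrix: two independent rows plus one duplicate
    A = torch.tensor([[1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0],
                      [1.0, 0.0, 0.0]])
    print(torch.linalg.matrix_rank(A))             # tensor(2)
    print(torch.linalg.matrix_rank(A, atol=10.0))  # tensor(0): every singular value is below the cutoff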
10575
11158
 
@@ -10622,6 +11205,12 @@
10622
11205
  cpp_no_default_args: ['a', 'b']
10623
11206
  python_module: nn
10624
11207
 
11208
+ # Note: this function is only for testing.
11209
+ - func: _test_warn_in_autograd(Tensor self) -> Tensor
11210
+ python_module: nn
11211
+ dispatch:
11212
+ CompositeExplicitAutograd: _test_warn_in_autograd
11213
+
10625
11214
  - func: segment_reduce(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, int axis=0, bool unsafe=False, Scalar? initial=None) -> Tensor
10626
11215
  variants: function
10627
11216
  dispatch: