torch-rb 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +2 -1
- data/codegen/generate_functions.rb +24 -7
- data/codegen/native_functions.yaml +1362 -199
- data/ext/torch/extconf.rb +1 -13
- data/ext/torch/ruby_arg_parser.h +11 -3
- data/ext/torch/utils.h +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +1 -0
- metadata +3 -3
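Most of this release's diff is the vendored `data/codegen/native_functions.yaml`, the PyTorch operator-schema file consumed by `codegen/generate_functions.rb` when the Ruby bindings are generated. The hunks below mostly add new dispatch keys to existing operator entries, notably `MPS` (Metal) kernels, plus new `NestedTensor*`, `SparseCsr*`, and `autogen:` lines. As a minimal, illustrative sketch of the entry shape being edited (`example_op` and its kernel names are placeholders, not operators taken from the file):

```yaml
# Illustrative only: the general shape of a native_functions.yaml entry —
# a schema string, per-entry options, and a dispatch table mapping
# dispatch keys (CPU, CUDA, MPS, ...) to kernel names.
- func: example_op.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
    CPU, CUDA: example_op_out
    MPS: example_op_out_mps   # MPS entries like this are what most hunks below add
```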
@@ -145,6 +145,7 @@
|
|
145
145
|
|
146
146
|
- func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
|
147
147
|
variants: method
|
148
|
+
tags: inplace_view
|
148
149
|
|
149
150
|
- func: rename(Tensor(a) self, Dimname[]? names) -> Tensor(a)
|
150
151
|
variants: method
|
@@ -274,6 +275,7 @@
|
|
274
275
|
device_check: NoCheck # TensorIterator
|
275
276
|
dispatch:
|
276
277
|
CPU, CUDA: abs_out
|
278
|
+
MPS: abs_out_mps
|
277
279
|
SparseCPU, SparseCUDA: abs_sparse_out
|
278
280
|
SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
|
279
281
|
|
@@ -328,12 +330,12 @@
|
|
328
330
|
- func: view_as_real(Tensor(a) self) -> Tensor(a)
|
329
331
|
variants: function
|
330
332
|
dispatch:
|
331
|
-
CPU, CUDA: view_as_real
|
333
|
+
CPU, CUDA, MPS, Meta: view_as_real
|
332
334
|
|
333
335
|
- func: view_as_complex(Tensor(a) self) -> Tensor(a)
|
334
336
|
variants: function
|
335
337
|
dispatch:
|
336
|
-
CPU, CUDA: view_as_complex
|
338
|
+
CPU, CUDA, Meta: view_as_complex
|
337
339
|
|
338
340
|
- func: sgn(Tensor self) -> Tensor
|
339
341
|
variants: function, method
|
@@ -357,6 +359,9 @@
|
|
357
359
|
SparseCPU, SparseCUDA: sgn_sparse_out
|
358
360
|
SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
|
359
361
|
|
362
|
+
- func: chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
|
363
|
+
variants: method
|
364
|
+
|
360
365
|
- func: real(Tensor(a) self) -> Tensor(a)
|
361
366
|
device_check: NoCheck # TensorIterator
|
362
367
|
variants: function
|
@@ -422,6 +427,7 @@
|
|
422
427
|
structured_inherits: TensorIteratorBase
|
423
428
|
dispatch:
|
424
429
|
CPU, CUDA: acos_out
|
430
|
+
MPS: acos_out_mps
|
425
431
|
|
426
432
|
# arccos, alias of acos
|
427
433
|
- func: arccos(Tensor self) -> Tensor
|
@@ -448,6 +454,7 @@
|
|
448
454
|
SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
|
449
455
|
MkldnnCPU: mkldnn_add
|
450
456
|
ZeroTensor: add_zerotensor
|
457
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
|
451
458
|
|
452
459
|
- func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
|
453
460
|
device_check: NoCheck # TensorIterator
|
@@ -457,18 +464,22 @@
|
|
457
464
|
SparseCPU, SparseCUDA: add_sparse_
|
458
465
|
SparseCsrCPU, SparseCsrCUDA: add_sparse_csr_
|
459
466
|
MkldnnCPU: mkldnn_add_
|
467
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
|
460
468
|
|
461
469
|
- func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
|
462
470
|
device_check: NoCheck # TensorIterator
|
463
471
|
structured: True
|
464
472
|
structured_inherits: TensorIteratorBase
|
473
|
+
ufunc_inner_loop:
|
474
|
+
Generic: add (AllAndComplex, BFloat16, Half, ComplexHalf)
|
475
|
+
ScalarOnly: add (Bool)
|
465
476
|
dispatch:
|
466
|
-
CPU, CUDA: add_out
|
467
477
|
SparseCPU: add_out_sparse_cpu
|
468
478
|
SparseCUDA: add_out_sparse_cuda
|
469
479
|
SparseCsrCPU: add_out_sparse_csr_cpu
|
470
480
|
SparseCsrCUDA: add_out_sparse_csr_cuda
|
471
481
|
MkldnnCPU: mkldnn_add_out
|
482
|
+
MPS: add_out_mps
|
472
483
|
|
473
484
|
- func: _add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
|
474
485
|
variants: function
|
@@ -494,6 +505,7 @@
|
|
494
505
|
variants: function
|
495
506
|
dispatch:
|
496
507
|
CPU: add_relu_
|
508
|
+
autogen: _add_relu.Scalar_out
|
497
509
|
|
498
510
|
# For C++ only, until we have conversion from C++ numbers to Tensor
|
499
511
|
- func: add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
|
@@ -507,6 +519,7 @@
|
|
507
519
|
variants: method
|
508
520
|
dispatch:
|
509
521
|
CompositeExplicitAutograd: add_
|
522
|
+
autogen: add.Scalar_out
|
510
523
|
|
511
524
|
- func: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor
|
512
525
|
structured_delegate: addmv.out
|
@@ -521,8 +534,9 @@
|
|
521
534
|
dispatch:
|
522
535
|
CPU: addmv_out_cpu
|
523
536
|
CUDA: addmv_out_cuda
|
524
|
-
|
525
|
-
|
537
|
+
MPS: addmv_out_mps
|
538
|
+
SparseCsrCPU: addmv_out_sparse_compressed
|
539
|
+
SparseCsrCUDA: addmv_out_sparse_compressed_cuda
|
526
540
|
|
527
541
|
- func: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
|
528
542
|
variants: function, method
|
@@ -560,6 +574,7 @@
|
|
560
574
|
- dim -> int dim
|
561
575
|
dispatch:
|
562
576
|
CPU, CUDA: all_out
|
577
|
+
MPS: all_out_mps
|
563
578
|
|
564
579
|
- func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
|
565
580
|
device_check: NoCheck # TensorIterator
|
@@ -583,6 +598,7 @@
|
|
583
598
|
- dim -> int dim
|
584
599
|
dispatch:
|
585
600
|
CPU, CUDA: any_out
|
601
|
+
MPS: any_out_mps
|
586
602
|
|
587
603
|
- func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
|
588
604
|
device_check: NoCheck # TensorIterator
|
@@ -595,6 +611,12 @@
|
|
595
611
|
|
596
612
|
- func: arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
597
613
|
|
614
|
+
# Note [arange.start_step schema]
|
615
|
+
# We want `arange.start_step` to be grouped up with `arange.start_out`,
|
616
|
+
# But this doesn't happen automatically because the step argument
|
617
|
+
# is defaultable for .start_out but not for .start_step.
|
618
|
+
# We should probably just make "step" a defaultable param on arange.start,
|
619
|
+
# and kill arange.start_step.
|
598
620
|
- func: arange.start_step(Scalar start, Scalar end, Scalar step, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
599
621
|
|
600
622
|
- func: arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -603,6 +625,7 @@
|
|
603
625
|
dispatch:
|
604
626
|
CPU, Meta: arange_out
|
605
627
|
CUDA: arange_cuda_out
|
628
|
+
MPS: arange_mps_out
|
606
629
|
|
607
630
|
# This function is a temporary hack to allow tracing of arange like constructs with dynamic
|
608
631
|
# bounds on arange. Normal arange is not traceable because it does not take any tensor inputs;
|
@@ -620,6 +643,7 @@
|
|
620
643
|
structured: True
|
621
644
|
dispatch:
|
622
645
|
CPU, CUDA: argmax_out
|
646
|
+
MPS: argmax_out_mps
|
623
647
|
|
624
648
|
- func: argmin(Tensor self, int? dim=None, bool keepdim=False) -> Tensor
|
625
649
|
structured_delegate: argmin.out
|
@@ -644,6 +668,7 @@
|
|
644
668
|
structured_inherits: TensorIteratorBase
|
645
669
|
dispatch:
|
646
670
|
CPU, CUDA: acosh_out
|
671
|
+
MPS: acosh_out_mps
|
647
672
|
|
648
673
|
# arccosh, alias for acosh
|
649
674
|
- func: arccosh(Tensor self) -> Tensor
|
@@ -673,6 +698,7 @@
|
|
673
698
|
structured_inherits: TensorIteratorBase
|
674
699
|
dispatch:
|
675
700
|
CPU, CUDA: asinh_out
|
701
|
+
MPS: asinh_out_mps
|
676
702
|
SparseCPU, SparseCUDA: asinh_sparse_out
|
677
703
|
SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
|
678
704
|
|
@@ -705,6 +731,7 @@
|
|
705
731
|
structured_inherits: TensorIteratorBase
|
706
732
|
dispatch:
|
707
733
|
CPU, CUDA: atanh_out
|
734
|
+
MPS: atanh_out_mps
|
708
735
|
SparseCPU, SparseCUDA: atanh_sparse_out
|
709
736
|
SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
|
710
737
|
|
@@ -721,6 +748,7 @@
|
|
721
748
|
variants: function, method
|
722
749
|
dispatch:
|
723
750
|
ZeroTensor, CPU, CUDA, Meta: as_strided_tensorimpl
|
751
|
+
MPS: as_strided_tensorimpl_mps
|
724
752
|
QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
|
725
753
|
device_check: NoCheck
|
726
754
|
device_guard: False
|
@@ -756,6 +784,7 @@
|
|
756
784
|
structured_inherits: TensorIteratorBase
|
757
785
|
dispatch:
|
758
786
|
CPU, CUDA: asin_out
|
787
|
+
MPS: asin_out_mps
|
759
788
|
SparseCPU, SparseCUDA: asin_sparse_out
|
760
789
|
SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
|
761
790
|
|
@@ -790,6 +819,7 @@
|
|
790
819
|
structured_inherits: TensorIteratorBase
|
791
820
|
dispatch:
|
792
821
|
CPU, CUDA: atan_out
|
822
|
+
MPS: atan_out_mps
|
793
823
|
SparseCPU, SparseCUDA: atan_sparse_out
|
794
824
|
SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
|
795
825
|
|
@@ -833,6 +863,7 @@
|
|
833
863
|
dispatch:
|
834
864
|
CPU: baddbmm_out_cpu
|
835
865
|
CUDA: baddbmm_out_cuda
|
866
|
+
MPS: baddbmm_out_mps
|
836
867
|
SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
|
837
868
|
|
838
869
|
- func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
@@ -861,19 +892,26 @@
|
|
861
892
|
variants: function
|
862
893
|
dispatch:
|
863
894
|
CPU, CUDA: bernoulli_out
|
895
|
+
MPS: bernoulli_out_mps
|
864
896
|
|
865
897
|
- func: bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)
|
866
898
|
device_check: NoCheck # TensorIterator
|
867
899
|
variants: method
|
868
900
|
dispatch:
|
869
901
|
CPU, CUDA: bernoulli_
|
902
|
+
MPS: bernoulli_mps_
|
903
|
+
autogen: bernoulli.Tensor_functional, bernoulli.Tensor_out
|
870
904
|
|
871
905
|
- func: bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)
|
872
906
|
device_check: NoCheck # TensorIterator
|
873
907
|
variants: method
|
874
908
|
dispatch:
|
875
909
|
CPU, CUDA: bernoulli_
|
910
|
+
MPS: bernoulli_mps_
|
911
|
+
autogen: bernoulli.float_out
|
876
912
|
|
913
|
+
# Note [bernoulli.p schema]
|
914
|
+
# We should probably just fix the overload ambiguity by appending a _functional to the C++ API name (BC breaking)
|
877
915
|
# This out-of-place version isn't used explicitly, but needed by jit.
|
878
916
|
# There is no default valid on `p` here because it would introduce ambiguity
|
879
917
|
# with `bernoulli(Tensor self, *, Generator? generator=None)` declaration.
|
@@ -890,6 +928,7 @@
|
|
890
928
|
dispatch:
|
891
929
|
CPU: binary_cross_entropy_cpu
|
892
930
|
CUDA: binary_cross_entropy_cuda
|
931
|
+
MPS: binary_cross_entropy_mps
|
893
932
|
|
894
933
|
- func: binary_cross_entropy.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
|
895
934
|
device_check: NoCheck # TensorIterator
|
@@ -898,6 +937,7 @@
|
|
898
937
|
dispatch:
|
899
938
|
CPU: binary_cross_entropy_out_cpu
|
900
939
|
CUDA: binary_cross_entropy_out_cuda
|
940
|
+
MPS: binary_cross_entropy_out_mps
|
901
941
|
|
902
942
|
- func: binary_cross_entropy_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
|
903
943
|
python_module: nn
|
@@ -905,6 +945,7 @@
|
|
905
945
|
dispatch:
|
906
946
|
CPU: binary_cross_entropy_backward_cpu
|
907
947
|
CUDA: binary_cross_entropy_backward_cuda
|
948
|
+
MPS: binary_cross_entropy_backward_mps
|
908
949
|
|
909
950
|
- func: binary_cross_entropy_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) grad_input) -> Tensor(a!)
|
910
951
|
python_module: nn
|
@@ -912,6 +953,7 @@
|
|
912
953
|
dispatch:
|
913
954
|
CPU: binary_cross_entropy_backward_out_cpu
|
914
955
|
CUDA: binary_cross_entropy_backward_out_cuda
|
956
|
+
MPS: binary_cross_entropy_backward_out_mps
|
915
957
|
|
916
958
|
- func: binary_cross_entropy_with_logits(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor
|
917
959
|
device_check: NoCheck # TensorIterator
|
@@ -1061,6 +1103,7 @@
|
|
1061
1103
|
dispatch:
|
1062
1104
|
CPU: bmm_out_cpu
|
1063
1105
|
CUDA: bmm_out_cuda
|
1106
|
+
MPS: bmm_out_mps
|
1064
1107
|
SparseCPU: bmm_out_sparse_cpu
|
1065
1108
|
SparseCUDA: bmm_out_sparse_cuda
|
1066
1109
|
SparseCsrCUDA: bmm_out_sparse_csr_cuda
|
@@ -1078,12 +1121,20 @@
|
|
1078
1121
|
SparseCPU, SparseCUDA: sparse_broadcast_to
|
1079
1122
|
|
1080
1123
|
- func: cat(Tensor[] tensors, int dim=0) -> Tensor
|
1124
|
+
structured_delegate: cat.out
|
1081
1125
|
dispatch:
|
1082
|
-
|
1126
|
+
SparseCPU, SparseCUDA: cat_sparse
|
1127
|
+
QuantizedCPU: cat_quantized_cpu
|
1083
1128
|
|
1084
1129
|
- func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
|
1130
|
+
structured: True
|
1131
|
+
precomputed:
|
1132
|
+
- dim -> int dim, int valid, bool all_contiguous, bool all_same_dtype, bool all_same_sizes_and_stride, MemoryFormat memory_format
|
1085
1133
|
dispatch:
|
1086
|
-
|
1134
|
+
CPU: cat_out_cpu
|
1135
|
+
CUDA: cat_out_cuda
|
1136
|
+
MPS: cat_out_mps
|
1137
|
+
QuantizedCPU: cat_out_quantized_cpu
|
1087
1138
|
|
1088
1139
|
- func: cat.names(Tensor[] tensors, Dimname dim) -> Tensor
|
1089
1140
|
|
@@ -1125,6 +1176,7 @@
|
|
1125
1176
|
structured_inherits: TensorIteratorBase
|
1126
1177
|
dispatch:
|
1127
1178
|
CPU, CUDA: ceil_out
|
1179
|
+
MPS: ceil_out_mps
|
1128
1180
|
SparseCPU, SparseCUDA: ceil_sparse_out
|
1129
1181
|
SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
|
1130
1182
|
|
@@ -1164,8 +1216,7 @@
|
|
1164
1216
|
|
1165
1217
|
- func: clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor
|
1166
1218
|
variants: function, method
|
1167
|
-
|
1168
|
-
CPU, CUDA: clamp
|
1219
|
+
structured_delegate: clamp.Tensor_out
|
1169
1220
|
|
1170
1221
|
- func: clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)
|
1171
1222
|
device_check: NoCheck # TensorIterator
|
@@ -1177,8 +1228,7 @@
|
|
1177
1228
|
|
1178
1229
|
- func: clamp_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!)
|
1179
1230
|
variants: function, method
|
1180
|
-
|
1181
|
-
CompositeExplicitAutograd: clamp_
|
1231
|
+
structured_delegate: clamp.Tensor_out
|
1182
1232
|
|
1183
1233
|
- func: clamp.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)
|
1184
1234
|
device_check: NoCheck # TensorIterator
|
@@ -1187,73 +1237,83 @@
|
|
1187
1237
|
structured_inherits: TensorIteratorBase
|
1188
1238
|
dispatch:
|
1189
1239
|
CPU, CUDA: clamp_out
|
1240
|
+
MPS: clamp_out_mps
|
1190
1241
|
|
1191
1242
|
- func: clamp.Tensor_out(Tensor self, Tensor? min=None, Tensor? max=None, *, Tensor(a!) out) -> Tensor(a!)
|
1192
1243
|
device_check: NoCheck # TensorIterator
|
1244
|
+
structured: True
|
1245
|
+
structured_inherits: TensorIteratorBase
|
1193
1246
|
dispatch:
|
1194
|
-
CPU, CUDA:
|
1247
|
+
CPU, CUDA: clamp_Tensor_out
|
1248
|
+
MPS: clamp_Tensor_out_mps
|
1195
1249
|
|
1196
1250
|
- func: clamp_max(Tensor self, Scalar max) -> Tensor
|
1197
1251
|
device_check: NoCheck # TensorIterator
|
1198
1252
|
variants: function, method
|
1199
|
-
|
1200
|
-
CompositeExplicitAutograd: clamp_max
|
1253
|
+
structured_delegate: clamp_max.out
|
1201
1254
|
|
1202
1255
|
- func: clamp_max.Tensor(Tensor self, Tensor max) -> Tensor
|
1203
1256
|
variants: function, method
|
1204
|
-
|
1205
|
-
CompositeExplicitAutograd: clamp_max
|
1257
|
+
structured_delegate: clamp_max.Tensor_out
|
1206
1258
|
|
1207
1259
|
- func: clamp_max_(Tensor(a!) self, Scalar max) -> Tensor(a!)
|
1208
1260
|
device_check: NoCheck # TensorIterator
|
1209
1261
|
variants: function, method
|
1210
|
-
|
1211
|
-
CompositeExplicitAutograd: clamp_max_
|
1262
|
+
structured_delegate: clamp_max.out
|
1212
1263
|
|
1213
1264
|
- func: clamp_max_.Tensor(Tensor(a!) self, Tensor max) -> Tensor(a!)
|
1214
1265
|
variants: function, method
|
1215
|
-
|
1216
|
-
CompositeExplicitAutograd: clamp_max_
|
1266
|
+
structured_delegate: clamp_max.Tensor_out
|
1217
1267
|
|
1218
1268
|
- func: clamp_max.out(Tensor self, Scalar max, *, Tensor(a!) out) -> Tensor(a!)
|
1219
1269
|
device_check: NoCheck # TensorIterator
|
1270
|
+
structured: True
|
1271
|
+
structured_inherits: TensorIteratorBase
|
1220
1272
|
dispatch:
|
1221
1273
|
CPU, CUDA: clamp_max_out
|
1274
|
+
MPS: clamp_max_out_mps
|
1222
1275
|
|
1223
1276
|
- func: clamp_max.Tensor_out(Tensor self, Tensor max, *, Tensor(a!) out) -> Tensor(a!)
|
1277
|
+
device_check: NoCheck # TensorIterator
|
1278
|
+
structured: True
|
1279
|
+
structured_inherits: TensorIteratorBase
|
1224
1280
|
dispatch:
|
1225
|
-
CPU, CUDA:
|
1281
|
+
CPU, CUDA: clamp_max_Tensor_out
|
1282
|
+
MPS: clamp_max_Tensor_out_mps
|
1226
1283
|
|
1227
1284
|
- func: clamp_min(Tensor self, Scalar min) -> Tensor
|
1228
1285
|
device_check: NoCheck # TensorIterator
|
1229
1286
|
variants: function, method
|
1230
|
-
|
1231
|
-
CompositeExplicitAutograd: clamp_min
|
1287
|
+
structured_delegate: clamp_min.out
|
1232
1288
|
|
1233
1289
|
- func: clamp_min.Tensor(Tensor self, Tensor min) -> Tensor
|
1234
1290
|
variants: function, method
|
1235
|
-
|
1236
|
-
CompositeExplicitAutograd: clamp_min
|
1291
|
+
structured_delegate: clamp_min.Tensor_out
|
1237
1292
|
|
1238
1293
|
- func: clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!)
|
1239
1294
|
device_check: NoCheck # TensorIterator
|
1240
1295
|
variants: function, method
|
1241
|
-
|
1242
|
-
CompositeExplicitAutograd: clamp_min_
|
1296
|
+
structured_delegate: clamp_min.out
|
1243
1297
|
|
1244
1298
|
- func: clamp_min_.Tensor(Tensor(a!) self, Tensor min) -> Tensor(a!)
|
1245
1299
|
variants: function, method
|
1246
|
-
|
1247
|
-
CompositeExplicitAutograd: clamp_min_
|
1300
|
+
structured_delegate: clamp_min.Tensor_out
|
1248
1301
|
|
1249
1302
|
- func: clamp_min.out(Tensor self, Scalar min, *, Tensor(a!) out) -> Tensor(a!)
|
1250
1303
|
device_check: NoCheck # TensorIterator
|
1304
|
+
structured: True
|
1305
|
+
structured_inherits: TensorIteratorBase
|
1251
1306
|
dispatch:
|
1252
1307
|
CPU, CUDA: clamp_min_out
|
1308
|
+
MPS: clamp_min_out_mps
|
1253
1309
|
|
1254
1310
|
- func: clamp_min.Tensor_out(Tensor self, Tensor min, *, Tensor(a!) out) -> Tensor(a!)
|
1311
|
+
device_check: NoCheck # TensorIterator
|
1312
|
+
structured: True
|
1313
|
+
structured_inherits: TensorIteratorBase
|
1255
1314
|
dispatch:
|
1256
|
-
CPU, CUDA:
|
1315
|
+
CPU, CUDA: clamp_min_Tensor_out
|
1316
|
+
MPS: clamp_min_Tensor_out_mps
|
1257
1317
|
|
1258
1318
|
# clip is an alias for clamp
|
1259
1319
|
- func: clip(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
|
@@ -1360,23 +1420,29 @@
|
|
1360
1420
|
|
1361
1421
|
- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
|
1362
1422
|
|
1423
|
+
- func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
|
1424
|
+
variants: function
|
1425
|
+
|
1363
1426
|
- func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
|
1364
1427
|
variants: method
|
1365
1428
|
device_check: NoCheck
|
1366
1429
|
device_guard: False
|
1367
1430
|
dispatch:
|
1368
1431
|
MkldnnCPU: copy_mkldnn_
|
1369
|
-
SparseCPU, SparseCUDA
|
1432
|
+
SparseCPU, SparseCUDA: copy_sparse_wrapper_
|
1370
1433
|
CompositeExplicitAutograd: copy_
|
1371
|
-
SparseCsrCPU, SparseCsrCUDA:
|
1434
|
+
SparseCsrCPU, SparseCsrCUDA: copy_sparse_compressed_
|
1435
|
+
autogen: copy.out
|
1372
1436
|
|
1373
1437
|
- func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
|
1374
|
-
dispatch:
|
1438
|
+
dispatch:
|
1439
|
+
MPS: _copy_from_mps
|
1375
1440
|
|
1376
1441
|
# We need this to be able to properly copy from a CPU to an XLA tensor with different sizes.
|
1377
1442
|
# See https://github.com/pytorch/xla/issues/2881
|
1378
1443
|
- func: _copy_from_and_resize(Tensor self, Tensor dst) -> Tensor
|
1379
|
-
dispatch:
|
1444
|
+
dispatch:
|
1445
|
+
MPS: _copy_from_and_resize_mps
|
1380
1446
|
|
1381
1447
|
- func: cos(Tensor self) -> Tensor
|
1382
1448
|
device_check: NoCheck # TensorIterator
|
@@ -1394,6 +1460,7 @@
|
|
1394
1460
|
structured_inherits: TensorIteratorBase
|
1395
1461
|
dispatch:
|
1396
1462
|
CPU, CUDA: cos_out
|
1463
|
+
MPS: cos_out_mps
|
1397
1464
|
|
1398
1465
|
- func: cosh(Tensor self) -> Tensor
|
1399
1466
|
device_check: NoCheck # TensorIterator
|
@@ -1411,6 +1478,7 @@
|
|
1411
1478
|
structured_inherits: TensorIteratorBase
|
1412
1479
|
dispatch:
|
1413
1480
|
CPU, CUDA: cosh_out
|
1481
|
+
MPS: cosh_out_mps
|
1414
1482
|
|
1415
1483
|
- func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
|
1416
1484
|
|
@@ -1419,6 +1487,7 @@
|
|
1419
1487
|
dispatch:
|
1420
1488
|
CPU: count_nonzero_cpu
|
1421
1489
|
CUDA: count_nonzero_cuda
|
1490
|
+
MPS: count_nonzero_mps
|
1422
1491
|
|
1423
1492
|
- func: count_nonzero(Tensor self, int? dim=None) -> Tensor
|
1424
1493
|
variants: function, method
|
@@ -1457,6 +1526,14 @@
|
|
1457
1526
|
dispatch:
|
1458
1527
|
CUDA: cudnn_convolution_transpose
|
1459
1528
|
|
1529
|
+
- func: _mps_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups) -> Tensor
|
1530
|
+
dispatch:
|
1531
|
+
MPS: _mps_convolution_transpose
|
1532
|
+
|
1533
|
+
- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool[2] output_mask) -> (Tensor, Tensor)
|
1534
|
+
dispatch:
|
1535
|
+
MPS: mps_convolution_transpose_backward
|
1536
|
+
|
1460
1537
|
- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
|
1461
1538
|
dispatch:
|
1462
1539
|
CUDA: cudnn_convolution_relu
|
@@ -1679,6 +1756,7 @@
|
|
1679
1756
|
structured_inherits: TensorIteratorBase
|
1680
1757
|
dispatch:
|
1681
1758
|
CPU, CUDA: div_out
|
1759
|
+
MPS: div_out_mps
|
1682
1760
|
SparseCPU, SparseCUDA: div_out_sparse_zerodim
|
1683
1761
|
|
1684
1762
|
- func: div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor
|
@@ -1701,6 +1779,7 @@
|
|
1701
1779
|
structured_inherits: TensorIteratorBase
|
1702
1780
|
dispatch:
|
1703
1781
|
CPU, CUDA: div_out_mode
|
1782
|
+
MPS: div_out_mode_mps
|
1704
1783
|
SparseCPU, SparseCUDA: div_out_sparse_zerodim
|
1705
1784
|
|
1706
1785
|
# For C++ only, until we have conversion from C++ numbers to Tensor
|
@@ -1715,6 +1794,7 @@
|
|
1715
1794
|
variants: method
|
1716
1795
|
dispatch:
|
1717
1796
|
CompositeExplicitAutograd: div_
|
1797
|
+
autogen: div.Scalar_out
|
1718
1798
|
|
1719
1799
|
- func: div.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor
|
1720
1800
|
variants: function, method
|
@@ -1725,6 +1805,7 @@
|
|
1725
1805
|
variants: method
|
1726
1806
|
dispatch:
|
1727
1807
|
CompositeExplicitAutograd: div_
|
1808
|
+
autogen: div.Scalar_mode_out
|
1728
1809
|
|
1729
1810
|
# divide, alias for div
|
1730
1811
|
- func: divide.Tensor(Tensor self, Tensor other) -> Tensor
|
@@ -1780,6 +1861,7 @@
|
|
1780
1861
|
dispatch:
|
1781
1862
|
CPU: dot
|
1782
1863
|
CUDA: dot_cuda
|
1864
|
+
MPS: dot_mps
|
1783
1865
|
|
1784
1866
|
- func: dot.out(Tensor self, Tensor tensor, *, Tensor(a!) out) -> Tensor(a!)
|
1785
1867
|
dispatch:
|
@@ -1800,6 +1882,7 @@
|
|
1800
1882
|
- func: embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
|
1801
1883
|
dispatch:
|
1802
1884
|
CompositeExplicitAutograd: embedding
|
1885
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_embedding
|
1803
1886
|
|
1804
1887
|
- func: embedding_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor
|
1805
1888
|
|
@@ -1807,11 +1890,13 @@
|
|
1807
1890
|
dispatch:
|
1808
1891
|
CPU: embedding_dense_backward_cpu
|
1809
1892
|
CUDA: embedding_dense_backward_cuda
|
1893
|
+
MPS: embedding_dense_backward_mps
|
1810
1894
|
|
1811
1895
|
- func: embedding_renorm_(Tensor(a!) self, Tensor indices, float max_norm, float norm_type) -> Tensor(a!)
|
1812
1896
|
dispatch:
|
1813
1897
|
CPU: embedding_renorm_cpu_
|
1814
1898
|
CUDA: embedding_renorm_cuda_
|
1899
|
+
autogen: embedding_renorm.functional, embedding_renorm.out
|
1815
1900
|
|
1816
1901
|
- func: embedding_sparse_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor
|
1817
1902
|
|
@@ -1872,10 +1957,12 @@
|
|
1872
1957
|
dispatch:
|
1873
1958
|
CPU: empty_cpu
|
1874
1959
|
CUDA: empty_cuda
|
1960
|
+
MPS: empty_mps
|
1875
1961
|
Meta: empty_meta
|
1876
1962
|
MkldnnCPU: empty_mkldnn
|
1877
1963
|
SparseCPU, SparseCUDA: empty_sparse
|
1878
|
-
SparseCsrCPU, SparseCsrCUDA:
|
1964
|
+
SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
|
1965
|
+
QuantizedCPU, QuantizedCUDA: empty_unknown_quantized
|
1879
1966
|
|
1880
1967
|
# We do not make new_empty a composite that calls into new_empty_strided, as the strided version
|
1881
1968
|
# is significantly more difficult to implement by different backends
|
@@ -1920,8 +2007,20 @@
|
|
1920
2007
|
dispatch:
|
1921
2008
|
CPU, Meta: resize_
|
1922
2009
|
CUDA: resize_cuda_
|
2010
|
+
MPS: resize_mps_
|
1923
2011
|
QuantizedCPU: quantized_resize_cpu_
|
1924
2012
|
SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
|
2013
|
+
autogen: resize.functional, resize.out
|
2014
|
+
|
2015
|
+
# This is a utility function to enable users to resize out tensor while registering kernels for out variants.
|
2016
|
+
# Eventually, we can consider exposing `resize_output` as a public API to ship it with python op registration
|
2017
|
+
# to make it easy to register out variants for ops.
|
2018
|
+
- func: _resize_output_(Tensor(a!) self, int[] size, Device device) -> Tensor(a!)
|
2019
|
+
use_const_ref_for_mutable_tensors: True
|
2020
|
+
variants: function
|
2021
|
+
dispatch:
|
2022
|
+
Meta: _resize_output_
|
2023
|
+
autogen: _resize_output.functional, _resize_output.out
|
1925
2024
|
|
1926
2025
|
- func: empty_quantized(int[] size, Tensor qtensor, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
|
1927
2026
|
category_override: factory
|
@@ -1938,6 +2037,7 @@
|
|
1938
2037
|
device_guard: False
|
1939
2038
|
dispatch:
|
1940
2039
|
CompositeExplicitAutograd: empty_like
|
2040
|
+
QuantizedCPU, QuantizedCUDA: empty_like_quantized
|
1941
2041
|
SparseCPU, SparseCUDA: empty_like_sparse_coo
|
1942
2042
|
SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
|
1943
2043
|
|
@@ -1945,7 +2045,9 @@
|
|
1945
2045
|
dispatch:
|
1946
2046
|
CPU: empty_strided_cpu
|
1947
2047
|
CUDA: empty_strided_cuda
|
2048
|
+
MPS: empty_strided_mps
|
1948
2049
|
Meta: empty_strided_meta
|
2050
|
+
QuantizedCPU, QuantizedCUDA: empty_strided_unknown_quantized
|
1949
2051
|
|
1950
2052
|
- func: erf(Tensor self) -> Tensor
|
1951
2053
|
device_check: NoCheck # TensorIterator
|
@@ -1969,6 +2071,7 @@
|
|
1969
2071
|
structured_inherits: TensorIteratorBase
|
1970
2072
|
dispatch:
|
1971
2073
|
CPU, CUDA: erf_out
|
2074
|
+
MPS: erf_out_mps
|
1972
2075
|
SparseCPU, SparseCUDA: erf_sparse_out
|
1973
2076
|
SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
|
1974
2077
|
|
@@ -2005,6 +2108,7 @@
|
|
2005
2108
|
structured_inherits: TensorIteratorBase
|
2006
2109
|
dispatch:
|
2007
2110
|
CPU, CUDA: exp_out
|
2111
|
+
MPS: exp_out_mps
|
2008
2112
|
|
2009
2113
|
- func: exp2(Tensor self) -> Tensor
|
2010
2114
|
structured_delegate: exp2.out
|
@@ -2019,6 +2123,7 @@
|
|
2019
2123
|
structured_inherits: TensorIteratorBase
|
2020
2124
|
dispatch:
|
2021
2125
|
CPU, CUDA: exp2_out
|
2126
|
+
MPS: exp2_out_mps
|
2022
2127
|
|
2023
2128
|
- func: expm1(Tensor self) -> Tensor
|
2024
2129
|
device_check: NoCheck # TensorIterator
|
@@ -2045,6 +2150,13 @@
|
|
2045
2150
|
SparseCPU, SparseCUDA: expm1_sparse_out
|
2046
2151
|
SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
|
2047
2152
|
|
2153
|
+
- func: expand.SymInt(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
|
2154
|
+
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
|
2155
|
+
device_check: NoCheck
|
2156
|
+
device_guard: False
|
2157
|
+
dispatch:
|
2158
|
+
CompositeExplicitAutograd: expand_symint
|
2159
|
+
|
2048
2160
|
- func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
|
2049
2161
|
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
|
2050
2162
|
device_check: NoCheck
|
@@ -2065,11 +2177,13 @@
|
|
2065
2177
|
dispatch:
|
2066
2178
|
CPU: eye_out_cpu
|
2067
2179
|
CUDA: eye_out_cuda
|
2180
|
+
MPS: eye_out_mps
|
2068
2181
|
|
2069
2182
|
- func: eye.m_out(int n, int m, *, Tensor(a!) out) -> Tensor(a!)
|
2070
2183
|
dispatch:
|
2071
2184
|
CPU: eye_out_cpu
|
2072
2185
|
CUDA: eye_out_cuda
|
2186
|
+
MPS: eye_out_mps
|
2073
2187
|
|
2074
2188
|
- func: flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a)
|
2075
2189
|
variants: function, method
|
@@ -2089,21 +2203,36 @@
|
|
2089
2203
|
- func: unflatten.Dimname(Tensor(a) self, Dimname dim, int[] sizes, Dimname[] names) -> Tensor(a)
|
2090
2204
|
variants: method
|
2091
2205
|
|
2206
|
+
- func: fill.Scalar(Tensor self, Scalar value) -> Tensor
|
2207
|
+
variants: function
|
2208
|
+
dispatch:
|
2209
|
+
CompositeExplicitAutograd: fill
|
2210
|
+
|
2211
|
+
- func: fill.Tensor(Tensor self, Tensor value) -> Tensor
|
2212
|
+
variants: function
|
2213
|
+
dispatch:
|
2214
|
+
CompositeExplicitAutograd: fill
|
2215
|
+
|
2092
2216
|
- func: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
|
2093
2217
|
device_check: NoCheck # TensorIterator
|
2094
2218
|
variants: function, method
|
2095
2219
|
dispatch:
|
2096
2220
|
CPU, CUDA: fill_
|
2221
|
+
MPS: fill_scalar_mps
|
2097
2222
|
QuantizedCPU, QuantizedCUDA: fill_quantized_
|
2098
2223
|
Meta: fill_meta_
|
2224
|
+
SparseCsrCPU, SparseCsrCUDA: fill_sparse_csr_
|
2225
|
+
autogen: fill.Scalar_out
|
2099
2226
|
|
2100
2227
|
- func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
|
2101
2228
|
device_check: NoCheck # TensorIterator
|
2102
2229
|
variants: function, method
|
2103
2230
|
dispatch:
|
2104
2231
|
CPU, CUDA: fill_
|
2232
|
+
MPS: fill_tensor_mps_
|
2105
2233
|
QuantizedCPU, QuantizedCUDA: fill_quantized_
|
2106
2234
|
Meta: fill_meta_
|
2235
|
+
autogen: fill.Tensor_out
|
2107
2236
|
|
2108
2237
|
- func: floor(Tensor self) -> Tensor
|
2109
2238
|
device_check: NoCheck # TensorIterator
|
@@ -2129,6 +2258,7 @@
|
|
2129
2258
|
structured_inherits: TensorIteratorBase
|
2130
2259
|
dispatch:
|
2131
2260
|
CPU, CUDA: floor_out
|
2261
|
+
MPS: floor_out_mps
|
2132
2262
|
SparseCPU, SparseCUDA: floor_sparse_out
|
2133
2263
|
SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
|
2134
2264
|
|
@@ -2220,10 +2350,12 @@
|
|
2220
2350
|
variants: function, method
|
2221
2351
|
|
2222
2352
|
# NOTE [ grid_sampler Native Functions ]
|
2223
|
-
# `grid_sampler`
|
2224
|
-
# `cudnn_grid_sampler`, `grid_sampler_2d`, or `grid_sampler_3d`, each of
|
2225
|
-
# has the corresponding backward defined as native functions as well.
|
2226
|
-
#
|
2353
|
+
# `grid_sampler` is _supposed to_ do all the shape checking and then dispatch to
|
2354
|
+
# one of `cudnn_grid_sampler`, `grid_sampler_2d`, or `grid_sampler_3d`, each of
|
2355
|
+
# which has the corresponding backward defined as native functions as well.
|
2356
|
+
# However, we do shape checking everywhere for now since each of the mentioned
|
2357
|
+
# functions can be called directly, which will lead to crashes otherwise.
|
2358
|
+
# See https://github.com/pytorch/pytorch/issues/73187 for more information.
|
2227
2359
|
#
|
2228
2360
|
# There is also _grid_sampler_2d_backward_cpu_fallback which is an
|
2229
2361
|
# implementation detail of grid_sampler_2d and is only exposed here for testing
|
@@ -2261,7 +2393,10 @@
|
|
2261
2393
|
CPU: grid_sampler_3d_cpu
|
2262
2394
|
CUDA: grid_sampler_3d_cuda
|
2263
2395
|
|
2264
|
-
|
2396
|
+
# `grid_sampler_3d_backward` takes in `output_mask` to optimize performance for
|
2397
|
+
# the case where `input` doesn't require gradient. Gradient for `grid` is always
|
2398
|
+
# computed (only `output_mask[0]` is checked by the implementations).
|
2399
|
+
- func: grid_sampler_3d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)
|
2265
2400
|
dispatch:
|
2266
2401
|
CPU: grid_sampler_3d_backward_cpu
|
2267
2402
|
CUDA: grid_sampler_3d_backward_cuda
|
@@ -2355,15 +2490,21 @@
|
|
2355
2490
|
# - Tensor Tensor::index(ArrayRef<TensorIndex> indices)
|
2356
2491
|
# - Tensor Tensor::index(std::initializer_list<TensorIndex> indices)
|
2357
2492
|
|
2493
|
+
- func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
|
2494
|
+
structured: True
|
2495
|
+
variants: function
|
2496
|
+
precomputed:
|
2497
|
+
- dim -> int dim
|
2498
|
+
dispatch:
|
2499
|
+
CPU, CUDA: index_copy_out
|
2500
|
+
|
2358
2501
|
- func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
|
2359
2502
|
variants: method
|
2360
|
-
|
2361
|
-
CompositeExplicitAutograd: index_copy_
|
2503
|
+
structured_delegate: index_copy.out
|
2362
2504
|
|
2363
2505
|
- func: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
|
2364
2506
|
variants: function, method
|
2365
|
-
|
2366
|
-
CompositeExplicitAutograd: index_copy
|
2507
|
+
structured_delegate: index_copy.out
|
2367
2508
|
|
2368
2509
|
- func: index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!)
|
2369
2510
|
variants: method
|
@@ -2376,6 +2517,7 @@
|
|
2376
2517
|
variants: function, method
|
2377
2518
|
dispatch:
|
2378
2519
|
CompositeExplicitAutograd: index_put_
|
2520
|
+
autogen: index_put.out
|
2379
2521
|
# NB: The following functions are declared in aten/src/ATen/templates/TensorBody.h and defined in aten/src/ATen/TensorIndexing.cpp:
|
2380
2522
|
# - Tensor & Tensor::index_put_(ArrayRef<TensorIndex> indices, Tensor const & rhs)
|
2381
2523
|
# - Tensor & Tensor::index_put_(ArrayRef<TensorIndex> indices, Scalar v)
|
@@ -2393,6 +2535,7 @@
|
|
2393
2535
|
variants: function
|
2394
2536
|
dispatch:
|
2395
2537
|
CPU, CUDA: _index_put_impl_
|
2538
|
+
autogen: _index_put_impl.functional, _index_put_impl.out
|
2396
2539
|
|
2397
2540
|
- func: instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor
|
2398
2541
|
variants: function
|
@@ -2444,7 +2587,7 @@
|
|
2444
2587
|
device_check: NoCheck
|
2445
2588
|
device_guard: False
|
2446
2589
|
dispatch:
|
2447
|
-
CPU, CUDA: isnan
|
2590
|
+
CPU, CUDA, MPS: isnan
|
2448
2591
|
SparseCPU, SparseCUDA: isnan_sparse
|
2449
2592
|
SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
|
2450
2593
|
|
@@ -2540,17 +2683,14 @@
|
|
2540
2683
|
dispatch:
|
2541
2684
|
CPU: layer_norm_cpu
|
2542
2685
|
CUDA: layer_norm_cuda
|
2686
|
+
MPS: layer_norm_mps
|
2543
2687
|
CompositeImplicitAutograd: math_native_layer_norm
|
2544
2688
|
|
2545
|
-
- func: _native_multi_head_self_attention(Tensor query, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
|
2546
|
-
dispatch:
|
2547
|
-
CPU: multi_head_self_attention_cpu
|
2548
|
-
CUDA: multi_head_self_attention_cuda
|
2549
|
-
|
2550
2689
|
- func: native_layer_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
|
2551
2690
|
dispatch:
|
2552
2691
|
CPU: layer_norm_backward_cpu
|
2553
2692
|
CUDA: layer_norm_backward_cuda
|
2693
|
+
MPS: layer_norm_backward_mps
|
2554
2694
|
|
2555
2695
|
- func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
|
2556
2696
|
variants: function, method
|
@@ -2575,6 +2715,14 @@
|
|
2575
2715
|
- func: linear.out(Tensor input, Tensor weight, Tensor? bias=None, *, Tensor(a!) out) -> Tensor(a!)
|
2576
2716
|
python_module: nn
|
2577
2717
|
|
2718
|
+
# TODO: Add this function to MPS dispatch key so that we avoid declaring it in
|
2719
|
+
# native_functions.yaml
|
2720
|
+
# https://github.com/pytorch/pytorch/issues/77394
|
2721
|
+
- func: _mps_linear(Tensor self, Tensor weight, Tensor? bias=None) -> Tensor
|
2722
|
+
python_module: nn
|
2723
|
+
dispatch:
|
2724
|
+
MPS: _mps_linear
|
2725
|
+
|
2578
2726
|
- func: mkldnn_linear(Tensor self, Tensor weight, Tensor? bias=None) -> Tensor
|
2579
2727
|
python_module: nn
|
2580
2728
|
dispatch:
|
@@ -2592,6 +2740,18 @@
|
|
2592
2740
|
dispatch:
|
2593
2741
|
MkldnnCPU: mkldnn_linear_backward
|
2594
2742
|
|
2743
|
+
- func: _mps_linear_backward_input(int[] input_size, Tensor grad_output, Tensor weight) -> Tensor
|
2744
|
+
dispatch:
|
2745
|
+
MPS: _mps_linear_backward_input
|
2746
|
+
|
2747
|
+
- func: _mps_linear_backward_weights(Tensor grad_output, Tensor input, Tensor weight, bool bias_defined) -> (Tensor, Tensor)
|
2748
|
+
dispatch:
|
2749
|
+
MPS: _mps_linear_backward_weights
|
2750
|
+
|
2751
|
+
- func: mps_linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
|
2752
|
+
dispatch:
|
2753
|
+
MPS: mps_linear_backward
|
2754
|
+
|
2595
2755
|
- func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
|
2596
2756
|
|
2597
2757
|
- func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
|
@@ -2622,6 +2782,7 @@
|
|
2622
2782
|
dispatch:
|
2623
2783
|
CPU, Meta: linspace_out
|
2624
2784
|
CUDA: linspace_cuda_out
|
2785
|
+
MPS: linspace_out_mps
|
2625
2786
|
|
2626
2787
|
- func: log(Tensor self) -> Tensor
|
2627
2788
|
device_check: NoCheck # TensorIterator
|
@@ -2639,6 +2800,7 @@
|
|
2639
2800
|
structured_inherits: TensorIteratorBase
|
2640
2801
|
dispatch:
|
2641
2802
|
CPU, CUDA: log_out
|
2803
|
+
MPS: log_out_mps
|
2642
2804
|
|
2643
2805
|
- func: log10(Tensor self) -> Tensor
|
2644
2806
|
device_check: NoCheck # TensorIterator
|
@@ -2658,6 +2820,7 @@
|
|
2658
2820
|
structured_inherits: TensorIteratorBase
|
2659
2821
|
dispatch:
|
2660
2822
|
CPU, CUDA: log10_out
|
2823
|
+
MPS: log10_out_mps
|
2661
2824
|
|
2662
2825
|
- func: log1p(Tensor self) -> Tensor
|
2663
2826
|
device_check: NoCheck # TensorIterator
|
@@ -2681,6 +2844,7 @@
|
|
2681
2844
|
structured_inherits: TensorIteratorBase
|
2682
2845
|
dispatch:
|
2683
2846
|
CPU, CUDA: log1p_out
|
2847
|
+
MPS: log1p_out_mps
|
2684
2848
|
SparseCPU, SparseCUDA: log1p_sparse_out
|
2685
2849
|
SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
|
2686
2850
|
|
@@ -2700,12 +2864,14 @@
|
|
2700
2864
|
structured_inherits: TensorIteratorBase
|
2701
2865
|
dispatch:
|
2702
2866
|
CPU, CUDA: log2_out
|
2867
|
+
MPS: log2_out_mps
|
2703
2868
|
|
2704
2869
|
- func: logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
2705
2870
|
structured: True
|
2706
2871
|
structured_inherits: TensorIteratorBase
|
2707
2872
|
dispatch:
|
2708
2873
|
CPU, CUDA: logaddexp_out
|
2874
|
+
MPS: logaddexp_out_mps
|
2709
2875
|
|
2710
2876
|
- func: logaddexp(Tensor self, Tensor other) -> Tensor
|
2711
2877
|
variants: method, function
|
@@ -2718,6 +2884,7 @@
|
|
2718
2884
|
structured_inherits: TensorIteratorBase
|
2719
2885
|
dispatch:
|
2720
2886
|
CPU, CUDA: logaddexp2_out
|
2887
|
+
MPS: logaddexp2_out_mps
|
2721
2888
|
|
2722
2889
|
- func: logaddexp2(Tensor self, Tensor other) -> Tensor
|
2723
2890
|
variants: method, function
|
@@ -2791,6 +2958,11 @@
|
|
2791
2958
|
- func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
|
2792
2959
|
variants: function, method
|
2793
2960
|
|
2961
|
+
- func: log_softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)
|
2962
|
+
variants: function
|
2963
|
+
dispatch:
|
2964
|
+
CompositeExplicitAutograd: log_softmax_out
|
2965
|
+
|
2794
2966
|
- func: log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
|
2795
2967
|
variants: function, method
|
2796
2968
|
|
@@ -2802,6 +2974,7 @@
|
|
2802
2974
|
dispatch:
|
2803
2975
|
CPU: log_softmax_cpu_out
|
2804
2976
|
CUDA: log_softmax_cuda_out
|
2977
|
+
MPS: log_softmax_mps_out
|
2805
2978
|
|
2806
2979
|
- func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
|
2807
2980
|
structured_delegate: _log_softmax_backward_data.out
|
@@ -2811,6 +2984,7 @@
|
|
2811
2984
|
dispatch:
|
2812
2985
|
CPU: log_softmax_backward_cpu_out
|
2813
2986
|
CUDA: log_softmax_backward_cuda_out
|
2987
|
+
MPS: log_softmax_backward_mps_out
|
2814
2988
|
|
2815
2989
|
- func: _logcumsumexp(Tensor self, int dim) -> Tensor
|
2816
2990
|
dispatch:
|
@@ -2922,6 +3096,7 @@
|
|
2922
3096
|
- dim -> int dim
|
2923
3097
|
dispatch:
|
2924
3098
|
CPU, CUDA: max_out
|
3099
|
+
MPS: max_out_mps
|
2925
3100
|
|
2926
3101
|
- func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
2927
3102
|
device_check: NoCheck # TensorIterator
|
@@ -2937,10 +3112,10 @@
|
|
2937
3112
|
|
2938
3113
|
- func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
|
2939
3114
|
variants: function, method
|
2940
|
-
|
2941
|
-
CompositeExplicitAutograd: amax
|
3115
|
+
structured_delegate: amax.out
|
2942
3116
|
|
2943
3117
|
- func: amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
|
3118
|
+
structured: True
|
2944
3119
|
dispatch:
|
2945
3120
|
CPU, CUDA: amax_out
|
2946
3121
|
|
@@ -2951,6 +3126,17 @@
|
|
2951
3126
|
|
2952
3127
|
- func: max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
|
2953
3128
|
|
3129
|
+
# TODO: Add this function to MPS dispatch key so that we avoid declaring it in
|
3130
|
+
# native_functions.yaml
|
3131
|
+
# https://github.com/pytorch/pytorch/issues/77394
|
3132
|
+
- func: _mps_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
|
3133
|
+
dispatch:
|
3134
|
+
MPS: _mps_max_pool2d
|
3135
|
+
|
3136
|
+
- func: mps_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
|
3137
|
+
dispatch:
|
3138
|
+
MPS: mps_max_pool2d_backward
|
3139
|
+
|
2954
3140
|
- func: mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
|
2955
3141
|
dispatch:
|
2956
3142
|
MkldnnCPU: mkldnn_max_pool2d
|
@@ -2974,6 +3160,7 @@
|
|
2974
3160
|
- func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
|
2975
3161
|
dispatch:
|
2976
3162
|
QuantizedCPU: quantized_max_pool2d
|
3163
|
+
QuantizedCUDA: quantized_max_pool2d_cudnn
|
2977
3164
|
|
2978
3165
|
- func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
|
2979
3166
|
|
@@ -2997,6 +3184,7 @@
|
|
2997
3184
|
device_check: NoCheck # TensorIterator
|
2998
3185
|
dispatch:
|
2999
3186
|
CPU, CUDA: mean_out
|
3187
|
+
MPS: mean_out_mps
|
3000
3188
|
QuantizedCPU: mean_out_quantized_cpu
|
3001
3189
|
|
3002
3190
|
- func: mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
|
@@ -3069,6 +3257,7 @@
|
|
3069
3257
|
- dim -> int dim
|
3070
3258
|
dispatch:
|
3071
3259
|
CPU, CUDA: min_out
|
3260
|
+
MPS: min_out_mps
|
3072
3261
|
|
3073
3262
|
- func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
3074
3263
|
device_check: NoCheck # TensorIterator
|
@@ -3079,13 +3268,24 @@
|
|
3079
3268
|
|
3080
3269
|
- func: amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
|
3081
3270
|
variants: function, method
|
3082
|
-
|
3083
|
-
CompositeExplicitAutograd: amin
|
3271
|
+
structured_delegate: amin.out
|
3084
3272
|
|
3085
3273
|
- func: amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
|
3274
|
+
structured: True
|
3086
3275
|
dispatch:
|
3087
3276
|
CPU, CUDA: amin_out
|
3088
3277
|
|
3278
|
+
# TODO: Add this function to MPS dispatch key so that we avoid declaring it in
|
3279
|
+
# native_functions.yaml
|
3280
|
+
# https://github.com/pytorch/pytorch/issues/77394
|
3281
|
+
- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
|
3282
|
+
dispatch:
|
3283
|
+
MPS: _mps_convolution
|
3284
|
+
|
3285
|
+
- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
|
3286
|
+
dispatch:
|
3287
|
+
MPS: mps_convolution_backward
|
3288
|
+
|
3089
3289
|
- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
|
3090
3290
|
dispatch:
|
3091
3291
|
CompositeExplicitAutograd: mkldnn_convolution
|
@@ -3130,10 +3330,12 @@
|
|
3130
3330
|
dispatch:
|
3131
3331
|
CPU: mm_out_cpu
|
3132
3332
|
CUDA: mm_out_cuda
|
3333
|
+
MPS: mm_out_mps
|
3133
3334
|
SparseCPU, SparseCUDA: _sparse_mm_out
|
3134
3335
|
SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
|
3135
3336
|
|
3136
3337
|
- func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
|
3338
|
+
python_module: sparse
|
3137
3339
|
|
3138
3340
|
- func: _sparse_sparse_matmul(Tensor self, Tensor other) -> Tensor
|
3139
3341
|
dispatch:
|
@@ -3165,8 +3367,10 @@
|
|
3165
3367
|
variants: function, method
|
3166
3368
|
dispatch:
|
3167
3369
|
SparseCPU, SparseCUDA: mul_sparse
|
3370
|
+
SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr
|
3168
3371
|
MkldnnCPU: mkldnn_mul
|
3169
3372
|
ZeroTensor: mul_zerotensor
|
3373
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
|
3170
3374
|
|
3171
3375
|
- func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
3172
3376
|
device_check: NoCheck # TensorIterator
|
@@ -3174,7 +3378,9 @@
|
|
3174
3378
|
variants: method
|
3175
3379
|
dispatch:
|
3176
3380
|
SparseCPU, SparseCUDA: mul_sparse_
|
3381
|
+
SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr_
|
3177
3382
|
MkldnnCPU: mkldnn_mul_
|
3383
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
|
3178
3384
|
|
3179
3385
|
- func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
3180
3386
|
device_check: NoCheck # TensorIterator
|
@@ -3182,8 +3388,10 @@
|
|
3182
3388
|
structured_inherits: TensorIteratorBase
|
3183
3389
|
dispatch:
|
3184
3390
|
CPU, CUDA: mul_out
|
3391
|
+
MPS: mul_out_mps
|
3185
3392
|
SparseCPU: mul_out_sparse_cpu
|
3186
3393
|
SparseCUDA: mul_out_sparse_cuda
|
3394
|
+
SparseCsrCPU, SparseCsrCUDA: mul_out_sparse_csr
|
3187
3395
|
MkldnnCPU: mkldnn_mul_out
|
3188
3396
|
|
3189
3397
|
# For C++ only, until we have conversion from C++ numbers to Tensor
|
@@ -3192,12 +3400,15 @@
|
|
3192
3400
|
variants: function, method
|
3193
3401
|
dispatch:
|
3194
3402
|
CompositeExplicitAutograd: mul
|
3403
|
+
SparseCsrCPU, SparseCsrCUDA: mul_scalar_sparse_csr
|
3195
3404
|
|
3196
3405
|
- func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
3197
3406
|
device_check: NoCheck # TensorIterator
|
3198
3407
|
variants: method
|
3199
3408
|
dispatch:
|
3200
3409
|
CompositeExplicitAutograd: mul_
|
3410
|
+
SparseCsrCPU, SparseCsrCUDA: mul__scalar_sparse_csr
|
3411
|
+
autogen: mul.Scalar_out
|
3201
3412
|
|
3202
3413
|
# multiply, alias for mul
|
3203
3414
|
- func: multiply.Tensor(Tensor self, Tensor other) -> Tensor
|
@@ -3246,6 +3457,12 @@
|
|
3246
3457
|
CPU: narrow_copy_dense_cpu
|
3247
3458
|
SparseCPU, SparseCUDA: narrow_copy_sparse
|
3248
3459
|
CompositeExplicitAutograd: narrow_copy_dense
|
3460
|
+
tags: view_copy
|
3461
|
+
|
3462
|
+
- func: narrow_copy.SymInt(Tensor self, int dim, int start, SymInt length) -> Tensor
|
3463
|
+
variants: function, method
|
3464
|
+
dispatch:
|
3465
|
+
CompositeExplicitAutograd: narrow_copy_symint
|
3249
3466
|
|
3250
3467
|
- func: narrow_copy.out(Tensor self, int dim, int start, int length, *, Tensor(a!) out) -> Tensor(a!)
|
3251
3468
|
dispatch:
|
@@ -3265,11 +3482,13 @@
|
|
3265
3482
|
dispatch:
|
3266
3483
|
CPU: batch_norm_cpu
|
3267
3484
|
CUDA: batch_norm_cuda
|
3485
|
+
MPS: batch_norm_mps
|
3268
3486
|
MkldnnCPU: mkldnn_batch_norm
|
3269
3487
|
|
3270
3488
|
- func: native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))
|
3271
3489
|
dispatch:
|
3272
3490
|
CUDA: batch_norm_cuda_out
|
3491
|
+
MPS: batch_norm_mps_out
|
3273
3492
|
|
3274
3493
|
- func: batch_norm_stats(Tensor input, float eps) -> (Tensor, Tensor)
|
3275
3494
|
dispatch:
|
@@ -3296,6 +3515,7 @@
|
|
3296
3515
|
dispatch:
|
3297
3516
|
CPU: batch_norm_backward_cpu
|
3298
3517
|
CUDA: batch_norm_backward_cuda
|
3518
|
+
MPS: batch_norm_backward_mps
|
3299
3519
|
MkldnnCPU: mkldnn_batch_norm_backward
|
3300
3520
|
|
3301
3521
|
- func: batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor)
|
@@ -3363,6 +3583,7 @@
|
|
3363
3583
|
variants: function, method
|
3364
3584
|
dispatch:
|
3365
3585
|
CompositeExplicitAutograd: permute
|
3586
|
+
MPS: permute_mps
|
3366
3587
|
|
3367
3588
|
- func: movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)
|
3368
3589
|
variants: function, method
|
@@ -3403,8 +3624,14 @@
|
|
3403
3624
|
variants: function, method
|
3404
3625
|
|
3405
3626
|
- func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
|
3627
|
+
dispatch:
|
3628
|
+
CPU: pixel_shuffle_cpu
|
3629
|
+
CompositeExplicitAutograd: math_pixel_shuffle
|
3406
3630
|
|
3407
3631
|
- func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
|
3632
|
+
dispatch:
|
3633
|
+
CPU: pixel_unshuffle_cpu
|
3634
|
+
CompositeExplicitAutograd: math_pixel_unshuffle
|
3408
3635
|
|
3409
3636
|
- func: channel_shuffle(Tensor self, int groups) -> Tensor
|
3410
3637
|
dispatch:
|
@@ -3420,6 +3647,7 @@
|
|
3420
3647
|
variants: method
|
3421
3648
|
dispatch:
|
3422
3649
|
CUDA: is_pinned_cuda
|
3650
|
+
MPS: is_pinned_mps
|
3423
3651
|
CompositeExplicitAutograd: is_pinned_default
|
3424
3652
|
|
3425
3653
|
# TODO: add a copy kwarg that guarantees that the tensor is put into fresh
|
@@ -3431,6 +3659,7 @@
|
|
3431
3659
|
- func: _pin_memory(Tensor self, Device? device=None) -> Tensor
|
3432
3660
|
dispatch:
|
3433
3661
|
CUDA: _pin_memory_cuda
|
3662
|
+
MPS: _pin_memory_mps
|
3434
3663
|
|
3435
3664
|
- func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
|
3436
3665
|
variants: function, method
|
@@ -3566,6 +3795,7 @@
|
|
3566
3795
|
structured_inherits: TensorIteratorBase
|
3567
3796
|
dispatch:
|
3568
3797
|
CPU, CUDA: reciprocal_out
|
3798
|
+
MPS: reciprocal_out_mps
|
3569
3799
|
|
3570
3800
|
- func: neg(Tensor self) -> Tensor
|
3571
3801
|
device_check: NoCheck # TensorIterator
|
@@ -3589,6 +3819,7 @@
|
|
3589
3819
|
structured_inherits: TensorIteratorBase
|
3590
3820
|
dispatch:
|
3591
3821
|
CPU, CUDA: neg_out
|
3822
|
+
MPS: neg_out_mps
|
3592
3823
|
SparseCPU, SparseCUDA: neg_out_sparse
|
3593
3824
|
SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
|
3594
3825
|
|
@@ -3605,6 +3836,7 @@
|
|
3605
3836
|
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
|
3606
3837
|
dispatch:
|
3607
3838
|
CompositeExplicitAutograd: repeat
|
3839
|
+
MPS: repeat_mps
|
3608
3840
|
|
3609
3841
|
- func: repeat_interleave.Tensor(Tensor repeats, *, int? output_size=None) -> Tensor
|
3610
3842
|
variants: function
|
@@ -3631,7 +3863,7 @@
|
|
3631
3863
|
device_check: NoCheck
|
3632
3864
|
device_guard: False
|
3633
3865
|
dispatch:
|
3634
|
-
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor: _reshape_alias
|
3866
|
+
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS: _reshape_alias
|
3635
3867
|
# We don't need to support mkldnn since this is handled explicitly by the reshape operator.
|
3636
3868
|
|
3637
3869
|
- func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
|
@@ -3668,6 +3900,7 @@
|
|
3668
3900
|
dispatch:
|
3669
3901
|
CPU: round_out
|
3670
3902
|
CUDA: round_out
|
3903
|
+
MPS: round_out_mps
|
3671
3904
|
SparseCPU, SparseCUDA: round_sparse_out
|
3672
3905
|
SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
|
3673
3906
|
|
@@ -3700,16 +3933,21 @@
|
|
3700
3933
|
variants: function, method
|
3701
3934
|
dispatch:
|
3702
3935
|
CPU, CUDA: relu
|
3936
|
+
MPS: relu_mps
|
3703
3937
|
MkldnnCPU: mkldnn_relu
|
3704
3938
|
QuantizedCPU: relu_quantized_cpu
|
3939
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
|
3705
3940
|
|
3706
3941
|
- func: relu_(Tensor(a!) self) -> Tensor(a!)
|
3707
3942
|
device_check: NoCheck # TensorIterator
|
3708
3943
|
variants: function, method
|
3709
3944
|
dispatch:
|
3710
3945
|
CPU, CUDA: relu_
|
3946
|
+
MPS: relu_mps_
|
3711
3947
|
MkldnnCPU: mkldnn_relu_
|
3712
3948
|
QuantizedCPU: relu_quantized_cpu_
|
3949
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
|
3950
|
+
autogen: relu.out
|
3713
3951
|
|
3714
3952
|
- func: relu6(Tensor self) -> Tensor
|
3715
3953
|
python_module: nn
|
@@ -3720,16 +3958,18 @@
|
|
3720
3958
|
- func: prelu(Tensor self, Tensor weight) -> Tensor
|
3721
3959
|
variants: function, method
|
3722
3960
|
dispatch:
|
3961
|
+
MkldnnCPU: mkldnn_prelu
|
3723
3962
|
CPU: prelu_cpu
|
3724
3963
|
CUDA: prelu_cuda
|
3725
3964
|
|
3726
3965
|
- func: prelu_backward(Tensor grad_output, Tensor self, Tensor weight) -> (Tensor, Tensor)
|
3727
3966
|
variants: function, method
|
3728
3967
|
dispatch:
|
3968
|
+
MkldnnCPU: mkldnn_prelu_backward
|
3729
3969
|
CPU: prelu_backward_cpu
|
3730
3970
|
CUDA: prelu_backward_cuda
|
3731
3971
|
|
3732
|
-
- func: gelu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
3972
|
+
- func: gelu.out(Tensor self, *, str approximate='none', Tensor(a!) out) -> Tensor(a!)
|
3733
3973
|
structured: True
|
3734
3974
|
structured_inherits: TensorIteratorBase
|
3735
3975
|
device_check: NoCheck # TensorIterator
|
@@ -3737,24 +3977,34 @@
|
|
3737
3977
|
dispatch:
|
3738
3978
|
CPU: gelu_out_cpu
|
3739
3979
|
CUDA: gelu_out_cuda
|
3980
|
+
MPS: gelu_out_mps
|
3981
|
+
|
3982
|
+
- func: gelu_(Tensor(a!) self, *, str approximate='none') -> Tensor(a!)
|
3983
|
+
structured_delegate: gelu.out
|
3984
|
+
device_check: NoCheck # TensorIterator
|
3985
|
+
python_module: nn
|
3986
|
+
dispatch:
|
3987
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
|
3740
3988
|
|
3741
|
-
- func: gelu(Tensor self) -> Tensor
|
3989
|
+
- func: gelu(Tensor self, *, str approximate='none') -> Tensor
|
3742
3990
|
structured_delegate: gelu.out
|
3743
3991
|
device_check: NoCheck # TensorIterator
|
3744
3992
|
python_module: nn
|
3745
3993
|
dispatch:
|
3746
3994
|
MkldnnCPU: mkldnn_gelu
|
3747
3995
|
QuantizedCPU: gelu_quantized_cpu
|
3996
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu
|
3748
3997
|
|
3749
|
-
- func: gelu_backward.grad_input(Tensor
|
3998
|
+
- func: gelu_backward.grad_input(Tensor grad_output, Tensor self, *, str approximate='none', Tensor(a!) grad_input) -> Tensor(a!)
|
3750
3999
|
structured: True
|
3751
4000
|
structured_inherits: TensorIteratorBase
|
3752
4001
|
python_module: nn
|
3753
4002
|
dispatch:
|
3754
4003
|
CPU: gelu_backward_out_cpu
|
3755
4004
|
CUDA: gelu_backward_out_cuda
|
4005
|
+
MPS: gelu_backward_out_mps
|
3756
4006
|
|
3757
|
-
- func: gelu_backward(Tensor
|
4007
|
+
- func: gelu_backward(Tensor grad_output, Tensor self, *, str approximate='none') -> Tensor
|
3758
4008
|
structured_delegate: gelu_backward.grad_input
|
3759
4009
|
python_module: nn
|
3760
4010
|
dispatch:
|
@@ -3804,6 +4054,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: rsqrt_out
+    MPS: rsqrt_out_mps

 - func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)
   variants: function, method
@@ -3816,6 +4067,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: select
+    SparseCsrCPU, SparseCsrCUDA: select_sparse_csr

 - func: select_backward(Tensor grad_output, int[] input_sizes, int dim, int index) -> Tensor
   variants: function
@@ -3839,6 +4091,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CompositeExplicitAutograd: celu_
+  autogen: celu.out

 - func: silu(Tensor self) -> Tensor
   structured_delegate: silu.out
@@ -3858,6 +4111,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: silu_out
+    MPS: silu_out_mps

 - func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -3865,6 +4119,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: silu_backward_out
+    MPS: silu_backward_out_mps

 - func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: silu_backward.grad_input
@@ -3918,6 +4173,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sigmoid_out
+    MPS: sigmoid_out_mps

 - func: logit(Tensor self, float? eps=None) -> Tensor
   variants: function, method
@@ -3955,6 +4211,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sin_out
+    MPS: sin_out_mps
     SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
     SparseCPU, SparseCUDA: sin_sparse_out

@@ -3994,6 +4251,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sinh_out
+    MPS: sinh_out_mps
     SparseCPU, SparseCUDA: sinh_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out

@@ -4080,6 +4338,11 @@
 - func: softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method

+- func: softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: softmax_out
+
 - func: softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method

@@ -4093,6 +4356,7 @@
   dispatch:
     CPU: softmax_cpu_out
     CUDA: softmax_cuda_out
+    MPS: softmax_mps_out

 - func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
   structured_delegate: _softmax_backward_data.out
@@ -4102,6 +4366,7 @@
   dispatch:
     CPU: softmax_backward_cpu_out
     CUDA: softmax_backward_cuda_out
+    MPS: softmax_backward_mps_out

 - func: unsafe_split.Tensor(Tensor self, int split_size, int dim=0) -> Tensor[]
   variants: function, method
@@ -4117,6 +4382,10 @@
   dispatch:
     CompositeExplicitAutograd: split

+- func: split.sizes(Tensor(a -> *) self, int[] split_size, int dim=0) -> Tensor(a)[]
+  variants: function, method
+  device_guard: False
+
 - func: unsafe_split_with_sizes(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]
   variants: function, method
   device_check: NoCheck
@@ -4154,7 +4423,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-
+    CompositeExplicitAutograd: squeeze
     QuantizedCPU, QuantizedCUDA: squeeze_quantized

 - func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
@@ -4162,7 +4431,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-
+    CompositeExplicitAutograd: squeeze
     QuantizedCPU, QuantizedCUDA: squeeze_quantized

 - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
@@ -4232,12 +4501,13 @@

 - func: dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)

-#
-# missing the `pad_mode` and `center` arguments, which are taken care of at
-# `torch.functional.py`. They shall be moved here once we have mapping between
-# Python strings and C++ Enum in codegen.
+# Overload without center & pad mode, needed for forward-compatibility
 - func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
   variants: function, method
+  cpp_no_default_args: ['hop_length', 'win_length', 'window', 'normalized']
+
+- func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
+  variants: function, method

 - func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor
   variants: function, method
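The stft entry is split into a base overload (without center/pad handling) and a new `stft.center` overload that performs centering and reflect padding natively. A hedged sketch, assuming torch-rb exposes `Torch.stft` with keyword arguments for this schema:

```ruby
require "torch"

signal = Torch.randn(1024)

# Centered STFT with the default "reflect" padding; return_complex is
# stated explicitly, matching recent PyTorch behaviour.
spec = Torch.stft(signal, 256, return_complex: true)
spec.shape # => [n_freq_bins, n_frames]
```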
@@ -4258,6 +4528,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: sum
+    SparseCsrCPU, SparseCsrCUDA: sum_csr

 - func: sum.dim_IntList(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   structured_delegate: sum.IntList_out
@@ -4273,21 +4544,17 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: sum_out
+    MPS: sum_out_mps

 - func: sum.DimnameList_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator

-- func: nansum(Tensor self, *, ScalarType? dtype=None) -> Tensor
+- func: nansum(Tensor self, int[1] dim=[], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
   dispatch:
     CPU, CUDA: nansum

-- func: nansum.
-  variants: function, method
-  dispatch:
-    CPU, CUDA: nansum
-
-- func: nansum.IntList_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+- func: nansum.out(Tensor self, int[1] dim=[], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: nansum_out

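nansum is collapsed into a single schema that takes `dim`/`keepdim` directly (an empty `dim` list means a full reduction). A sketch of the resulting call shape, assuming the generated torch-rb binding mirrors the schema and accepts keywords:

```ruby
require "torch"

x = Torch.tensor([[1.0, Float::NAN], [2.0, 3.0]])

# NaNs are treated as zero; dim: reduces over the given dimension.
Torch.nansum(x)         # => 6.0 (scalar tensor)
Torch.nansum(x, dim: 0) # => [3.0, 3.0]
```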
@@ -4318,6 +4585,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sqrt_out
+    MPS: sqrt_out_mps
     SparseCPU, SparseCUDA: sqrt_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out

@@ -4330,8 +4598,6 @@
   variants: function, method

 - func: square.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
-  dispatch:
-    CPU, CUDA: square_out

 - func: std(Tensor self, bool unbiased=True) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -4346,6 +4612,7 @@
   variants: function, method
   dispatch:
     CPU, CUDA: std
+    MPS: std_mps

 - func: std_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
   device_check: NoCheck # TensorIterator
@@ -4397,6 +4664,7 @@
   variants: function, method
   dispatch:
     CPU, CUDA: prod
+    MPS: prod_mps

 - func: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   structured_delegate: prod.int_out
@@ -4408,6 +4676,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: prod_out
+    MPS: prod_out_mps

 - func: prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -4453,6 +4722,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: tan_out
+    MPS: tan_out_mps
     SparseCPU, SparseCUDA: tan_sparse_out
     SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out

@@ -4481,6 +4751,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: tanh_out
+    MPS: tanh_out_mps
     SparseCPU, SparseCUDA: tanh_sparse_out
     SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out

@@ -4511,12 +4782,14 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: threshold_out
+    MPS: threshold_out_mps

 - func: threshold_backward.grad_input(Tensor grad_output, Tensor self, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: threshold_backward_out
+    MPS: threshold_backward_out_mps

 - func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
   variants: function
@@ -4558,6 +4831,7 @@
   device_guard: False
   dispatch:
     MkldnnCPU: mkldnn_transpose_
+  autogen: _mkldnn_transpose.out

 - func: one_hot(Tensor self, int num_classes=-1) -> Tensor
   python_module: nn
@@ -4595,6 +4869,28 @@

 - func: trapz.dx(Tensor y, *, float dx=1, int dim=-1) -> Tensor

+# Fused implementation detail for transformers. Adds in-projection bias to QKV and divides Q by sqrt(D/num_heads).
+- func: _transform_bias_rescale_qkv(Tensor qkv, Tensor qkv_bias, int num_heads) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CPU, NestedTensorCPU: transform_bias_rescale_qkv_cpu
+    CUDA, NestedTensorCUDA: transform_bias_rescale_qkv_cuda
+
+- func: _nested_tensor_from_mask(Tensor t, Tensor mask) -> Tensor
+  dispatch:
+    CPU, CUDA: NestedTensor_nested_tensor_from_mask
+
+- func: _nested_from_padded(Tensor padded, Tensor cpu_nested_shape_example, bool fuse_transform_0213=False) -> Tensor
+  device_check: NoCheck # cpu_nested_shape_example will always be on CPU
+  dispatch:
+    CPU: nested_from_padded_generic
+    CUDA: nested_from_padded_cuda
+
+# _nested_from_padded is not usable from Python, so
+# _nested_from_padded_and_nested_example is available for testing.
+- func: _nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _trilinear
@@ -4625,6 +4921,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: trunc_out
+    MPS: trunc_out_mps
     SparseCPU, SparseCUDA: trunc_sparse_out
     SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out

@@ -4686,7 +4983,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-
+    CompositeExplicitAutograd: unsqueeze
     SparseCPU, SparseCUDA: unsqueeze_sparse
     QuantizedCPU, QuantizedCUDA: unsqueeze_quantized

@@ -4713,6 +5010,7 @@
   variants: function, method
   dispatch:
     CPU, CUDA: var
+    MPS: var_mps

 - func: var.out(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -4764,12 +5062,18 @@
   device_check: NoCheck
   device_guard: False

-# we define both of these because 'where' does the broadcast and '_s_where' doesn't;
-# this allows us to implicitly calculate the broadcast derivative, while only dealing with the
-# _s_where derivative.
 - func: where.self(Tensor condition, Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CPU, CUDA: where
+    MPS: where_mps
+
+- func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  dispatch:
+    CPU, CUDA: where_self_out
+    MPS: where_self_out_mps

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
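where.self now has real CPU/CUDA/MPS kernels plus a new out= overload, replacing the internal `_s_where` helper removed in the next hunk. Basic usage through torch-rb is unchanged; this sketch only assumes `Torch.where` is among the generated bindings:

```ruby
require "torch"

cond = Torch.tensor([true, false, true])
a    = Torch.tensor([1, 2, 3])
b    = Torch.tensor([10, 20, 30])

# Selects from a where cond is true, from b elsewhere.
Torch.where(cond, a, b) # => tensor([ 1, 20,  3])
```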
@@ -4784,11 +5088,6 @@
   device_check: NoCheck # TensorIterator
   variants: function

-- func: _s_where(Tensor condition, Tensor self, Tensor other) -> Tensor
-  variants: function
-  dispatch:
-    CPU, CUDA: _s_where
-
 - func: norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor
   variants: function

@@ -4797,15 +5096,17 @@
 - func: _weight_norm(Tensor v, Tensor g, int dim=0) -> Tensor
   variants: function

-- func:
+- func: _weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)
   variants: function
   dispatch:
+    CPU: weight_norm_cpu
     CUDA: weight_norm_cuda

-- func:
+- func: _weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
   variants: function
   dispatch:
-
+    CPU: weight_norm_backward_cpu
+    CUDA: weight_norm_backward_cuda

 - func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
   variants: function
@@ -4887,6 +5188,16 @@
     SparseCPU: _sparse_sum_backward_cpu
     SparseCUDA: _sparse_sum_backward_cuda

+- func: _sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  dispatch:
+    SparseCsrCPU: _sparse_csr_sum_cpu
+    SparseCsrCUDA: _sparse_csr_sum_cuda
+
+- func: _sparse_csr_prod.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  dispatch:
+    SparseCsrCPU: _sparse_csr_prod_cpu
+    SparseCsrCUDA: _sparse_csr_prod_cuda
+
 - func: _sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   python_module: sparse
   variants: function
@@ -4962,6 +5273,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: norm_out
+    MPS: norm_out_mps

 # These four redispatch in their implementation, so OK to be CompositeImplicitAutograd
 - func: norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
@@ -4987,24 +5299,31 @@
   dispatch:
     CPU, CUDA: frexp_out

+# Deprecated (v.1.12)
 - func: frobenius_norm(Tensor self) -> Tensor
   variants: function

+# Deprecated (v.1.12)
 - func: frobenius_norm.dim(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
   variants: function

+# Deprecated (v.1.12)
 - func: frobenius_norm.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function

+# Deprecated (v.1.12)
 - func: nuclear_norm(Tensor self, bool keepdim=False) -> Tensor
   variants: function

+# Deprecated (v.1.12)
 - func: nuclear_norm.out(Tensor self, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function

+# Deprecated (v.1.12)
 - func: nuclear_norm.dim(Tensor self, int[2] dim, bool keepdim=False) -> Tensor
   variants: function

+# Deprecated (v.1.12)
 - func: nuclear_norm.dim_out(Tensor self, int[2] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function

@@ -5013,7 +5332,7 @@
   dispatch:
     CompositeExplicitAutograd: clone
     SparseCPU, SparseCUDA: clone_sparse
-    SparseCsrCPU, SparseCsrCUDA:
+    SparseCsrCPU, SparseCsrCUDA: clone_sparse_compressed
     MkldnnCPU: mkldnn_clone
     QuantizedCPU, QuantizedCUDA: quantized_clone

@@ -5025,22 +5344,27 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: resize_as_
+  autogen: resize_as.functional, resize_as.out

 - func: resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
-  variants: function
+  variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: resize_as_sparse_
     SparseCsrCPU, SparseCsrCUDA: resize_as_sparse_csr_
+  autogen: resize_as_sparse.functional, resize_as_sparse.out

 - func: zero_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
     CPU, CUDA: zero_
+    MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA: zero_sparse_
+    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
+  autogen: zero.functional, zero.out

 - func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5048,6 +5372,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sub_out
+    MPS: sub_out_mps
     SparseCPU, SparseCUDA: sub_out_sparse

 - func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
@@ -5056,6 +5381,7 @@
   structured_delegate: sub.out
   dispatch:
     SparseCPU, SparseCUDA: sub_sparse
+    ZeroTensor: sub_zerotensor

 - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5076,6 +5402,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: sub_
+  autogen: sub.Scalar_out

 # subtract, alias for sub
 - func: subtract.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -5125,7 +5452,7 @@

 # Functionally the same as addmm, but we give it a different derivative formula
 # that doesn't propagate gradients to non-present entries on sparse.
-- func: _sparse_addmm(Tensor self, Tensor
+- func: _sparse_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   python_module: sparse
   dispatch:
     CompositeExplicitAutograd: _sparse_addmm
@@ -5134,21 +5461,24 @@
   python_module: sparse
   dispatch:
     SparseCsrCUDA: sparse_sampled_addmm_out_sparse_csr_cuda
+    SparseCsrCPU: sparse_sampled_addmm_out_sparse_csr_cpu

 - func: sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   python_module: sparse
   dispatch:
     SparseCsrCUDA: sparse_sampled_addmm_sparse_csr_cuda
+    SparseCsrCPU: sparse_sampled_addmm_sparse_csr_cpu

 - func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   structured: True
   dispatch:
     CPU: addmm_out_cpu
     CUDA: addmm_out_cuda
+    MPS: addmm_out_mps
     SparseCPU: addmm_out_sparse_dense_cpu
     SparseCUDA: addmm_out_sparse_dense_cuda
-    SparseCsrCPU:
-    SparseCsrCUDA:
+    SparseCsrCPU: addmm_out_sparse_compressed_cpu
+    SparseCsrCUDA: addmm_out_sparse_compressed_cuda

 - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   structured_delegate: addmm.out
@@ -5156,7 +5486,7 @@
   dispatch:
     SparseCPU: addmm_sparse_dense_cpu
     SparseCUDA: addmm_sparse_dense_cuda
-    SparseCsrCPU, SparseCsrCUDA:
+    SparseCsrCPU, SparseCsrCUDA: addmm_sparse_compressed_dense

 - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
   structured_delegate: addmm.out
@@ -5167,6 +5497,16 @@
     SparseCPU: s_addmm_sparse_dense_cpu_
     SparseCUDA: s_addmm_sparse_dense_cuda_

+- func: _addmm_activation.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  dispatch:
+    CPU: addmm_activation_out_cpu
+    CUDA: addmm_activation_out_cuda
+
+- func: _addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor
+  structured_delegate: _addmm_activation.out
+  variants: function, method
+
 # NOTE [ Sparse: autograd and API ]
 #
 #
@@ -5278,11 +5618,23 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_csr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_csc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_bsr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_bsc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

+- func: sparse_compressed_tensor.comp_plain_value(Tensor compressed_indices, Tensor plain_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_csr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_csc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_bsc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

 - func: sparse_coo_tensor.size(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

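Alongside the existing CSR constructor, this hunk adds CSC/BSR/BSC constructors and a generic `sparse_compressed_tensor` factory. To illustrate the CSR pieces (whether torch-rb generates bindings for these factory functions is an assumption, so the call itself is hypothetical):

```ruby
require "torch"

# CSR layout of [[0, 1], [2, 3]]: row pointers, column indices, values.
crow_indices = Torch.tensor([0, 1, 3])
col_indices  = Torch.tensor([1, 0, 1])
values       = Torch.tensor([1.0, 2.0, 3.0])

# Hypothetical binding for the sparse_csr_tensor factory above.
csr = Torch.sparse_csr_tensor(crow_indices, col_indices, values, [2, 2])
```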
@@ -5294,7 +5646,11 @@

 - func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size) -> ()

+- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()
 - func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()

 - func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
@@ -5309,26 +5665,34 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_resize_
+  autogen: sparse_resize.functional, sparse_resize.out

 - func: sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_resize_and_clear_
+  autogen: sparse_resize_and_clear.functional, sparse_resize_and_clear.out

 - func: sparse_mask(Tensor self, Tensor mask) -> Tensor
   variants: method
   dispatch:
     SparseCPU: sparse_mask_cpu
     SparseCUDA: sparse_mask_cuda
+    SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_csr

 - func: _to_cpu(Tensor[] tensors) -> Tensor[]
   variants: function

 - func: to_dense(Tensor self, ScalarType? dtype=None) -> Tensor
   variants: method
+
+# Special case of to_dense with custom derivative
+- func: _to_dense(Tensor self, ScalarType? dtype=None) -> Tensor
+  variants: method
   dispatch:
-    SparseCPU, SparseCUDA
+    SparseCPU, SparseCUDA: sparse_to_dense
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_dense
     MkldnnCPU: mkldnn_to_dense

 - func: to_dense_backward(Tensor grad, Tensor input) -> Tensor
@@ -5414,6 +5778,7 @@
     SparseCPU, SparseCUDA: _coalesced_sparse_
   device_check: NoCheck
   device_guard: False
+  autogen: _coalesced.functional, _coalesced.out

 - func: indices(Tensor(a) self) -> Tensor(a)
   variants: method
@@ -5444,6 +5809,20 @@
   device_check: NoCheck
   device_guard: False

+- func: ccol_indices(Tensor(a) self) -> Tensor(a)
+  variants: method
+  dispatch:
+    SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+  device_check: NoCheck
+  device_guard: False
+
+- func: row_indices(Tensor(a) self) -> Tensor(a)
+  variants: method
+  dispatch:
+    SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+  device_check: NoCheck
+  device_guard: False
+
 - func: hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     SparseCPU: hspmm_out_sparse_cpu
@@ -5459,11 +5838,13 @@
   variants: function
   dispatch:
     SparseCPU, SparseCUDA: copy_sparse_
+  autogen: copy_sparse_to_sparse.functional, copy_sparse_to_sparse.out

 - func: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: unbind
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind

 - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
   variants: function, method
@@ -5472,11 +5853,41 @@
   variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse

 - func: to_sparse(Tensor self) -> Tensor
   variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+
+- func: to_sparse_csr(Tensor self) -> Tensor
+  variants: method
+  dispatch:
+    CPU, CUDA: dense_to_sparse_csr
+    SparseCPU, SparseCUDA: coo_to_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
+
+- func: to_sparse_csc(Tensor self) -> Tensor
+  variants: method
+  dispatch:
+    CPU, CUDA: dense_to_sparse_csc
+    SparseCPU, SparseCUDA: coo_to_sparse_csc
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
+
+- func: to_sparse_bsr(Tensor self, int[2] blocksize) -> Tensor
+  variants: method
+  dispatch:
+    CPU, CUDA: dense_to_sparse_bsr
+    SparseCPU, SparseCUDA: coo_to_sparse_bsr
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
+
+- func: to_sparse_bsc(Tensor self, int[2] blocksize) -> Tensor
+  variants: method
+  dispatch:
+    CPU, CUDA: dense_to_sparse_bsc
+    SparseCPU, SparseCUDA: coo_to_sparse_bsc
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc

 - func: to_mkldnn(Tensor self, ScalarType? dtype=None) -> Tensor
   variants: method
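Dense and COO tensors gain direct conversions to every compressed layout: `to_sparse_csr`, `to_sparse_csc`, and the blocked `to_sparse_bsr`/`to_sparse_bsc` (which require a blocksize). A sketch, assuming the Tensor methods are generated by torch-rb's codegen:

```ruby
require "torch"

dense = Torch.tensor([[0.0, 1.0], [2.0, 0.0]])

csr = dense.to_sparse_csr
csr.crow_indices # compressed row pointers
csr.col_indices  # column index of each stored value
csr.values       # the stored non-zero values
```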
@@ -5636,7 +6047,7 @@
   dispatch:
     CPU: fused_moving_avg_obs_fake_quant_cpu
     CUDA: fused_moving_avg_obs_fake_quant_cuda
-
+  autogen: _fused_moving_avg_obs_fq_helper.functional, _fused_moving_avg_obs_fq_helper.out

 - func: _choose_qparams_per_tensor(Tensor self, bool reduce_range=False) -> (float, int)
   variants: function
@@ -5722,16 +6133,33 @@
   dispatch:
     CPU: _local_scalar_dense_cpu
     CUDA: _local_scalar_dense_cuda
+    MPS: _local_scalar_dense_mps
   variants: function

+# MPS LSTM implementation
+
+- func: _lstm_mps(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    MPS: _lstm_mps
+
+- func: lstm_mps_backward(Tensor grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])
+  dispatch:
+    MPS: lstm_mps_backward
+
+
 # Fused RNN kernels
 - func: _thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor, Tensor)
   dispatch:
     CUDA: _thnn_fused_lstm_cell_cuda

-
+# NB: The composite version of this function below is a simple wrapper that duplicates some of the outputs
+# It is necessary to avoid triggering TensorImpl use count checks in debug mode
+# NB: this is function is NOT differentiable
+- func: _thnn_fused_lstm_cell_backward_impl(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor)
   dispatch:
-    CUDA:
+    CUDA: _thnn_fused_lstm_cell_backward_impl_cuda
+
+- func: _thnn_fused_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)

 - func: _thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)

@@ -5812,36 +6240,55 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: set_
+    CPU, CUDA, Meta, MPS: set_
+  autogen: set.source_Storage_functional, set.source_Storage_out

 - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!)
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU: set_storage_cpu_
+    CPU, Meta: set_storage_cpu_
     CUDA: set_storage_cuda_
+    MPS: set_storage_mps_
     QuantizedCPU, QuantizedCUDA: set_storage_quantized_
+  autogen: set.source_Storage_storage_offset_functional, set.source_Storage_storage_offset_out
+
+- func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!)
+  variants: method
+  device_check: NoCheck
+  device_guard: False

 - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: set_tensor_
+    CPU, CUDA, Meta, MPS: set_tensor_
+  autogen: set.source_Tensor_functional, set.source_Tensor_out

 - func: set_(Tensor(a!) self) -> Tensor(a!)
   variants: method
   dispatch:
     CPU: set_cpu_
     CUDA: set_cuda_
+    Meta: set_meta_
+    MPS: set_mps_
+  autogen: set.functional, set.out
+
+- func: lift(Tensor self) -> Tensor
+  variants: method
+  dispatch:
+    # Not making it CompositeImplicitAutograd because lift
+    # should be a primitive w.r.t. functorch
+    CompositeExplicitAutograd: lift

 - func: is_set_to(Tensor self, Tensor tensor) -> bool
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: is_set_to
+    CPU, CUDA, MPS: is_set_to

 - func: masked_fill_.Scalar(Tensor(a!) self, Tensor mask, Scalar value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5849,6 +6296,8 @@
   dispatch:
     CPU: masked_fill__cpu
     CUDA: masked_fill__cuda
+    MPS: masked_fill__mps
+  autogen: masked_fill.Scalar_out

 - func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -5862,6 +6311,8 @@
   dispatch:
     CPU: masked_fill__cpu
     CUDA: masked_fill__cuda
+    MPS: masked_fill__mps
+  autogen: masked_fill.Tensor_out

 - func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -5874,23 +6325,29 @@
   dispatch:
     CPU: masked_scatter__cpu
     CUDA: masked_scatter__cuda
+  autogen: masked_scatter.out

 - func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: masked_scatter

-- func: _masked_softmax(Tensor self, Tensor mask) -> Tensor
+- func: _masked_softmax(Tensor self, Tensor mask, int? dim=None) -> Tensor
   dispatch:
     CUDA: masked_softmax_cuda
     CPU: masked_softmax_cpu

+- func: _masked_softmax_backward(Tensor grad_output, Tensor output, Tensor mask, int? dim=None) -> Tensor
+  dispatch:
+    CUDA: masked_softmax_backward_cuda
+    CPU: masked_softmax_backward_cpu
+
 - func: view(Tensor(a) self, int[] size) -> Tensor(a)
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
+    ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, MPS: view
     MkldnnCPU: mkldnn_view

 # Warning: If you want to change the name or overload name of this
@@ -5909,7 +6366,8 @@
 - func: put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)
   variants: method
   dispatch:
-    CPU, CUDA: put_
+    CPU, CUDA, MPS: put_
+  autogen: put.out

 - func: put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor
   variants: function, method
@@ -5934,12 +6392,30 @@
 - func: index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
   variants: function, method

+- func: index_reduce.out(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  variants: function
+  precomputed:
+  - dim -> int dim
+  dispatch:
+    CPU: index_reduce_cpu_out
+    CUDA: index_reduce_cuda_out
+
+- func: index_reduce_(Tensor(a!) self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor(a!)
+  structured_delegate: index_reduce.out
+  variants: method
+
+- func: index_reduce(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor
+  structured_delegate: index_reduce.out
+  variants: function, method
+
 - func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
     CPU: index_fill_
     CUDA: index_fill_
+  autogen: index_fill.int_Scalar_out

 - func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
   device_check: NoCheck # TensorIterator
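index_reduce, added above, scatters rows of `source` into `self` along `dim` at the positions in `index`, combining with "prod", "mean", "amax" or "amin"; `include_self` decides whether the destination's existing values take part. A sketch, assuming torch-rb generates the in-place variant as `index_reduce!`:

```ruby
require "torch"

dest   = Torch.ones(3)
index  = Torch.tensor([0, 1, 1])
source = Torch.tensor([2.0, 3.0, 4.0])

# "prod" with include_self left at its default (true): existing ones
# multiply with the scattered source rows.
dest.index_reduce!(0, index, source, "prod")
# => tensor([2.0, 12.0, 1.0])
```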
@@ -5952,6 +6428,7 @@
   variants: method
   dispatch:
     CPU, CUDA: index_fill_
+  autogen: index_fill.int_Tensor_out

 - func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -5988,6 +6465,7 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_src_out
+    MPS: scatter_src_out_mps

 - func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
   structured_delegate: scatter.value_out
@@ -6002,6 +6480,7 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_value_out
+    MPS: scatter_value_out_mps

 - func: scatter.reduce(Tensor self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor
   structured_delegate: scatter.reduce_out
@@ -6016,6 +6495,7 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_reduce_out
+    MPS: scatter_reduce_out_mps

 - func: scatter.value_reduce(Tensor self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor
   structured_delegate: scatter.value_reduce_out
@@ -6030,6 +6510,7 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_value_reduce_out
+    MPS: scatter_value_reduce_out_mps

 - func: scatter.dimname_src(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
   variants: function, method
@@ -6050,14 +6531,24 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_add
+    MPS: scatter_add_mps_out

 - func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
   variants: function, method

-- func: scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *,
+- func: scatter_reduce.two(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor
+  structured_delegate: scatter_reduce.two_out
   variants: function, method
+
+- func: scatter_reduce_.two(Tensor(a!) self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor(a!)
+  structured_delegate: scatter_reduce.two_out
+  variants: method
+
+- func: scatter_reduce.two_out(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  variants: function
   dispatch:
-    CPU:
+    CPU, CUDA: scatter_reduce_two

 - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   structured_delegate: eq.Scalar_out
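scatter_reduce.two is reworked above into a structured op that takes a `src` tensor, a reduction ("sum", "prod", "mean", "amax", "amin") and an `include_self` flag, with in-place and out= companions. A sketch, assuming the generated torch-rb binding accepts the keyword form:

```ruby
require "torch"

src    = Torch.tensor([1.0, 2.0, 3.0, 4.0])
index  = Torch.tensor([0, 0, 1, 1])
target = Torch.zeros(2)

# Sum src entries into target slots given by index, ignoring the
# existing zeros because include_self is false.
target.scatter_reduce(0, index, src, "sum", include_self: false)
# => tensor([3.0, 7.0])
```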
@@ -6093,6 +6584,12 @@
|
|
6093
6584
|
dispatch:
|
6094
6585
|
CompositeExplicitAutograd: bitwise_and
|
6095
6586
|
|
6587
|
+
- func: bitwise_and.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
6588
|
+
device_check: NoCheck # TensorIterator
|
6589
|
+
variants: function
|
6590
|
+
dispatch:
|
6591
|
+
CompositeExplicitAutograd: bitwise_and
|
6592
|
+
|
6096
6593
|
- func: bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor
|
6097
6594
|
device_check: NoCheck # TensorIterator
|
6098
6595
|
variants: method, function
|
@@ -6141,6 +6638,12 @@
|
|
6141
6638
|
device_check: NoCheck # TensorIterator
|
6142
6639
|
variants: method, function
|
6143
6640
|
|
6641
|
+
- func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
6642
|
+
device_check: NoCheck # TensorIterator
|
6643
|
+
variants: function
|
6644
|
+
dispatch:
|
6645
|
+
CompositeExplicitAutograd: bitwise_or
|
6646
|
+
|
6144
6647
|
- func: bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor
|
6145
6648
|
device_check: NoCheck # TensorIterator
|
6146
6649
|
variants: method, function
|
@@ -6189,6 +6692,12 @@
|
|
6189
6692
|
device_check: NoCheck # TensorIterator
|
6190
6693
|
variants: method, function
|
6191
6694
|
|
6695
|
+
- func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
6696
|
+
device_check: NoCheck # TensorIterator
|
6697
|
+
variants: function
|
6698
|
+
dispatch:
|
6699
|
+
CompositeExplicitAutograd: bitwise_xor
|
6700
|
+
|
6192
6701
|
- func: bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor
|
6193
6702
|
device_check: NoCheck # TensorIterator
|
6194
6703
|
variants: method, function
|
@@ -6236,12 +6745,14 @@
|
|
6236
6745
|
variants: method
|
6237
6746
|
dispatch:
|
6238
6747
|
CPU, CUDA: __ilshift__
|
6748
|
+
autogen: __lshift__.Scalar_out
|
6239
6749
|
|
6240
6750
|
- func: __ilshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
6241
6751
|
device_check: NoCheck # TensorIterator
|
6242
6752
|
variants: method
|
6243
6753
|
dispatch:
|
6244
6754
|
CPU, CUDA: __ilshift__
|
6755
|
+
autogen: __lshift__.Tensor_out
|
6245
6756
|
|
6246
6757
|
- func: bitwise_left_shift.Tensor(Tensor self, Tensor other) -> Tensor
|
6247
6758
|
device_check: NoCheck # TensorIterator
|
@@ -6264,25 +6775,25 @@
|
|
6264
6775
|
device_check: NoCheck # TensorIterator
|
6265
6776
|
variants: method, function
|
6266
6777
|
dispatch:
|
6267
|
-
|
6778
|
+
CompositeExplicitAutograd: bitwise_left_shift
|
6268
6779
|
|
6269
6780
|
- func: bitwise_left_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
6270
6781
|
device_check: NoCheck # TensorIterator
|
6271
6782
|
variants: method
|
6272
6783
|
dispatch:
|
6273
|
-
|
6784
|
+
CompositeExplicitAutograd: bitwise_left_shift_
|
6274
6785
|
|
6275
6786
|
- func: bitwise_left_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
|
6276
6787
|
device_check: NoCheck # TensorIterator
|
6277
6788
|
variants: function
|
6278
6789
|
dispatch:
|
6279
|
-
|
6790
|
+
CompositeExplicitAutograd: bitwise_left_shift_out
|
6280
6791
|
|
6281
6792
|
- func: bitwise_left_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
6282
6793
|
device_check: NoCheck # TensorIterator
|
6283
6794
|
variants: function
|
6284
6795
|
dispatch:
|
6285
|
-
|
6796
|
+
CompositeExplicitAutograd: bitwise_left_shift
|
6286
6797
|
|
6287
6798
|
- func: __rshift__.Scalar(Tensor self, Scalar other) -> Tensor
|
6288
6799
|
device_check: NoCheck # TensorIterator
|
@@ -6301,12 +6812,14 @@
|
|
6301
6812
|
variants: method
|
6302
6813
|
dispatch:
|
6303
6814
|
CPU, CUDA: __irshift__
|
6815
|
+
autogen: __rshift__.Scalar_out
|
6304
6816
|
|
6305
6817
|
- func: __irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
6306
6818
|
device_check: NoCheck # TensorIterator
|
6307
6819
|
variants: method
|
6308
6820
|
dispatch:
|
6309
6821
|
CPU, CUDA: __irshift__
|
6822
|
+
autogen: __rshift__.Tensor_out
|
6310
6823
|
|
6311
6824
|
- func: bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor
|
6312
6825
|
device_check: NoCheck # TensorIterator
|
@@ -6329,25 +6842,25 @@
|
|
6329
6842
|
device_check: NoCheck # TensorIterator
|
6330
6843
|
variants: method, function
|
6331
6844
|
dispatch:
|
6332
|
-
|
6845
|
+
CompositeExplicitAutograd: bitwise_right_shift
|
6333
6846
|
|
6334
6847
|
- func: bitwise_right_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
6335
6848
|
device_check: NoCheck # TensorIterator
|
6336
6849
|
variants: method
|
6337
6850
|
dispatch:
|
6338
|
-
|
6851
|
+
CompositeExplicitAutograd: bitwise_right_shift_
|
6339
6852
|
|
6340
6853
|
- func: bitwise_right_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
|
6341
6854
|
device_check: NoCheck # TensorIterator
|
6342
6855
|
variants: function
|
6343
6856
|
dispatch:
|
6344
|
-
|
6857
|
+
CompositeExplicitAutograd: bitwise_right_shift_out
|
6345
6858
|
|
6346
6859
|
- func: bitwise_right_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
6347
6860
|
device_check: NoCheck # TensorIterator
|
6348
6861
|
variants: function
|
6349
6862
|
dispatch:
|
6350
|
-
|
6863
|
+
CompositeExplicitAutograd: bitwise_right_shift
|
6351
6864
|
|
6352
6865
|
- func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
|
6353
6866
|
structured_delegate: tril.out
|
@@ -6376,15 +6889,18 @@
|
|
6376
6889
|
variants: method
|
6377
6890
|
dispatch:
|
6378
6891
|
CPU, CUDA: addbmm_
|
6892
|
+
MPS: addbmm_mps_
|
6379
6893
|
|
6380
6894
|
- func: addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
|
6381
6895
|
dispatch:
|
6382
6896
|
CPU, CUDA: addbmm_out
|
6897
|
+
MPS: addbmm_out_mps
|
6383
6898
|
|
6384
6899
|
- func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
|
6385
6900
|
variants: method, function
|
6386
6901
|
dispatch:
|
6387
6902
|
CPU, CUDA: addbmm
|
6903
|
+
MPS: addbmm_mps
|
6388
6904
|
|
6389
6905
|
- func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
|
6390
6906
|
device_check: NoCheck # TensorIterator
|
@@ -6392,6 +6908,8 @@
|
|
6392
6908
|
dispatch:
|
6393
6909
|
CPU, CUDA: random_
|
6394
6910
|
Meta: random_meta_
|
6911
|
+
MPS: random_mps_
|
6912
|
+
autogen: random.from_functional, random.from_out
|
6395
6913
|
|
6396
6914
|
- func: random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)
|
6397
6915
|
device_check: NoCheck # TensorIterator
|
@@ -6399,6 +6917,8 @@
|
|
6399
6917
|
dispatch:
|
6400
6918
|
CPU, CUDA: random_
|
6401
6919
|
Meta: random_meta_
|
6920
|
+
MPS: random_mps_
|
6921
|
+
autogen: random.to_functional, random.to_out
|
6402
6922
|
|
6403
6923
|
- func: random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)
|
6404
6924
|
device_check: NoCheck # TensorIterator
|
@@ -6406,31 +6926,37 @@
|
|
6406
6926
|
dispatch:
|
6407
6927
|
CPU, CUDA: random_
|
6408
6928
|
Meta: random_meta_
|
6929
|
+
autogen: random.functional, random.out
|
6409
6930
|
|
6410
6931
|
- func: uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)
|
6411
6932
|
device_check: NoCheck # TensorIterator
|
6412
6933
|
variants: method
|
6413
6934
|
dispatch:
|
6414
6935
|
CPU, CUDA: uniform_
|
6936
|
+
MPS: uniform_mps_
|
6415
6937
|
Meta: uniform_meta_
|
6938
|
+
autogen: uniform.functional, uniform.out
|
6416
6939
|
|
6417
6940
|
- func: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!)
|
6418
6941
|
device_check: NoCheck # TensorIterator
|
6419
6942
|
variants: method
|
6420
6943
|
dispatch:
|
6421
6944
|
CPU, CUDA: cauchy_
|
6945
|
+
autogen: cauchy.functional, cauchy.out
|
6422
6946
|
|
6423
6947
|
- func: log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)
|
6424
6948
|
device_check: NoCheck # TensorIterator
|
6425
6949
|
variants: method
|
6426
6950
|
dispatch:
|
6427
6951
|
CPU, CUDA: log_normal_
|
6952
|
+
autogen: log_normal.functional, log_normal.out
|
6428
6953
|
|
6429
6954
|
- func: exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)
|
6430
6955
|
device_check: NoCheck # TensorIterator
|
6431
6956
|
variants: method
|
6432
6957
|
dispatch:
|
6433
6958
|
CPU, CUDA: exponential_
|
6959
|
+
autogen: exponential.functional, exponential.out
|
6434
6960
|
|
6435
6961
|
- func: geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)
|
6436
6962
|
device_check: NoCheck # TensorIterator
|
@@ -6439,11 +6965,13 @@
|
|
6439
6965
|
CPU, CUDA: geometric_
|
6440
6966
|
|
6441
6967
|
# wrappers for TH functions
|
6968
|
+
autogen: geometric.functional, geometric.out
|
6442
6969
|
|
6443
6970
|
- func: diag.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
|
6444
6971
|
dispatch:
|
6445
6972
|
CPU: diag_cpu_out
|
6446
6973
|
CUDA: diag_cuda_out
|
6974
|
+
MPS: diag_mps_out
|
6447
6975
|
|
6448
6976
|
- func: diag(Tensor self, int diagonal=0) -> Tensor
|
6449
6977
|
variants: method, function
@@ -6465,6 +6993,7 @@
   dispatch:
     CPU: triu_cpu
     CUDA: triu_cuda
+    MPS: triu_mps_out
 
 - func: triu(Tensor self, int diagonal=0) -> Tensor
   structured_delegate: triu.out
@@ -6475,6 +7004,7 @@
   dispatch:
     CPU: tril_cpu
     CUDA: tril_cuda
+    MPS: tril_mps_out
 
 - func: tril(Tensor self, int diagonal=0) -> Tensor
   structured_delegate: tril.out
@@ -6507,6 +7037,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: ne_Scalar_out
+    MPS: ne_scalar_out_mps
     QuantizedCPU: ne_out_quantized_cpu
 
 - func: ne.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6522,6 +7053,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: ne_Tensor_out
+    MPS: ne_tensor_out_mps
     QuantizedCPU: ne_out_quantized_cpu
 
 - func: ne.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6568,6 +7100,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: eq_Scalar_out
+    MPS: eq_scalar_out_mps
     QuantizedCPU: eq_out_quantized_cpu
 
 - func: eq.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6583,6 +7116,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: eq_Tensor_out
+    MPS: eq_tensor_out_mps
     QuantizedCPU: eq_out_quantized_cpu
 
 - func: eq.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6598,6 +7132,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: ge_Scalar_out
+    MPS: ge_scalar_out_mps
     QuantizedCPU: ge_out_quantized_cpu
 
 - func: ge.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6613,6 +7148,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: ge_Tensor_out
+    MPS: ge_tensor_out_mps
     QuantizedCPU: ge_out_quantized_cpu
 
 - func: ge.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6659,6 +7195,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: le_Scalar_out
+    MPS: le_scalar_out_mps
     QuantizedCPU: le_out_quantized_cpu
 
 - func: le.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6674,6 +7211,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: le_Tensor_out
+    MPS: le_tensor_out_mps
     QuantizedCPU: le_out_quantized_cpu
 
 - func: le.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6720,6 +7258,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: gt_Scalar_out
+    MPS: gt_scalar_out_mps
     QuantizedCPU: gt_out_quantized_cpu
 
 - func: gt.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6735,6 +7274,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: gt_Tensor_out
+    MPS: gt_tensor_out_mps
     QuantizedCPU: gt_out_quantized_cpu
 
 - func: gt.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6781,6 +7321,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: lt_Scalar_out
+    MPS: lt_scalar_out_mps
     QuantizedCPU: lt_out_quantized_cpu
 
 - func: lt.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6796,6 +7337,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: lt_Tensor_out
+    MPS: lt_tensor_out_mps
     QuantizedCPU: lt_out_quantized_cpu
 
 - func: lt.Tensor(Tensor self, Tensor other) -> Tensor
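
Each comparison `out` kernel (`ne`, `eq`, `ge`, `le`, `gt`, `lt`, in both Scalar and Tensor overloads) gains an MPS dispatch entry. From torch-rb these remain the ordinary elementwise comparisons returning boolean tensors; a rough sketch (nothing torch-rb-specific changes in this diff):

```ruby
require "torch"

a = Torch.tensor([1, 2, 3])
b = Torch.tensor([3, 2, 1])

a.eq(b)   # => boolean tensor [false, true, false]
a.gt(b)   # => boolean tensor [false, false, true]
a.le(2)   # scalar overload, routed through le.Scalar_out
```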
@@ -6854,15 +7396,18 @@
   dispatch:
     CPU, QuantizedCPU: index_select_out_cpu_
     CUDA, QuantizedCUDA: index_select_out_cuda
+    MPS: index_select_out_mps
 
 - func: index_select(Tensor self, int dim, Tensor index) -> Tensor
   variants: method, function
   dispatch:
     CPU: index_select_cpu_
     QuantizedCPU: index_select_quantized_cpu_
-    CUDA
-
-
+    CUDA: index_select_cuda
+    QuantizedCUDA: index_select_quantized_cuda
+    SparseCPU: index_select_sparse_cpu
+    SparseCUDA: index_select_sparse_cuda
+    MPS: index_select_mps
 
 - func: index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)
 
@@ -6911,6 +7456,7 @@
   structured: True
   dispatch:
     CPU, CUDA: gather_out
+    MPS: gather_out_mps
 
 - func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor
   variants: method, function
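
`index_select` and `gather` both gain MPS kernels, and `index_select` splits its CUDA/quantized/sparse dispatch into separate per-backend entries. Call sites are unchanged; a minimal torch-rb sketch:

```ruby
require "torch"

x = Torch.arange(12).reshape(3, 4)

# index_select picks whole slices along a dimension by index.
rows = x.index_select(0, Torch.tensor([0, 2]))

# gather picks one element per output position along the given dimension.
idx = Torch.tensor([[0, 1], [2, 3], [1, 0]])
vals = x.gather(1, idx)   # shape [3, 2]
```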
@@ -6934,6 +7480,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: addcmul_out
+    MPS: addcmul_out_mps
 
 - func: addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
   structured_delegate: addcmul.out
@@ -6951,6 +7498,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: addcdiv_out
+    MPS: addcdiv_out_mps
 
 - func: addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
   structured_delegate: addcdiv.out
@@ -6998,10 +7546,13 @@
 
 - func: linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor
   python_module: linalg
-  variants:
+  variants: function
   dispatch:
     CPU, CUDA: linalg_solve_triangular
 
+- func: linalg_vander(Tensor x, *, int? N=None) -> Tensor
+  python_module: linalg
+
 - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
   dispatch:
     CompositeExplicitAutograd: symeig_out
@@ -7079,21 +7630,6 @@
     CPU: _cholesky_solve_helper_cpu
     CUDA: _cholesky_solve_helper_cuda
 
-- func: solve(Tensor self, Tensor A) -> (Tensor solution, Tensor LU)
-  variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: solve
-
-- func: solve.solution(Tensor self, Tensor A, *, Tensor(a!) solution, Tensor(b!) lu) -> (Tensor(a!) solution, Tensor(b!) LU)
-  dispatch:
-    CompositeExplicitAutograd: solve_out
-
-- func: _solve_helper(Tensor self, Tensor A) -> (Tensor, Tensor)
-  variants: function
-  dispatch:
-    CPU: _solve_helper_cpu
-    CUDA: _solve_helper_cuda
-
 - func: cholesky_inverse(Tensor self, bool upper=False) -> Tensor
   variants: method, function
   dispatch:
@@ -7144,13 +7680,14 @@
   dispatch:
     CPU, CUDA: lu_solve
 
+# lu_unpack
 - func: lu_unpack(Tensor LU_data, Tensor LU_pivots, bool unpack_data=True, bool unpack_pivots=True) -> (Tensor P, Tensor L, Tensor U)
+  structured_delegate: lu_unpack.out
   variants: function
-  dispatch:
-    CPU, CUDA: lu_unpack
 
 - func: lu_unpack.out(Tensor LU_data, Tensor LU_pivots, bool unpack_data=True, bool unpack_pivots=True, *, Tensor(a!) P, Tensor(b!) L, Tensor(c!) U) -> (Tensor(a!) P, Tensor(b!) L, Tensor(c!) U)
   variants: function
+  structured: True
   dispatch:
     CPU, CUDA: lu_unpack_out
 
@@ -7274,6 +7811,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sign_out
+    MPS: sign_out_mps
     SparseCPU, SparseCUDA: sign_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
 
@@ -7305,6 +7843,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: atan2_out
+    MPS: atan2_mps_out
 
 - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -7391,6 +7930,12 @@
   dispatch:
     CPU: histogramdd_cpu
 
+- func: histogramdd(Tensor self, int[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)
+
+- func: histogramdd.int_bins(Tensor self, int bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)
+
+- func: histogramdd.TensorList_bins(Tensor self, Tensor[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)
+
 - func: fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
@@ -7528,6 +8073,7 @@
   variants: method, function
   dispatch:
     CPU, CUDA: min
+    MPS: min_mps
     QuantizedCPU: min_quantized_cpu
 
 - func: fmin(Tensor self, Tensor other) -> Tensor
@@ -7547,6 +8093,7 @@
   variants: method, function
   dispatch:
     CPU, CUDA: max
+    MPS: max_mps
     QuantizedCPU: max_quantized_cpu
 
 - func: fmax(Tensor self, Tensor other) -> Tensor
@@ -7572,6 +8119,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: maximum_out
+    MPS: maximum_out_mps
 
 # binary max, alias of maximum
 # NOTE: max is not an alias for maximum, since there is also unary max
@@ -7593,6 +8141,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: minimum_out
+    MPS: minimum_out_mps
 
 # binary min, alias for minimum
 # NOTE: min is not an alias for minimum, since there is also unary min
@@ -7626,27 +8175,23 @@
 - func: sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   device_check: NoCheck # TensorIterator
   dispatch:
-
-    CUDA: sort_out_cuda
+    CompositeExplicitAutograd: sort_out
 
 - func: sort.values_stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
+  structured: True
   dispatch:
-    CPU:
-    CUDA: sort_out_stable_cuda
+    CPU, CUDA: sort_stable_out
 
 - func: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-
-    CUDA: sort_cuda
-    QuantizedCPU: sort_quantized_cpu
+    CompositeExplicitAutograd: sort
 
 - func: sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
+  structured_delegate: sort.values_stable
   variants: method, function
   dispatch:
-    CPU: sort_cpu_stable
-    CUDA: sort_stable_cuda
     QuantizedCPU: sort_quantized_cpu_stable
 
 - func: sort.dimname_values(Tensor self, Dimname dim, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
@@ -7676,6 +8221,7 @@
   dispatch:
     CPU: topk_out_cpu
     CUDA: topk_out_cuda
+    MPS: topk_out_mps
 
 - func: topk(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)
   variants: method, function
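
`sort` is reworked into a structured kernel: `sort.values_stable` becomes the structured `out` form, the plain overloads go through `CompositeExplicitAutograd`, and `topk.out` gains an MPS kernel. From torch-rb both still return a values/indices pair; a sketch, assuming the `stable:` keyword overload is resolved by the generated bindings the same way as in Python:

```ruby
require "torch"

x = Torch.tensor([3.0, 1.0, 2.0])

values, indices = Torch.sort(x)        # ascending
top_vals, top_idx = Torch.topk(x, 2)   # two largest values

# Stable sort keeps the relative order of equal elements (assumed keyword form).
values, indices = Torch.sort(x, stable: true)
```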
@@ -7693,6 +8239,7 @@
   structured: True
   dispatch:
     CPU, CUDA: all_all_out
+    MPS: all_all_out_mps
 
 - func: any(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -7706,6 +8253,7 @@
   structured: True
   dispatch:
     CPU, CUDA: any_all_out
+    MPS: any_all_out_mps
 
 - func: renorm.out(Tensor self, Scalar p, int dim, Scalar maxnorm, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -7728,7 +8276,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: unfold
+    CPU, CUDA, Meta: unfold
     QuantizedCPU, QuantizedCUDA: unfold
 
 - func: unfold_backward(Tensor grad_in, int[] input_sizes, int dim, int size, int step) -> Tensor
@@ -7749,6 +8297,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: pow_Tensor_Tensor_out
+    MPS: pow_tensor_tensor_out_mps
 
 - func: pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -7772,6 +8321,7 @@
   dispatch:
     CPU, CUDA: pow_Tensor_Scalar_out
     SparseCPU, SparseCUDA: pow_out_sparse_scalar
+    MPS: pow_tensor_scalar_out_mps
 
 - func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -7815,32 +8365,46 @@
   variants: method
   dispatch:
     CPU, CUDA: normal_
+    MPS: normal_mps_
     Meta: normal_meta_
     SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
+  autogen: normal.functional, normal.out
 
 - func: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: normal_out
+    MPS: normal_mps_out
+    Meta: normal_out_meta
 
 - func: normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor
   dispatch:
     CPU, CUDA: normal
+    #MPS: normal_mps
+    Meta: normal_meta
 
 - func: normal.float_Tensor_out(float mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: normal_out
+    Meta: normal_out_meta
+    MPS: normal_mps_out
 
 - func: normal.float_Tensor(float mean, Tensor std, *, Generator? generator=None) -> Tensor
   dispatch:
     CPU, CUDA: normal
+    Meta: normal_meta
+    #MPS: normal_mps
 
 - func: normal.Tensor_Tensor_out(Tensor mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: normal_out
+    Meta: normal_out_meta
+    MPS: normal_mps_out
 
 - func: normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor
   dispatch:
     CPU, CUDA: normal
+    Meta: normal_meta
+    #MPS: normal_mps
 
 - func: normal.float_float(float mean, float std, int[] size, *, Generator? generator=None, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
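The `normal` overloads gain Meta kernels, the in-place variant gains an MPS kernel plus `autogen` entries, and the broadcasting MPS kernels stay commented out for now. Roughly, from torch-rb (the exact overload resolution shown here is an assumption about the generated bindings):

```ruby
require "torch"

# In-place fill from N(0, 1); maps to normal_.
x = Torch.empty(3, 3)
x.normal!(0, 1)

# Tensor mean with scalar std, and tensor mean with tensor std.
mean = Torch.zeros(5)
std  = Torch.ones(5) * 0.1
a = Torch.normal(mean, 1.0)
b = Torch.normal(mean, std)
```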
@@ -7851,32 +8415,30 @@
   dispatch:
     CompositeExplicitAutograd: alias
 
-- func: _index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
-  dispatch:
-    CPU: _index_copy_impl_
-    CUDA: _index_copy_impl_
-
 - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+  autogen: _amp_foreach_non_finite_check_and_unscale.functional, _amp_foreach_non_finite_check_and_unscale.out
 
 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+  autogen: _amp_update_scale.functional, _amp_update_scale.out
 
-- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
-  dispatch:
-    CPU: _cat_cpu
-    CUDA: cat_cuda
-    QuantizedCPU: cat_quantized_cpu
+#- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
+#  dispatch:
+#    CPU: _cat_cpu
+#    CUDA: cat_cuda
+#    MPS: cat_mps
+#    QuantizedCPU: cat_quantized_cpu
 
-- func: _cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
-  dispatch:
-    CPU: _cat_out_cpu
-    CUDA: cat_out_cuda
-    QuantizedCPU: cat_out_quantized_cpu
+#- func: _cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+#  dispatch:
+#    CPU: _cat_out_cpu
+#    CUDA: cat_out_cuda
+#    QuantizedCPU: cat_out_quantized_cpu
 
 - func: _foreach_add.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7891,6 +8453,7 @@
   dispatch:
     CPU: foreach_tensor_add_scalar_kernel_slow_
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
+  autogen: _foreach_add.Scalar_functional, _foreach_add.Scalar_out
 
 - func: _foreach_sub.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7905,6 +8468,7 @@
   dispatch:
     CPU: foreach_tensor_sub_scalar_kernel_slow_
    CUDA: foreach_tensor_sub_scalar_kernel_cuda_
+  autogen: _foreach_sub.Scalar_functional, _foreach_sub.Scalar_out
 
 - func: _foreach_mul.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7919,6 +8483,7 @@
   dispatch:
     CPU: foreach_tensor_mul_scalar_kernel_slow_
     CUDA: foreach_tensor_mul_scalar_kernel_cuda_
+  autogen: _foreach_mul.Scalar_functional, _foreach_mul.Scalar_out
 
 - func: _foreach_div.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7933,6 +8498,7 @@
   dispatch:
     CPU: foreach_tensor_div_scalar_kernel_slow_
     CUDA: foreach_tensor_div_scalar_kernel_cuda_
+  autogen: _foreach_div.Scalar_functional, _foreach_div.Scalar_out
 
 - func: _foreach_add.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7947,6 +8513,7 @@
   dispatch:
     CPU: foreach_tensor_add_list_kernel_slow_
     CUDA: foreach_tensor_add_list_kernel_cuda_
+  autogen: _foreach_add.List_functional, _foreach_add.List_out
 
 - func: _foreach_sub.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7961,6 +8528,7 @@
   dispatch:
     CPU: foreach_tensor_sub_list_kernel_slow_
     CUDA: foreach_tensor_sub_list_kernel_cuda_
+  autogen: _foreach_sub.List_functional, _foreach_sub.List_out
 
 - func: _foreach_mul.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7975,6 +8543,7 @@
   dispatch:
     CPU: foreach_tensor_mul_list_kernel_slow_
     CUDA: foreach_tensor_mul_list_kernel_cuda_
+  autogen: _foreach_mul.List_functional, _foreach_mul.List_out
 
 - func: _foreach_div.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7989,6 +8558,7 @@
   dispatch:
     CPU: foreach_tensor_div_list_kernel_slow_
     CUDA: foreach_tensor_div_list_kernel_cuda_
+  autogen: _foreach_div.List_functional, _foreach_div.List_out
 
 - func: _foreach_add.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8003,6 +8573,7 @@
   dispatch:
     CPU: foreach_tensor_add_scalarlist_kernel_slow_
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
+  autogen: _foreach_add.ScalarList_functional, _foreach_add.ScalarList_out
 
 - func: _foreach_sub.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8017,6 +8588,7 @@
   dispatch:
     CPU: foreach_tensor_sub_scalarlist_kernel_slow_
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
+  autogen: _foreach_sub.ScalarList_functional, _foreach_sub.ScalarList_out
 
 - func: _foreach_div.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8031,6 +8603,7 @@
   dispatch:
     CPU: foreach_tensor_div_scalarlist_kernel_slow_
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
+  autogen: _foreach_div.ScalarList_functional, _foreach_div.ScalarList_out
 
 - func: _foreach_mul.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8045,6 +8618,7 @@
   dispatch:
     CPU: foreach_tensor_mul_scalarlist_kernel_slow_
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
+  autogen: _foreach_mul.ScalarList_functional, _foreach_mul.ScalarList_out
 
 - func: _foreach_exp(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8059,6 +8633,7 @@
   dispatch:
     CPU: foreach_tensor_zero_slow_
     CUDA: foreach_tensor_zero_cuda_
+  autogen: _foreach_zero.functional, _foreach_zero.out
 
 - func: _foreach_exp_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8066,6 +8641,7 @@
   dispatch:
     CPU: foreach_tensor_exp_slow_
     CUDA: foreach_tensor_exp_cuda_
+  autogen: _foreach_exp.functional, _foreach_exp.out
 
 - func: _foreach_sqrt(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8080,6 +8656,7 @@
   dispatch:
     CPU: foreach_tensor_sqrt_slow_
     CUDA: foreach_tensor_sqrt_cuda_
+  autogen: _foreach_sqrt.functional, _foreach_sqrt.out
 
 - func: _foreach_abs(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8094,6 +8671,7 @@
   dispatch:
     CPU: foreach_tensor_abs_slow_
     CUDA: foreach_tensor_abs_cuda_
+  autogen: _foreach_abs.functional, _foreach_abs.out
 
 - func: _foreach_acos(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8108,6 +8686,7 @@
   dispatch:
     CPU: foreach_tensor_acos_slow_
     CUDA: foreach_tensor_acos_cuda_
+  autogen: _foreach_acos.functional, _foreach_acos.out
 
 - func: _foreach_asin(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8122,6 +8701,7 @@
   dispatch:
     CPU: foreach_tensor_asin_slow_
     CUDA: foreach_tensor_asin_cuda_
+  autogen: _foreach_asin.functional, _foreach_asin.out
 
 - func: _foreach_atan(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8136,6 +8716,7 @@
   dispatch:
     CPU: foreach_tensor_atan_slow_
     CUDA: foreach_tensor_atan_cuda_
+  autogen: _foreach_atan.functional, _foreach_atan.out
 
 - func: _foreach_ceil(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8150,6 +8731,7 @@
   dispatch:
     CPU: foreach_tensor_ceil_slow_
     CUDA: foreach_tensor_ceil_cuda_
+  autogen: _foreach_ceil.functional, _foreach_ceil.out
 
 - func: _foreach_cos(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8164,6 +8746,7 @@
   dispatch:
     CPU: foreach_tensor_cos_slow_
     CUDA: foreach_tensor_cos_cuda_
+  autogen: _foreach_cos.functional, _foreach_cos.out
 
 - func: _foreach_cosh(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8178,6 +8761,7 @@
   dispatch:
     CPU: foreach_tensor_cosh_slow_
     CUDA: foreach_tensor_cosh_cuda_
+  autogen: _foreach_cosh.functional, _foreach_cosh.out
 
 - func: _foreach_erf(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8192,6 +8776,7 @@
   dispatch:
     CPU: foreach_tensor_erf_slow_
     CUDA: foreach_tensor_erf_cuda_
+  autogen: _foreach_erf.functional, _foreach_erf.out
 
 - func: _foreach_erfc(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8206,6 +8791,7 @@
   dispatch:
     CPU: foreach_tensor_erfc_slow_
     CUDA: foreach_tensor_erfc_cuda_
+  autogen: _foreach_erfc.functional, _foreach_erfc.out
 
 - func: _foreach_expm1(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8220,6 +8806,7 @@
   dispatch:
     CPU: foreach_tensor_expm1_slow_
     CUDA: foreach_tensor_expm1_cuda_
+  autogen: _foreach_expm1.functional, _foreach_expm1.out
 
 - func: _foreach_floor(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8234,6 +8821,7 @@
   dispatch:
     CPU: foreach_tensor_floor_slow_
     CUDA: foreach_tensor_floor_cuda_
+  autogen: _foreach_floor.functional, _foreach_floor.out
 
 - func: _foreach_log(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8248,6 +8836,7 @@
   dispatch:
     CPU: foreach_tensor_log_slow_
     CUDA: foreach_tensor_log_cuda_
+  autogen: _foreach_log.functional, _foreach_log.out
 
 - func: _foreach_log10(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8262,6 +8851,7 @@
   dispatch:
     CPU: foreach_tensor_log10_slow_
     CUDA: foreach_tensor_log10_cuda_
+  autogen: _foreach_log10.functional, _foreach_log10.out
 
 - func: _foreach_log1p(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8276,6 +8866,7 @@
   dispatch:
     CPU: foreach_tensor_log1p_slow_
     CUDA: foreach_tensor_log1p_cuda_
+  autogen: _foreach_log1p.functional, _foreach_log1p.out
 
 - func: _foreach_log2(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8290,6 +8881,7 @@
   dispatch:
     CPU: foreach_tensor_log2_slow_
     CUDA: foreach_tensor_log2_cuda_
+  autogen: _foreach_log2.functional, _foreach_log2.out
 
 - func: _foreach_neg(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8304,6 +8896,7 @@
   dispatch:
     CPU: foreach_tensor_neg_slow_
     CUDA: foreach_tensor_neg_cuda_
+  autogen: _foreach_neg.functional, _foreach_neg.out
 
 - func: _foreach_tan(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8318,6 +8911,7 @@
   dispatch:
     CPU: foreach_tensor_tan_slow_
     CUDA: foreach_tensor_tan_cuda_
+  autogen: _foreach_tan.functional, _foreach_tan.out
 
 - func: _foreach_tanh(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8332,6 +8926,7 @@
   dispatch:
     CPU: foreach_tensor_tanh_slow_
     CUDA: foreach_tensor_tanh_cuda_
+  autogen: _foreach_tanh.functional, _foreach_tanh.out
 
 - func: _foreach_sin(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8346,6 +8941,7 @@
   dispatch:
     CPU: foreach_tensor_sin_slow_
     CUDA: foreach_tensor_sin_cuda_
+  autogen: _foreach_sin.functional, _foreach_sin.out
 
 - func: _foreach_sinh(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8360,6 +8956,7 @@
   dispatch:
     CPU: foreach_tensor_sinh_slow_
     CUDA: foreach_tensor_sinh_cuda_
+  autogen: _foreach_sinh.functional, _foreach_sinh.out
 
 - func: _foreach_round(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8374,6 +8971,7 @@
   dispatch:
     CPU: foreach_tensor_round_slow_
     CUDA: foreach_tensor_round_cuda_
+  autogen: _foreach_round.functional, _foreach_round.out
 
 - func: _foreach_lgamma(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8388,6 +8986,7 @@
   dispatch:
     CPU: foreach_tensor_lgamma_slow_
     CUDA: foreach_tensor_lgamma_cuda_
+  autogen: _foreach_lgamma.functional, _foreach_lgamma.out
 
 - func: _foreach_frac(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8402,6 +9001,7 @@
   dispatch:
     CPU: foreach_tensor_frac_slow_
     CUDA: foreach_tensor_frac_cuda_
+  autogen: _foreach_frac.functional, _foreach_frac.out
 
 - func: _foreach_reciprocal(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8416,6 +9016,7 @@
   dispatch:
     CPU: foreach_tensor_reciprocal_slow_
     CUDA: foreach_tensor_reciprocal_cuda_
+  autogen: _foreach_reciprocal.functional, _foreach_reciprocal.out
 
 - func: _foreach_sigmoid(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8430,6 +9031,7 @@
   dispatch:
     CPU: foreach_tensor_sigmoid_slow_
     CUDA: foreach_tensor_sigmoid_cuda_
+  autogen: _foreach_sigmoid.functional, _foreach_sigmoid.out
 
 - func: _foreach_trunc(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8444,6 +9046,7 @@
   dispatch:
     CPU: foreach_tensor_trunc_slow_
     CUDA: foreach_tensor_trunc_cuda_
+  autogen: _foreach_trunc.functional, _foreach_trunc.out
 
 - func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8451,6 +9054,7 @@
   dispatch:
     CPU: foreach_tensor_addcdiv_scalar_slow_
     CUDA: foreach_tensor_addcdiv_scalar_cuda_
+  autogen: _foreach_addcdiv.Scalar_functional, _foreach_addcdiv.Scalar_out
 
 - func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8458,6 +9062,7 @@
   dispatch:
     CPU: foreach_tensor_addcmul_scalar_slow_
     CUDA: foreach_tensor_addcmul_scalar_cuda_
+  autogen: _foreach_addcmul.Scalar_functional, _foreach_addcmul.Scalar_out
 
 - func: _foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8465,6 +9070,7 @@
   dispatch:
     CPU: foreach_tensor_addcdiv_scalarlist_slow_
     CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
+  autogen: _foreach_addcdiv.ScalarList_functional, _foreach_addcdiv.ScalarList_out
 
 - func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8472,6 +9078,7 @@
   dispatch:
     CPU: foreach_tensor_addcmul_scalarlist_slow_
     CUDA: foreach_tensor_addcmul_scalarlist_cuda_
+  autogen: _foreach_addcmul.ScalarList_functional, _foreach_addcmul.ScalarList_out
 
 - func: _foreach_addcdiv.Scalar(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8584,25 +9191,29 @@
 
 - func: mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
+  structured: True
+  structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
     CPU, CUDA: mse_loss_out
+    MPS: mse_loss_out_mps
 
 - func: mse_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
   device_check: NoCheck # TensorIterator
+  structured_delegate: mse_loss.out
   python_module: nn
-  dispatch:
-    CPU, CUDA: mse_loss
 
 - func: mse_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU, CUDA: mse_loss_backward_out
+    MPS: mse_loss_backward_out_mps
 
 - func: mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: mse_loss_backward
+    MPS: mse_loss_backward_mps
 
 - func: l1_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
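
`mse_loss` becomes a structured TensorIterator kernel (its explicit CPU/CUDA forward dispatch is replaced by `structured_delegate`), and the forward and backward variants gain MPS kernels. Usage from torch-rb is unaffected; a small sketch:

```ruby
require "torch"

input  = Torch.randn(3, 5, requires_grad: true)
target = Torch.randn(3, 5)

loss = Torch::NN::MSELoss.new.call(input, target)
loss.backward
```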
@@ -8693,6 +9304,7 @@
   dispatch:
     CPU: nll_loss_forward_out_cpu
     CUDA: nll_loss_forward_out_cuda
+    MPS: nll_loss_forward_out_mps
 
 - func: nll_loss_forward(Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index) -> (Tensor output, Tensor total_weight)
   python_module: nn
@@ -8704,6 +9316,7 @@
   dispatch:
     CPU: nll_loss_backward_out_cpu
     CUDA: nll_loss_backward_out_cuda
+    MPS: nll_loss_backward_out_mps
 
 - func: nll_loss_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight) -> Tensor
   python_module: nn
@@ -8720,24 +9333,28 @@
   dispatch:
     CPU: nll_loss2d_forward_out_cpu
     CUDA: nll_loss2d_forward_out_cuda
+    MPS: nll_loss2d_forward_out_mps
 
 - func: nll_loss2d_forward(Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index) -> (Tensor output, Tensor total_weight)
   python_module: nn
   dispatch:
     CPU: nll_loss2d_forward_cpu
     CUDA: nll_loss2d_forward_cuda
+    MPS: nll_loss2d_forward_mps
 
 - func: nll_loss2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: nll_loss2d_backward_out_cpu
     CUDA: nll_loss2d_backward_out_cuda
+    MPS: nll_loss2d_backward_out_mps
 
 - func: nll_loss2d_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight) -> Tensor
   python_module: nn
   dispatch:
     CPU: nll_loss2d_backward_cpu
     CUDA: nll_loss2d_backward_cuda
+    MPS: nll_loss2d_backward_mps
 
 - func: smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -8746,6 +9363,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: smooth_l1_loss_out
+    MPS: smooth_l1_loss_out_mps
 
 - func: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -8757,6 +9375,7 @@
   dispatch:
     CPU: smooth_l1_loss_backward_out
     CUDA: smooth_l1_loss_backward_out
+    MPS: smooth_l1_loss_backward_out_mps
 
 - func: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor
   python_module: nn
@@ -8810,6 +9429,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: elu_out
+    MPS: elu_out_mps
 
 - func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
   structured_delegate: elu.out
@@ -8822,6 +9442,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: elu_backward_out
+    MPS: elu_backward_out_mps
 
 - func: elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result) -> Tensor
   structured_delegate: elu_backward.grad_input
@@ -8858,6 +9479,16 @@
     CPU: glu_backward_cpu
     CUDA: glu_backward_cuda
 
+- func: glu_jvp(Tensor glu, Tensor x, Tensor dx, int dim) -> Tensor
+  python_module: nn
+  dispatch:
+    CPU, CUDA: glu_jvp
+
+- func: glu_backward_jvp(Tensor grad_x, Tensor grad_glu, Tensor x, Tensor dgrad_glu, Tensor dx, int dim) -> Tensor
+  python_module: nn
+  dispatch:
+    CPU, CUDA: glu_backward_jvp
+
 - func: hardsigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
@@ -8894,31 +9525,33 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardtanh_out
+    CPU, CUDA, MPS: hardtanh_out
     QuantizedCPU: hardtanh_out_quantized_cpu
 
 - func: hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardtanh
+    CPU, CUDA, MPS: hardtanh
     QuantizedCPU: hardtanh_quantized_cpu
 
 - func: hardtanh_backward.grad_input(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU, CUDA: hardtanh_backward_out
+    MPS: hardtanh_backward_out_mps
 
 - func: hardtanh_backward(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: hardtanh_backward
+    MPS: hardtanh_backward_mps
 
 - func: hardtanh_(Tensor(a!) self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardtanh_
+    CPU, CUDA, MPS: hardtanh_
     QuantizedCPU: hardtanh_quantized_cpu_
 
 - func: hardswish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -8951,6 +9584,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: leaky_relu_out
+    MPS: leaky_relu_out_mps
     QuantizedCPU: leaky_relu_out_quantized_cpu
 
 - func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor
@@ -8966,6 +9600,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: leaky_relu_backward_out
+    MPS: leaky_relu_backward_out_mps
 
 - func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
   structured_delegate: leaky_relu_backward.grad_input
@@ -9088,6 +9723,7 @@
   dispatch:
     CPU: adaptive_avg_pool2d_out_cpu
     CUDA: adaptive_avg_pool2d_out_cuda
+    MPS: adaptive_avg_pool2d_out_mps
     MkldnnCPU: mkldnn_adaptive_avg_pool2d_out
 
 - func: adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
@@ -9105,13 +9741,16 @@
   dispatch:
     CPU: adaptive_avg_pool2d_cpu
     CUDA: adaptive_avg_pool2d_cuda
+    MPS: adaptive_avg_pool2d_mps
     QuantizedCPU: adaptive_avg_pool2d_quantized_cpu
+    QuantizedCUDA: adaptive_avg_pool2d_quantized_cuda
 
 - func: _adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
     CPU: adaptive_avg_pool2d_backward_cpu
     CUDA: adaptive_avg_pool2d_backward_cuda
+    MPS: adaptive_avg_pool2d_backward_mps
 
 - func: adaptive_avg_pool3d.out(Tensor self, int[3] output_size, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
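
`adaptive_avg_pool2d` gains MPS forward/backward kernels and a quantized CUDA entry. A brief torch-rb sketch, assuming the `Torch::NN::AdaptiveAvgPool2d` module wraps this op as its Python counterpart does:

```ruby
require "torch"

pool = Torch::NN::AdaptiveAvgPool2d.new([1, 1])   # global average pool
x = Torch.randn(8, 16, 32, 32)                    # NCHW input
y = pool.call(x)                                  # => shape [8, 16, 1, 1]
```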
|
@@ -9148,6 +9787,7 @@
|
|
9148
9787
|
dispatch:
|
9149
9788
|
CPU: adaptive_max_pool2d_out_cpu
|
9150
9789
|
CUDA: adaptive_max_pool2d_out_cuda
|
9790
|
+
MPS: adaptive_max_pool2d_out_mps
|
9151
9791
|
|
9152
9792
|
# Return: (Tensor output, Tensor indices)
|
9153
9793
|
- func: adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor)
|
@@ -9160,6 +9800,7 @@
|
|
9160
9800
|
dispatch:
|
9161
9801
|
CPU: adaptive_max_pool2d_backward_out_cpu
|
9162
9802
|
CUDA: adaptive_max_pool2d_backward_out_cuda
|
9803
|
+
MPS: adaptive_max_pool2d_backward_out_mps
|
9163
9804
|
|
9164
9805
|
- func: adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor
|
9165
9806
|
python_module: nn
|
@@ -9199,6 +9840,7 @@
|
|
9199
9840
|
dispatch:
|
9200
9841
|
CPU: avg_pool2d_out_cpu
|
9201
9842
|
CUDA: avg_pool2d_out_cuda
|
9843
|
+
MPS: avg_pool2d_out_mps
|
9202
9844
|
MkldnnCPU: mkldnn_avg_pool2d_out
|
9203
9845
|
|
9204
9846
|
- func: avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
|
@@ -9214,6 +9856,7 @@
|
|
9214
9856
|
dispatch:
|
9215
9857
|
CPU: avg_pool2d_backward_out_cpu
|
9216
9858
|
CUDA: avg_pool2d_backward_out_cuda
|
9859
|
+
MPS: avg_pool2d_backward_out_mps
|
9217
9860
|
MkldnnCPU: mkldnn_avg_pool2d_backward_out
|
9218
9861
|
|
9219
9862
|
- func: avg_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor
|
@@ -9282,6 +9925,7 @@
|
|
9282
9925
|
precomputed:
|
9283
9926
|
- kernel_size -> int poolSizeT, int poolSizeH, int poolSizeW
|
9284
9927
|
- output_size -> int outputT, int outputH, int outputW
|
9928
|
+
- int numBatch, int numPlanes, int inputT, int inputH, int inputW
|
9285
9929
|
dispatch:
|
9286
9930
|
CPU: fractional_max_pool3d_out_cpu
|
9287
9931
|
CUDA: fractional_max_pool3d_out_cuda
|
@@ -9310,6 +9954,7 @@
|
|
9310
9954
|
dispatch:
|
9311
9955
|
CPU: max_pool2d_with_indices_out_cpu
|
9312
9956
|
CUDA: max_pool2d_with_indices_out_cuda
|
9957
|
+
MPS: max_pool2d_with_indices_out_mps
|
9313
9958
|
|
9314
9959
|
# Return: (Tensor output, Tensor indices)
|
9315
9960
|
- func: max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
|
@@ -9322,6 +9967,7 @@
|
|
9322
9967
|
dispatch:
|
9323
9968
|
CPU: max_pool2d_with_indices_backward_out_cpu
|
9324
9969
|
CUDA: max_pool2d_with_indices_backward_out_cuda
|
9970
|
+
MPS: max_pool2d_with_indices_backward_out_mps
|
9325
9971
|
|
9326
9972
|
- func: max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices) -> Tensor
|
9327
9973
|
python_module: nn
|
@@ -9365,18 +10011,6 @@
|
|
9365
10011
|
CPU: max_unpooling2d_forward_cpu
|
9366
10012
|
CUDA: max_unpooling2d_forward_cuda
|
9367
10013
|
|
9368
|
-
- func: max_unpool2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor indices, int[2] output_size, *, Tensor(a!) grad_input) -> Tensor(a!)
|
9369
|
-
python_module: nn
|
9370
|
-
dispatch:
|
9371
|
-
CPU: max_unpooling2d_backward_out_cpu
|
9372
|
-
CUDA: max_unpooling2d_backward_out_cuda
|
9373
|
-
|
9374
|
-
- func: max_unpool2d_backward(Tensor grad_output, Tensor self, Tensor indices, int[2] output_size) -> Tensor
|
9375
|
-
python_module: nn
|
9376
|
-
dispatch:
|
9377
|
-
CPU: max_unpooling2d_backward_cpu
|
9378
|
-
CUDA: max_unpooling2d_backward_cuda
|
9379
|
-
|
9380
10014
|
- func: max_unpool3d.out(Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)
|
9381
10015
|
python_module: nn
|
9382
10016
|
dispatch:
|
@@ -9389,30 +10023,18 @@
|
|
9389
10023
|
CPU: max_unpooling3d_forward_cpu
|
9390
10024
|
CUDA: max_unpooling3d_forward_cuda
|
9391
10025
|
|
9392
|
-
- func: max_unpool3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
|
9393
|
-
python_module: nn
|
9394
|
-
dispatch:
|
9395
|
-
CPU: max_unpooling3d_backward_out_cpu
|
9396
|
-
CUDA: max_unpooling3d_backward_out_cuda
|
9397
|
-
|
9398
|
-
- func: max_unpool3d_backward(Tensor grad_output, Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding) -> Tensor
|
9399
|
-
python_module: nn
|
9400
|
-
dispatch:
|
9401
|
-
CPU: max_unpooling3d_backward_cpu
|
9402
|
-
CUDA: max_unpooling3d_backward_cuda
|
9403
|
-
|
9404
10026
|
- func: reflection_pad1d.out(Tensor self, int[2] padding, *, Tensor(a!) out) -> Tensor(a!)
|
9405
10027
|
python_module: nn
|
9406
10028
|
structured: True
|
9407
10029
|
dispatch:
|
9408
|
-
CPU
|
10030
|
+
CPU: reflection_pad1d_out_cpu
|
10031
|
+
QuantizedCPU: reflection_pad1d_out_quantized_cpu
|
9409
10032
|
CUDA: reflection_pad1d_out_cuda
|
10033
|
+
MPS: reflection_pad1d_out_mps
|
9410
10034
|
|
9411
10035
|
- func: reflection_pad1d(Tensor self, int[2] padding) -> Tensor
|
9412
10036
|
python_module: nn
|
9413
10037
|
structured_delegate: reflection_pad1d.out
|
9414
|
-
dispatch:
|
9415
|
-
QuantizedCPU: reflection_pad1d_cpu
|
9416
10038
|
|
9417
10039
|
- func: reflection_pad1d_backward.grad_input(Tensor grad_output, Tensor self, int[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
|
9418
10040
|
python_module: nn
|
@@ -9420,6 +10042,7 @@
|
|
9420
10042
|
dispatch:
|
9421
10043
|
CPU: reflection_pad1d_backward_out_cpu
|
9422
10044
|
CUDA: reflection_pad1d_backward_out_cuda
|
10045
|
+
MPS: reflection_pad1d_backward_out_mps
|
9423
10046
|
|
9424
10047
|
- func: reflection_pad1d_backward(Tensor grad_output, Tensor self, int[2] padding) -> Tensor
|
9425
10048
|
python_module: nn
|
@@ -9430,24 +10053,29 @@
|
|
9430
10053
|
dispatch:
|
9431
10054
|
CPU, QuantizedCPU: reflection_pad2d_out_cpu
|
9432
10055
|
CUDA: reflection_pad2d_out_cuda
|
10056
|
+
MPS: reflection_pad2d_out_mps
|
9433
10057
|
|
9434
10058
|
- func: reflection_pad2d(Tensor self, int[4] padding) -> Tensor
|
9435
10059
|
python_module: nn
|
9436
10060
|
dispatch:
|
9437
|
-
CPU
|
10061
|
+
CPU: reflection_pad2d_cpu
|
10062
|
+
QuantizedCPU: reflection_pad2d_quantized_cpu
|
9438
10063
|
CUDA: reflection_pad2d_cuda
|
10064
|
+
MPS: reflection_pad2d_mps
|
9439
10065
|
|
9440
10066
|
- func: reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
|
9441
10067
|
python_module: nn
|
9442
10068
|
dispatch:
|
9443
10069
|
CPU: reflection_pad2d_backward_out_cpu
|
9444
10070
|
CUDA: reflection_pad2d_backward_out_cuda
|
10071
|
+
MPS: reflection_pad2d_backward_out_mps
|
9445
10072
|
|
9446
10073
|
- func: reflection_pad2d_backward(Tensor grad_output, Tensor self, int[4] padding) -> Tensor
|
9447
10074
|
python_module: nn
|
9448
10075
|
dispatch:
|
9449
10076
|
CPU: reflection_pad2d_backward_cpu
|
9450
10077
|
CUDA: reflection_pad2d_backward_cuda
|
10078
|
+
MPS: reflection_pad2d_backward_mps
|
9451
10079
|
|
9452
10080
|
- func: reflection_pad3d.out(Tensor self, int[6] padding, *, Tensor(a!) out) -> Tensor(a!)
|
9453
10081
|
python_module: nn
|
@@ -9455,6 +10083,7 @@
|
|
9455
10083
|
dispatch:
|
9456
10084
|
CPU: reflection_pad3d_out_cpu
|
9457
10085
|
CUDA: reflection_pad3d_out_cuda
|
10086
|
+
MPS: reflection_pad3d_out_mps
|
9458
10087
|
|
9459
10088
|
- func: reflection_pad3d(Tensor self, int[6] padding) -> Tensor
|
9460
10089
|
python_module: nn
|
@@ -9466,6 +10095,7 @@
|
|
9466
10095
|
dispatch:
|
9467
10096
|
CPU: reflection_pad3d_backward_out_cpu
|
9468
10097
|
CUDA: reflection_pad3d_backward_out_cuda
|
10098
|
+
MPS: reflection_pad3d_backward_out_mps
|
9469
10099
|
|
9470
10100
|
- func: reflection_pad3d_backward(Tensor grad_output, Tensor self, int[6] padding) -> Tensor
|
9471
10101
|
python_module: nn
|
@@ -9477,6 +10107,7 @@
|
|
9477
10107
|
dispatch:
|
9478
10108
|
CPU: replication_pad1d_out_cpu
|
9479
10109
|
CUDA: replication_pad1d_out_cuda
|
10110
|
+
MPS: replication_pad1d_out_mps
|
9480
10111
|
|
9481
10112
|
- func: replication_pad1d(Tensor self, int[2] padding) -> Tensor
|
9482
10113
|
python_module: nn
|
@@ -9488,6 +10119,7 @@
|
|
9488
10119
|
dispatch:
|
9489
10120
|
CPU: replication_pad1d_backward_out_cpu
|
9490
10121
|
CUDA: replication_pad1d_backward_out_cuda
|
10122
|
+
MPS: replication_pad1d_backward_out_mps
|
9491
10123
|
|
9492
10124
|
- func: replication_pad1d_backward(Tensor grad_output, Tensor self, int[2] padding) -> Tensor
|
9493
10125
|
python_module: nn
|
@@ -9499,6 +10131,7 @@
|
|
9499
10131
|
dispatch:
|
9500
10132
|
CPU: replication_pad2d_out_cpu
|
9501
10133
|
CUDA: replication_pad2d_out_cuda
|
10134
|
+
MPS: replication_pad2d_out_mps
|
9502
10135
|
|
9503
10136
|
- func: replication_pad2d(Tensor self, int[4] padding) -> Tensor
|
9504
10137
|
python_module: nn
|
@@ -9509,12 +10142,14 @@
|
|
9509
10142
|
dispatch:
|
9510
10143
|
CPU: replication_pad2d_backward_out_cpu
|
9511
10144
|
CUDA: replication_pad2d_backward_out_cuda
|
10145
|
+
MPS: replication_pad2d_backward_out_mps
|
9512
10146
|
|
9513
10147
|
- func: replication_pad2d_backward(Tensor grad_output, Tensor self, int[4] padding) -> Tensor
|
9514
10148
|
python_module: nn
|
9515
10149
|
dispatch:
|
9516
10150
|
CPU: replication_pad2d_backward_cpu
|
9517
10151
|
CUDA: replication_pad2d_backward_cuda
|
10152
|
+
MPS: replication_pad2d_backward_mps
|
9518
10153
|
|
9519
10154
|
- func: replication_pad3d.out(Tensor self, int[6] padding, *, Tensor(a!) out) -> Tensor(a!)
|
9520
10155
|
python_module: nn
|
@@ -9522,6 +10157,7 @@
|
|
9522
10157
|
dispatch:
|
9523
10158
|
CPU: replication_pad3d_out_cpu
|
9524
10159
|
CUDA: replication_pad3d_out_cuda
|
10160
|
+
MPS: replication_pad3d_out_mps
|
9525
10161
|
|
9526
10162
|
- func: replication_pad3d(Tensor self, int[6] padding) -> Tensor
|
9527
10163
|
python_module: nn
|
@@ -9532,12 +10168,23 @@
|
|
9532
10168
|
dispatch:
|
9533
10169
|
CPU: replication_pad3d_backward_out_cpu
|
9534
10170
|
CUDA: replication_pad3d_backward_out_cuda
|
10171
|
+
MPS: replication_pad3d_backward_out_mps
|
9535
10172
|
|
9536
10173
|
- func: replication_pad3d_backward(Tensor grad_output, Tensor self, int[6] padding) -> Tensor
|
9537
10174
|
python_module: nn
|
9538
10175
|
dispatch:
|
9539
10176
|
CPU: replication_pad3d_backward_cpu
|
9540
10177
|
CUDA: replication_pad3d_backward_cuda
|
10178
|
+
MPS: replication_pad3d_backward_mps
|
10179
|
+
|
10180
|
+
- func: _pad_circular(Tensor self, int[] pad) -> Tensor
|
10181
|
+
python_module: nn
|
10182
|
+
|
10183
|
+
- func: _pad_enum(Tensor self, int[] pad, int mode, float? value=None) -> Tensor
|
10184
|
+
python_module: nn
|
10185
|
+
|
10186
|
+
- func: pad(Tensor self, int[] pad, str mode="constant", float? value=None) -> Tensor
|
10187
|
+
python_module: nn
|
9541
10188
|
|
9542
10189
|
- func: upsample_linear1d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
|
9543
10190
|
python_module: nn
|
@@ -9694,6 +10341,7 @@
   dispatch:
     CPU: upsample_bilinear2d_out_cpu
     CUDA: upsample_bilinear2d_out_cuda
+    MPS: upsample_bilinear2d_out_mps

 - func: upsample_bilinear2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -9707,6 +10355,7 @@
   dispatch:
     CPU: upsample_bilinear2d_backward_out_cpu
     CUDA: upsample_bilinear2d_backward_out_cuda
+    MPS: upsample_bilinear2d_backward_out_mps

 - func: upsample_bilinear2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -9850,6 +10499,7 @@
   dispatch:
     CPU: upsample_nearest2d_out_cpu
     CUDA: upsample_nearest2d_out_cuda
+    MPS: upsample_nearest2d_out_mps

 - func: _upsample_nearest_exact2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -9857,6 +10507,7 @@
   dispatch:
     CPU: _upsample_nearest_exact2d_out_cpu
     CUDA: _upsample_nearest_exact2d_out_cuda
+    MPS: _upsample_nearest_exact2d_out_mps

 - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -9876,6 +10527,7 @@
   dispatch:
     CPU: upsample_nearest2d_backward_out_cpu
     CUDA: upsample_nearest2d_backward_out_cuda
+    MPS: upsample_nearest2d_backward_out_mps

 - func: _upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -9883,6 +10535,7 @@
   dispatch:
     CPU: _upsample_nearest_exact2d_backward_out_cpu
     CUDA: _upsample_nearest_exact2d_backward_out_cuda
+    MPS: _upsample_nearest_exact2d_backward_out_mps

 - func: upsample_nearest2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -9946,6 +10599,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sigmoid_backward_out
+    MPS: sigmoid_backward_out_mps

 - func: sigmoid_backward(Tensor grad_output, Tensor output) -> Tensor
   python_module: nn
@@ -9968,6 +10622,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: tanh_backward_out
+    MPS: tanh_backward_out_mps

 - func: tanh_backward(Tensor grad_output, Tensor output) -> Tensor
   python_module: nn
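The run of `MPS: ...` dispatch entries in the hunks above registers Metal (Apple GPU) kernels for the padding, upsampling, and sigmoid/tanh backward ops. A hedged sketch of what that enables from torch-rb, assuming the bundled LibTorch was built with MPS support (Apple Silicon); torch-rb just forwards the device string:

```ruby
require "torch"

# Rough sketch, not a guaranteed API: requires an MPS-enabled LibTorch build.
# With the kernels registered above, these ops run on the GPU instead of
# raising a "not implemented for MPS" error.
x = Torch.rand(8, 8, device: "mps", requires_grad: true)
y = Torch.sigmoid(x).sum
y.backward          # hits sigmoid_backward_out_mps
p x.grad.device     # => mps device (when MPS is actually available)
```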
@@ -10233,6 +10888,19 @@
   dispatch:
     CPU, CUDA: special_ndtri_out

+- func: special_log_ndtr(Tensor self) -> Tensor
+  structured_delegate: special_log_ndtr.out
+  python_module: special
+  variants: function
+
+- func: special_log_ndtr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  structured_inherits: TensorIteratorBase
+  python_module: special
+  variants: function
+  dispatch:
+    CPU, CUDA: special_log_ndtr_out
+
 - func: special_expm1(Tensor self) -> Tensor
   python_module: special
   variants: function
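`special_log_ndtr` is a new structured op: log_ndtr(x) = log(Phi(x)), the log of the standard normal CDF, evaluated stably even far in the left tail. A sketch, assuming torch-rb exposes the generated `special` bindings as `Torch::Special` (it already groups `linalg` this way):

```ruby
require "torch"

# Hedged sketch: Torch::Special.log_ndtr is assumed to be the generated binding.
x = Torch.tensor([-3.0, 0.0, 3.0])
p Torch::Special.log_ndtr(x)
# roughly tensor([-6.6077, -0.6931, -0.0013])
```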
@@ -10486,7 +11154,7 @@

 - func: special_polygamma(int n, Tensor self) -> Tensor
   python_module: special
-  variants: function
+  variants: function

 - func: special_polygamma.out(int n, Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: special
@@ -10782,11 +11450,15 @@
 - func: linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor
   python_module: linalg
   variants: function
+  structured_delegate: linalg_cross.out
   dispatch:
-    CPU, CUDA: linalg_cross
+    ZeroTensor: linalg_cross_zerotensor

 - func: linalg_cross.out(Tensor self, Tensor other, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  structured: True
+  precomputed:
+  - dim -> int dim
   dispatch:
     CPU, CUDA: linalg_cross_out

@@ -10811,6 +11483,20 @@
   dispatch:
     CPU, CUDA: linalg_lu_factor_ex_out

+# linalg.lu
+- func: linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)
+  python_module: linalg
+  structured_delegate: linalg_lu.out
+  variants: function
+
+- func: linalg_lu.out(Tensor A, *, bool pivot=True, Tensor(a!) P, Tensor(b!) L, Tensor(c!) U) -> (Tensor(a!) P, Tensor(b!) L, Tensor(c!) U)
+  python_module: linalg
+  variants: function
+  structured: True
+  dispatch:
+    CPU, CUDA: linalg_lu_out
+
+# linalg.det
 - func: linalg_det(Tensor self) -> Tensor
   python_module: linalg
   variants: function
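`linalg_lu` is a new structured decomposition that returns the permutation, lower, and upper factors separately. A sketch, assuming the codegen surfaces it as `Torch::Linalg.lu` alongside the existing linalg bindings:

```ruby
require "torch"

# Hedged sketch: Torch::Linalg.lu is assumed; the signature mirrors the schema above.
a = Torch.rand(3, 3)
p_mat, l, u = Torch::Linalg.lu(a)
# P * L * U reconstructs A (partial pivoting)
p Torch.allclose(p_mat.matmul(l).matmul(u), a, atol: 1e-6) # => true
```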
@@ -10832,6 +11518,38 @@
   dispatch:
     CPU, CUDA: _det_lu_based_helper_backward_helper

+- func: linalg_ldl_factor_ex(Tensor self, *, bool hermitian=False, bool check_errors=False) -> (Tensor LD, Tensor pivots, Tensor info)
+  structured_delegate: linalg_ldl_factor_ex.out
+  python_module: linalg
+  variants: function
+
+- func: linalg_ldl_factor_ex.out(Tensor self, *, bool hermitian=False, bool check_errors=False, Tensor(a!) LD, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LD, Tensor(b!) pivots, Tensor(c!) info)
+  structured: True
+  python_module: linalg
+  variants: function
+  dispatch:
+    CPU, CUDA: linalg_ldl_factor_ex_out
+
+- func: linalg_ldl_factor(Tensor self, *, bool hermitian=False) -> (Tensor LD, Tensor pivots)
+  python_module: linalg
+  variants: function
+
+- func: linalg_ldl_factor.out(Tensor self, *, bool hermitian=False, Tensor(a!) LD, Tensor(b!) pivots) -> (Tensor(a!) LD, Tensor(b!) pivots)
+  python_module: linalg
+  variants: function
+
+- func: linalg_ldl_solve(Tensor LD, Tensor pivots, Tensor B, *, bool hermitian=False) -> Tensor
+  structured_delegate: linalg_ldl_solve.out
+  python_module: linalg
+  variants: function
+
+- func: linalg_ldl_solve.out(Tensor LD, Tensor pivots, Tensor B, *, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  python_module: linalg
+  variants: function
+  dispatch:
+    CPU, CUDA: linalg_ldl_solve_out
+
 - func: linalg_lstsq(Tensor self, Tensor b, float? rcond=None, *, str? driver=None) -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)
   python_module: linalg
   variants: function
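The LDL^T entries (`linalg_ldl_factor_ex`, `linalg_ldl_factor`, `linalg_ldl_solve`) are also new; the factorization handles symmetric or hermitian systems, including indefinite ones that plain Cholesky cannot. A hedged sketch of the factor/solve round trip, assuming the bindings are generated into `Torch::Linalg`:

```ruby
require "torch"

# Hedged sketch: Torch::Linalg.ldl_factor / ldl_solve are assumed bindings
# for the schemas above.
a = Torch.rand(4, 4)
a = a + a.t                 # symmetrize
b = Torch.rand(4, 1)

ld, pivots = Torch::Linalg.ldl_factor(a)
x = Torch::Linalg.ldl_solve(ld, pivots, b)
p Torch.allclose(a.matmul(x), b, atol: 1e-5) # => true
```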
@@ -10901,7 +11619,7 @@
   python_module: linalg
   variants: function

-- func: linalg_eigvalsh.out(Tensor self, str UPLO=
+- func: linalg_eigvalsh.out(Tensor self, str UPLO="L", *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
   dispatch:
     CPU, CUDA: linalg_eigvalsh_out
@@ -10922,6 +11640,7 @@
   dispatch:
     CPU: _linalg_inv_out_helper_cpu
     CUDA: _linalg_inv_out_helper_cuda
+  autogen: _linalg_inv_out_helper.functional, _linalg_inv_out_helper.out

 - func: linalg_inv_ex(Tensor self, *, bool check_errors=False) -> (Tensor inverse, Tensor info)
   python_module: linalg
@@ -10978,11 +11697,11 @@
 - func: linalg_vector_norm(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   python_module: linalg
   variants: function
-  dispatch:
-    CPU, CUDA: linalg_vector_norm
+  structured_delegate: linalg_vector_norm.out

 - func: linalg_vector_norm.out(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  structured: True
   dispatch:
     CPU, CUDA: linalg_vector_norm_out

@@ -11106,13 +11825,13 @@
   python_module: linalg
   variants: function

-- func: linalg_qr(Tensor
+- func: linalg_qr(Tensor A, str mode='reduced') -> (Tensor Q, Tensor R)
   python_module: linalg
   variants: function
   dispatch:
     CompositeExplicitAutograd: linalg_qr

-- func: linalg_qr.out(Tensor
+- func: linalg_qr.out(Tensor A, str mode='reduced', *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)
   python_module: linalg
   variants: function
   dispatch:
@@ -11232,3 +11951,447 @@
 - func: unflatten_dense_tensors(Tensor flat, Tensor[] tensors) -> Tensor[]
   variants: function
   python_module: nn
+
+- func: nested_tensor(Tensor[] list, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  variants: function
+
+- func: _fw_primal_copy(Tensor self, int level) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _fw_primal_copy
+  tags: view_copy
+
+- func: _make_dual_copy(Tensor primal, Tensor tangent, int level) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _make_dual_copy
+  tags: view_copy
+
+- func: view_as_real_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_as_real_copy
+  tags: view_copy
+
+- func: view_as_complex_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_as_complex_copy
+  tags: view_copy
+
+- func: _conj_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _conj_copy
+  tags: view_copy
+
+- func: _neg_view_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _neg_view_copy
+  tags: view_copy
+
+- func: as_strided_copy(Tensor self, int[] size, int[] stride, int? storage_offset=None) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: as_strided_copy
+  tags: view_copy
+
+- func: _sparse_broadcast_to_copy(Tensor self, int[] size) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _sparse_broadcast_to_copy
+  tags: view_copy
+
+- func: diagonal_copy(Tensor self, int offset=0, int dim1=0, int dim2=1) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: diagonal_copy
+  tags: view_copy
+
+- func: expand_copy(Tensor self, int[] size, *, bool implicit=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: expand_copy
+  tags: view_copy
+
+- func: expand_copy.SymInt(Tensor self, SymInt[] size, *, bool implicit=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: expand_copy_SymInt
+  tags: view_copy
+
+- func: permute_copy(Tensor self, int[] dims) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: permute_copy
+  tags: view_copy
+
+- func: _reshape_alias_copy(Tensor self, int[] size, int[] stride) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _reshape_alias_copy
+  tags: view_copy
+
+- func: select_copy.int(Tensor self, int dim, int index) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: select_copy_int
+  tags: view_copy
+
+- func: detach_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: detach_copy
+  tags: view_copy
+
+- func: slice_copy.Tensor(Tensor self, int dim=0, int? start=None, int? end=None, int step=1) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: slice_copy_Tensor
+  tags: view_copy
+
+- func: split_copy.Tensor(Tensor self, int split_size, int dim=0) -> Tensor[]
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: split_copy_Tensor
+  tags: view_copy
+
+- func: split_with_sizes_copy(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: split_with_sizes_copy
+  tags: view_copy
+
+- func: squeeze_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: squeeze_copy
+  tags: view_copy
+
+- func: squeeze_copy.dim(Tensor self, int dim) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: squeeze_copy_dim
+  tags: view_copy
+
+- func: t_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: t_copy
+  tags: view_copy
+
+- func: transpose_copy.int(Tensor self, int dim0, int dim1) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: transpose_copy_int
+  tags: view_copy
+
+- func: unsqueeze_copy(Tensor self, int dim) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unsqueeze_copy
+  tags: view_copy
+
+- func: _indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _indices_copy
+  tags: view_copy
+
+- func: _values_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _values_copy
+  tags: view_copy
+
+- func: indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: indices_copy
+  tags: view_copy
+
+- func: values_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: values_copy
+  tags: view_copy
+
+- func: crow_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: crow_indices_copy
+  tags: view_copy
+
+- func: col_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: col_indices_copy
+  tags: view_copy
+
+- func: ccol_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: ccol_indices_copy
+  tags: view_copy
+
+- func: row_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: row_indices_copy
+  tags: view_copy
+
+- func: unbind_copy.int(Tensor self, int dim=0) -> Tensor[]
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unbind_copy_int
+  tags: view_copy
+
+- func: view_copy(Tensor self, int[] size) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_copy
+  tags: view_copy
+
+- func: view_copy.dtype(Tensor self, ScalarType dtype) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_copy_dtype
+  tags: view_copy
+
+- func: unfold_copy(Tensor self, int dimension, int size, int step) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unfold_copy
+  tags: view_copy
+
+- func: alias_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: alias_copy
+  tags: view_copy
+
+- func: _fw_primal_copy.out(Tensor self, int level, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _fw_primal_copy_out
+
+
+- func: _make_dual_copy.out(Tensor primal, Tensor tangent, int level, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _make_dual_copy_out
+
+
+- func: view_as_real_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_as_real_copy_out
+
+
+- func: view_as_complex_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_as_complex_copy_out
+
+
+- func: _conj_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _conj_copy_out
+
+
+- func: _neg_view_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _neg_view_copy_out
+
+
+- func: as_strided_copy.out(Tensor self, int[] size, int[] stride, int? storage_offset=None, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: as_strided_copy_out
+
+
+- func: _sparse_broadcast_to_copy.out(Tensor self, int[] size, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _sparse_broadcast_to_copy_out
+
+
+- func: diagonal_copy.out(Tensor self, int offset=0, int dim1=0, int dim2=1, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: diagonal_copy_out
+
+
+- func: expand_copy.SymInt_out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: expand_copy_SymInt_out
+
+
+- func: expand_copy.out(Tensor self, int[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: expand_copy_out
+
+
+- func: permute_copy.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: permute_copy_out
+
+
+- func: _reshape_alias_copy.out(Tensor self, int[] size, int[] stride, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _reshape_alias_copy_out
+
+
+- func: select_copy.int_out(Tensor self, int dim, int index, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: select_copy_int_out
+
+
+- func: detach_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: detach_copy_out
+
+
+- func: slice_copy.Tensor_out(Tensor self, int dim=0, int? start=None, int? end=None, int step=1, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: slice_copy_Tensor_out
+
+
+- func: split_copy.Tensor_out(Tensor self, int split_size, int dim=0, *, Tensor(a!)[] out) -> ()
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: split_copy_Tensor_out
+
+
+- func: split_with_sizes_copy.out(Tensor self, int[] split_sizes, int dim=0, *, Tensor(a!)[] out) -> ()
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: split_with_sizes_copy_out
+
+
+- func: squeeze_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: squeeze_copy_out
+
+
+- func: squeeze_copy.dim_out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: squeeze_copy_dim_out
+
+
+- func: t_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: t_copy_out
+
+
+- func: transpose_copy.int_out(Tensor self, int dim0, int dim1, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: transpose_copy_int_out
+
+
+- func: unsqueeze_copy.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unsqueeze_copy_out
+
+
+- func: _indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _indices_copy_out
+
+
+- func: _values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _values_copy_out
+
+
+- func: indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: indices_copy_out
+
+
+- func: values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: values_copy_out
+
+
+- func: crow_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: crow_indices_copy_out
+
+
+- func: col_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: col_indices_copy_out
+
+
+- func: unbind_copy.int_out(Tensor self, int dim=0, *, Tensor(a!)[] out) -> ()
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unbind_copy_int_out
+
+
+- func: view_copy.out(Tensor self, int[] size, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_copy_out
+
+
+- func: view_copy.dtype_out(Tensor self, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_copy_dtype_out
+
+
+- func: unfold_copy.out(Tensor self, int dimension, int size, int step, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unfold_copy_out
+
+
+- func: alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: alias_copy_out
+
+- func: to_padded_tensor(Tensor self, float padding, int[]? output_size=None) -> Tensor
+  variants: method
+  dispatch:
+    NestedTensorCPU: NestedTensor_to_padded_tensor_generic
+    NestedTensorCUDA: NestedTensor_to_padded_tensor_cuda
+
+- func: _nested_tensor_layer_norm(Tensor self, Tensor? weight, Tensor? bias, float eps) -> Tensor
+  variants: method
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_layer_norm
+
+# Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
+- func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None) -> Tensor
+  variants: function
+  dispatch:
+    CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_encoder_layer_forward
+
+- func: _native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True) -> (Tensor, Tensor)
+  variants: function
+  dispatch:
+    CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: native_multi_head_attention
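The final block adds a `*_copy` counterpart for every view op (tagged `view_copy`), plus nested-tensor plumbing (`nested_tensor`, `to_padded_tensor`, `_nested_tensor_layer_norm`) and the fused transformer-encoder / multi-head-attention kernels, which are mostly internal entry points. The `_copy` variants return independent tensors rather than views; a sketch of the difference, assuming the generated bindings land on the `Torch` module (e.g. `Torch.transpose_copy`):

```ruby
require "torch"

# Hedged sketch: Torch.transpose_copy is assumed to be generated from the
# transpose_copy.int schema above. Torch.transpose returns a view sharing
# storage; the _copy variant materializes an independent tensor.
x = Torch.arange(6).reshape(2, 3)

view = Torch.transpose(x, 0, 1)
copy = Torch.transpose_copy(x, 0, 1)

x[0, 0] = 99
p view[0, 0].item # => 99 (shares storage with x)
p copy[0, 0].item # => 0  (independent copy)
```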