torch-rb 0.10.1 → 0.11.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +16 -3
- data/codegen/function.rb +1 -1
- data/codegen/generate_functions.rb +31 -11
- data/codegen/native_functions.yaml +1362 -199
- data/ext/torch/extconf.rb +1 -13
- data/ext/torch/ruby_arg_parser.cpp +64 -2
- data/ext/torch/ruby_arg_parser.h +18 -3
- data/ext/torch/utils.h +1 -1
- data/lib/torch/tensor.rb +8 -5
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +1 -12
- metadata +3 -3
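The bulk of this release is the vendored `data/codegen/native_functions.yaml` catching up with LibTorch 1.12, which wires Apple's Metal Performance Shaders (MPS) backend into the dispatch tables shown in the hunks below. A minimal sketch of what that enables from Ruby — assuming torch-rb 0.11 is built against an MPS-enabled LibTorch and accepts `"mps"` as a device string (the device string and round-trip workflow are assumptions based on the registered kernels, not confirmed torch-rb 0.11 API):

```ruby
require "torch"

# Assumption: "mps" is accepted wherever a device string is expected once the
# underlying LibTorch 1.12 build ships the MPS kernels registered below.
device = "mps"

x = Torch.rand(3, 3).to(device)  # allocation goes through empty_mps / empty_strided_mps
y = Torch.rand(3, 3).to(device)

z = x.matmul(y)                  # matrix multiply dispatches to mm_out_mps
puts z.to("cpu").to_a.inspect    # copy back (_copy_from_mps) before printing
```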
@@ -145,6 +145,7 @@
 
 - func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
   variants: method
+  tags: inplace_view
 
 - func: rename(Tensor(a) self, Dimname[]? names) -> Tensor(a)
   variants: method
@@ -274,6 +275,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: abs_out
+    MPS: abs_out_mps
     SparseCPU, SparseCUDA: abs_sparse_out
     SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
 
@@ -328,12 +330,12 @@
 - func: view_as_real(Tensor(a) self) -> Tensor(a)
   variants: function
   dispatch:
-    CPU, CUDA: view_as_real
+    CPU, CUDA, MPS, Meta: view_as_real
 
 - func: view_as_complex(Tensor(a) self) -> Tensor(a)
   variants: function
   dispatch:
-    CPU, CUDA: view_as_complex
+    CPU, CUDA, Meta: view_as_complex
 
 - func: sgn(Tensor self) -> Tensor
   variants: function, method
@@ -357,6 +359,9 @@
     SparseCPU, SparseCUDA: sgn_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
 
+- func: chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
+  variants: method
+
 - func: real(Tensor(a) self) -> Tensor(a)
   device_check: NoCheck # TensorIterator
   variants: function
@@ -422,6 +427,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: acos_out
+    MPS: acos_out_mps
 
 # arccos, alias of acos
 - func: arccos(Tensor self) -> Tensor
@@ -448,6 +454,7 @@
     SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
     MkldnnCPU: mkldnn_add
     ZeroTensor: add_zerotensor
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
 
 - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -457,18 +464,22 @@
     SparseCPU, SparseCUDA: add_sparse_
     SparseCsrCPU, SparseCsrCUDA: add_sparse_csr_
     MkldnnCPU: mkldnn_add_
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
 
 - func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
+  ufunc_inner_loop:
+    Generic: add (AllAndComplex, BFloat16, Half, ComplexHalf)
+    ScalarOnly: add (Bool)
   dispatch:
-    CPU, CUDA: add_out
     SparseCPU: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
     SparseCsrCPU: add_out_sparse_csr_cpu
     SparseCsrCUDA: add_out_sparse_csr_cuda
     MkldnnCPU: mkldnn_add_out
+    MPS: add_out_mps
 
 - func: _add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
   variants: function
@@ -494,6 +505,7 @@
   variants: function
   dispatch:
     CPU: add_relu_
+  autogen: _add_relu.Scalar_out
 
 # For C++ only, until we have conversion from C++ numbers to Tensor
 - func: add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
@@ -507,6 +519,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: add_
+  autogen: add.Scalar_out
 
 - func: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   structured_delegate: addmv.out
@@ -521,8 +534,9 @@
   dispatch:
     CPU: addmv_out_cpu
     CUDA: addmv_out_cuda
-
-
+    MPS: addmv_out_mps
+    SparseCsrCPU: addmv_out_sparse_compressed
+    SparseCsrCUDA: addmv_out_sparse_compressed_cuda
 
 - func: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   variants: function, method
@@ -560,6 +574,7 @@
   - dim -> int dim
   dispatch:
     CPU, CUDA: all_out
+    MPS: all_out_mps
 
 - func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -583,6 +598,7 @@
   - dim -> int dim
   dispatch:
     CPU, CUDA: any_out
+    MPS: any_out_mps
 
 - func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -595,6 +611,12 @@
 
 - func: arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
+# Note [arange.start_step schema]
+# We want `arange.start_step` to be grouped up with `arange.start_out`,
+# But this doesn't happen automatically because the step argument
+# is defaultable for .start_out but not for .start_step.
+# We should probably just make "step" a defaultable param on arange.start,
+# and kill arange.start_step.
 - func: arange.start_step(Scalar start, Scalar end, Scalar step, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
 - func: arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -603,6 +625,7 @@
   dispatch:
     CPU, Meta: arange_out
     CUDA: arange_cuda_out
+    MPS: arange_mps_out
 
 # This function is a temporary hack to allow tracing of arange like constructs with dynamic
 # bounds on arange. Normal arange is not traceable because it does not take any tensor inputs;
@@ -620,6 +643,7 @@
   structured: True
   dispatch:
     CPU, CUDA: argmax_out
+    MPS: argmax_out_mps
 
 - func: argmin(Tensor self, int? dim=None, bool keepdim=False) -> Tensor
   structured_delegate: argmin.out
@@ -644,6 +668,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: acosh_out
+    MPS: acosh_out_mps
 
 # arccosh, alias for acosh
 - func: arccosh(Tensor self) -> Tensor
@@ -673,6 +698,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: asinh_out
+    MPS: asinh_out_mps
     SparseCPU, SparseCUDA: asinh_sparse_out
     SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
 
@@ -705,6 +731,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: atanh_out
+    MPS: atanh_out_mps
     SparseCPU, SparseCUDA: atanh_sparse_out
     SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
 
@@ -721,6 +748,7 @@
   variants: function, method
   dispatch:
     ZeroTensor, CPU, CUDA, Meta: as_strided_tensorimpl
+    MPS: as_strided_tensorimpl_mps
     QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
   device_check: NoCheck
   device_guard: False
@@ -756,6 +784,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: asin_out
+    MPS: asin_out_mps
     SparseCPU, SparseCUDA: asin_sparse_out
     SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
 
@@ -790,6 +819,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: atan_out
+    MPS: atan_out_mps
     SparseCPU, SparseCUDA: atan_sparse_out
     SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
 
@@ -833,6 +863,7 @@
   dispatch:
     CPU: baddbmm_out_cpu
     CUDA: baddbmm_out_cuda
+    MPS: baddbmm_out_mps
     SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
 
 - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -861,19 +892,26 @@
   variants: function
   dispatch:
     CPU, CUDA: bernoulli_out
+    MPS: bernoulli_out_mps
 
 - func: bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
     CPU, CUDA: bernoulli_
+    MPS: bernoulli_mps_
+  autogen: bernoulli.Tensor_functional, bernoulli.Tensor_out
 
 - func: bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
     CPU, CUDA: bernoulli_
+    MPS: bernoulli_mps_
+  autogen: bernoulli.float_out
 
+# Note [bernoulli.p schema]
+# We should probably just fix the overload ambiguity by appending a _functional to the C++ API name (BC breaking)
 # This out-of-place version isn't used explicitly, but needed by jit.
 # There is no default valid on `p` here because it would introduce ambiguity
 # with `bernoulli(Tensor self, *, Generator? generator=None)` declaration.
@@ -890,6 +928,7 @@
   dispatch:
     CPU: binary_cross_entropy_cpu
     CUDA: binary_cross_entropy_cuda
+    MPS: binary_cross_entropy_mps
 
 - func: binary_cross_entropy.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -898,6 +937,7 @@
   dispatch:
     CPU: binary_cross_entropy_out_cpu
     CUDA: binary_cross_entropy_out_cuda
+    MPS: binary_cross_entropy_out_mps
 
 - func: binary_cross_entropy_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
   python_module: nn
@@ -905,6 +945,7 @@
   dispatch:
     CPU: binary_cross_entropy_backward_cpu
     CUDA: binary_cross_entropy_backward_cuda
+    MPS: binary_cross_entropy_backward_mps
 
 - func: binary_cross_entropy_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -912,6 +953,7 @@
   dispatch:
     CPU: binary_cross_entropy_backward_out_cpu
     CUDA: binary_cross_entropy_backward_out_cuda
+    MPS: binary_cross_entropy_backward_out_mps
 
 - func: binary_cross_entropy_with_logits(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -1061,6 +1103,7 @@
   dispatch:
     CPU: bmm_out_cpu
     CUDA: bmm_out_cuda
+    MPS: bmm_out_mps
     SparseCPU: bmm_out_sparse_cpu
     SparseCUDA: bmm_out_sparse_cuda
     SparseCsrCUDA: bmm_out_sparse_csr_cuda
@@ -1078,12 +1121,20 @@
     SparseCPU, SparseCUDA: sparse_broadcast_to
 
 - func: cat(Tensor[] tensors, int dim=0) -> Tensor
+  structured_delegate: cat.out
   dispatch:
-
+    SparseCPU, SparseCUDA: cat_sparse
+    QuantizedCPU: cat_quantized_cpu
 
 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  precomputed:
+  - dim -> int dim, int valid, bool all_contiguous, bool all_same_dtype, bool all_same_sizes_and_stride, MemoryFormat memory_format
   dispatch:
-
+    CPU: cat_out_cpu
+    CUDA: cat_out_cuda
+    MPS: cat_out_mps
+    QuantizedCPU: cat_out_quantized_cpu
 
 - func: cat.names(Tensor[] tensors, Dimname dim) -> Tensor
 
@@ -1125,6 +1176,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: ceil_out
+    MPS: ceil_out_mps
     SparseCPU, SparseCUDA: ceil_sparse_out
     SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
 
@@ -1164,8 +1216,7 @@
 
 - func: clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor
   variants: function, method
-  dispatch:
-    CPU, CUDA: clamp
+  structured_delegate: clamp.Tensor_out
 
 - func: clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -1177,8 +1228,7 @@
 
 - func: clamp_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!)
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_
+  structured_delegate: clamp.Tensor_out
 
 - func: clamp.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -1187,73 +1237,83 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: clamp_out
+    MPS: clamp_out_mps
 
 - func: clamp.Tensor_out(Tensor self, Tensor? min=None, Tensor? max=None, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
+  structured: True
+  structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA:
+    CPU, CUDA: clamp_Tensor_out
+    MPS: clamp_Tensor_out_mps
 
 - func: clamp_max(Tensor self, Scalar max) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_max
+  structured_delegate: clamp_max.out
 
 - func: clamp_max.Tensor(Tensor self, Tensor max) -> Tensor
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_max
+  structured_delegate: clamp_max.Tensor_out
 
 - func: clamp_max_(Tensor(a!) self, Scalar max) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_max_
+  structured_delegate: clamp_max.out
 
 - func: clamp_max_.Tensor(Tensor(a!) self, Tensor max) -> Tensor(a!)
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_max_
+  structured_delegate: clamp_max.Tensor_out
 
 - func: clamp_max.out(Tensor self, Scalar max, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
+  structured: True
+  structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: clamp_max_out
+    MPS: clamp_max_out_mps
 
 - func: clamp_max.Tensor_out(Tensor self, Tensor max, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA:
+    CPU, CUDA: clamp_max_Tensor_out
+    MPS: clamp_max_Tensor_out_mps
 
 - func: clamp_min(Tensor self, Scalar min) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_min
+  structured_delegate: clamp_min.out
 
 - func: clamp_min.Tensor(Tensor self, Tensor min) -> Tensor
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_min
+  structured_delegate: clamp_min.Tensor_out
 
 - func: clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_min_
+  structured_delegate: clamp_min.out
 
 - func: clamp_min_.Tensor(Tensor(a!) self, Tensor min) -> Tensor(a!)
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: clamp_min_
+  structured_delegate: clamp_min.Tensor_out
 
 - func: clamp_min.out(Tensor self, Scalar min, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
+  structured: True
+  structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: clamp_min_out
+    MPS: clamp_min_out_mps
 
 - func: clamp_min.Tensor_out(Tensor self, Tensor min, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA:
+    CPU, CUDA: clamp_min_Tensor_out
+    MPS: clamp_min_Tensor_out_mps
 
 # clip is an alias for clamp
 - func: clip(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
@@ -1360,23 +1420,29 @@
 
 - func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
 
+- func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
+  variants: function
+
 - func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
     MkldnnCPU: copy_mkldnn_
-    SparseCPU, SparseCUDA
+    SparseCPU, SparseCUDA: copy_sparse_wrapper_
     CompositeExplicitAutograd: copy_
-    SparseCsrCPU, SparseCsrCUDA:
+    SparseCsrCPU, SparseCsrCUDA: copy_sparse_compressed_
+  autogen: copy.out
 
 - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
-  dispatch:
+  dispatch:
+    MPS: _copy_from_mps
 
 # We need this to be able to properly copy from a CPU to an XLA tensor with different sizes.
 # See https://github.com/pytorch/xla/issues/2881
 - func: _copy_from_and_resize(Tensor self, Tensor dst) -> Tensor
-  dispatch:
+  dispatch:
+    MPS: _copy_from_and_resize_mps
 
 - func: cos(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -1394,6 +1460,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: cos_out
+    MPS: cos_out_mps
 
 - func: cosh(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -1411,6 +1478,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: cosh_out
+    MPS: cosh_out_mps
 
 - func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
 
@@ -1419,6 +1487,7 @@
   dispatch:
     CPU: count_nonzero_cpu
     CUDA: count_nonzero_cuda
+    MPS: count_nonzero_mps
 
 - func: count_nonzero(Tensor self, int? dim=None) -> Tensor
   variants: function, method
@@ -1457,6 +1526,14 @@
   dispatch:
     CUDA: cudnn_convolution_transpose
 
+- func: _mps_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups) -> Tensor
+  dispatch:
+    MPS: _mps_convolution_transpose
+
+- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool[2] output_mask) -> (Tensor, Tensor)
+  dispatch:
+    MPS: mps_convolution_transpose_backward
+
 - func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_relu
@@ -1679,6 +1756,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: div_out
+    MPS: div_out_mps
     SparseCPU, SparseCUDA: div_out_sparse_zerodim
 
 - func: div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor
@@ -1701,6 +1779,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: div_out_mode
+    MPS: div_out_mode_mps
     SparseCPU, SparseCUDA: div_out_sparse_zerodim
 
 # For C++ only, until we have conversion from C++ numbers to Tensor
@@ -1715,6 +1794,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: div_
+  autogen: div.Scalar_out
 
 - func: div.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor
   variants: function, method
@@ -1725,6 +1805,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: div_
+  autogen: div.Scalar_mode_out
 
 # divide, alias for div
 - func: divide.Tensor(Tensor self, Tensor other) -> Tensor
@@ -1780,6 +1861,7 @@
   dispatch:
     CPU: dot
     CUDA: dot_cuda
+    MPS: dot_mps
 
 - func: dot.out(Tensor self, Tensor tensor, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1800,6 +1882,7 @@
 - func: embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: embedding
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_embedding
 
 - func: embedding_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor
 
@@ -1807,11 +1890,13 @@
   dispatch:
     CPU: embedding_dense_backward_cpu
     CUDA: embedding_dense_backward_cuda
+    MPS: embedding_dense_backward_mps
 
 - func: embedding_renorm_(Tensor(a!) self, Tensor indices, float max_norm, float norm_type) -> Tensor(a!)
   dispatch:
     CPU: embedding_renorm_cpu_
     CUDA: embedding_renorm_cuda_
+  autogen: embedding_renorm.functional, embedding_renorm.out
 
 - func: embedding_sparse_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor
 
@@ -1872,10 +1957,12 @@
   dispatch:
     CPU: empty_cpu
     CUDA: empty_cuda
+    MPS: empty_mps
     Meta: empty_meta
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA:
+    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    QuantizedCPU, QuantizedCUDA: empty_unknown_quantized
 
 # We do not make new_empty a composite that calls into new_empty_strided, as the strided version
 # is significantly more difficult to implement by different backends
@@ -1920,8 +2007,20 @@
   dispatch:
     CPU, Meta: resize_
     CUDA: resize_cuda_
+    MPS: resize_mps_
     QuantizedCPU: quantized_resize_cpu_
     SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
+  autogen: resize.functional, resize.out
+
+# This is a utility function to enable users to resize out tensor while registering kernels for out variants.
+# Eventually, we can consider exposing `resize_output` as a public API to ship it with python op registration
+# to make it easy to register out variants for ops.
+- func: _resize_output_(Tensor(a!) self, int[] size, Device device) -> Tensor(a!)
+  use_const_ref_for_mutable_tensors: True
+  variants: function
+  dispatch:
+    Meta: _resize_output_
+  autogen: _resize_output.functional, _resize_output.out
 
 - func: empty_quantized(int[] size, Tensor qtensor, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
   category_override: factory
@@ -1938,6 +2037,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: empty_like
+    QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA: empty_like_sparse_coo
     SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
 
@@ -1945,7 +2045,9 @@
   dispatch:
     CPU: empty_strided_cpu
     CUDA: empty_strided_cuda
+    MPS: empty_strided_mps
     Meta: empty_strided_meta
+    QuantizedCPU, QuantizedCUDA: empty_strided_unknown_quantized
 
 - func: erf(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -1969,6 +2071,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: erf_out
+    MPS: erf_out_mps
     SparseCPU, SparseCUDA: erf_sparse_out
     SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
 
@@ -2005,6 +2108,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: exp_out
+    MPS: exp_out_mps
 
 - func: exp2(Tensor self) -> Tensor
   structured_delegate: exp2.out
@@ -2019,6 +2123,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: exp2_out
+    MPS: exp2_out_mps
 
 - func: expm1(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2045,6 +2150,13 @@
     SparseCPU, SparseCUDA: expm1_sparse_out
     SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
 
+- func: expand.SymInt(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
+  variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: expand_symint
+
 - func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
   variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
   device_check: NoCheck
@@ -2065,11 +2177,13 @@
   dispatch:
     CPU: eye_out_cpu
     CUDA: eye_out_cuda
+    MPS: eye_out_mps
 
 - func: eye.m_out(int n, int m, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU: eye_out_cpu
     CUDA: eye_out_cuda
+    MPS: eye_out_mps
 
 - func: flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a)
   variants: function, method
@@ -2089,21 +2203,36 @@
 - func: unflatten.Dimname(Tensor(a) self, Dimname dim, int[] sizes, Dimname[] names) -> Tensor(a)
   variants: method
 
+- func: fill.Scalar(Tensor self, Scalar value) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: fill
+
+- func: fill.Tensor(Tensor self, Tensor value) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: fill
+
 - func: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
     CPU, CUDA: fill_
+    MPS: fill_scalar_mps
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
+    SparseCsrCPU, SparseCsrCUDA: fill_sparse_csr_
+  autogen: fill.Scalar_out
 
 - func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
     CPU, CUDA: fill_
+    MPS: fill_tensor_mps_
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
+  autogen: fill.Tensor_out
 
 - func: floor(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2129,6 +2258,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: floor_out
+    MPS: floor_out_mps
     SparseCPU, SparseCUDA: floor_sparse_out
     SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
 
@@ -2220,10 +2350,12 @@
   variants: function, method
 
 # NOTE [ grid_sampler Native Functions ]
-# `grid_sampler`
-# `cudnn_grid_sampler`, `grid_sampler_2d`, or `grid_sampler_3d`, each of
-# has the corresponding backward defined as native functions as well.
-#
+# `grid_sampler` is _supposed to_ do all the shape checking and then dispatch to
+# one of `cudnn_grid_sampler`, `grid_sampler_2d`, or `grid_sampler_3d`, each of
+# which has the corresponding backward defined as native functions as well.
+# However, we do shape checking everywhere for now since each of the mentioned
+# functions can be called directly, which will lead to crashes otherwise.
+# See https://github.com/pytorch/pytorch/issues/73187 for more information.
 #
 # There is also _grid_sampler_2d_backward_cpu_fallback which is an
 # implementation detail of grid_sampler_2d and is only exposed here for testing
@@ -2261,7 +2393,10 @@
     CPU: grid_sampler_3d_cpu
     CUDA: grid_sampler_3d_cuda
 
-
+# `grid_sampler_3d_backward` takes in `output_mask` to optimize performance for
+# the case where `input` doesn't require gradient. Gradient for `grid` is always
+# computed (only `output_mask[0]` is checked by the implementations).
+- func: grid_sampler_3d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)
   dispatch:
     CPU: grid_sampler_3d_backward_cpu
     CUDA: grid_sampler_3d_backward_cuda
@@ -2355,15 +2490,21 @@
 # - Tensor Tensor::index(ArrayRef<TensorIndex> indices)
 # - Tensor Tensor::index(std::initializer_list<TensorIndex> indices)
 
+- func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  variants: function
+  precomputed:
+  - dim -> int dim
+  dispatch:
+    CPU, CUDA: index_copy_out
+
 - func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
   variants: method
-  dispatch:
-    CompositeExplicitAutograd: index_copy_
+  structured_delegate: index_copy.out
 
 - func: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: index_copy
+  structured_delegate: index_copy.out
 
 - func: index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!)
   variants: method
@@ -2376,6 +2517,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: index_put_
+  autogen: index_put.out
   # NB: The following functions are declared in aten/src/ATen/templates/TensorBody.h and defined in aten/src/ATen/TensorIndexing.cpp:
   # - Tensor & Tensor::index_put_(ArrayRef<TensorIndex> indices, Tensor const & rhs)
   # - Tensor & Tensor::index_put_(ArrayRef<TensorIndex> indices, Scalar v)
@@ -2393,6 +2535,7 @@
   variants: function
   dispatch:
     CPU, CUDA: _index_put_impl_
+  autogen: _index_put_impl.functional, _index_put_impl.out
 
 - func: instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor
   variants: function
@@ -2444,7 +2587,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: isnan
+    CPU, CUDA, MPS: isnan
     SparseCPU, SparseCUDA: isnan_sparse
     SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
 
@@ -2540,17 +2683,14 @@
   dispatch:
     CPU: layer_norm_cpu
    CUDA: layer_norm_cuda
+    MPS: layer_norm_mps
     CompositeImplicitAutograd: math_native_layer_norm
 
-- func: _native_multi_head_self_attention(Tensor query, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
-  dispatch:
-    CPU: multi_head_self_attention_cpu
-    CUDA: multi_head_self_attention_cuda
-
 - func: native_layer_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     CPU: layer_norm_backward_cpu
     CUDA: layer_norm_backward_cuda
+    MPS: layer_norm_backward_mps
 
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
@@ -2575,6 +2715,14 @@
 - func: linear.out(Tensor input, Tensor weight, Tensor? bias=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
 
+# TODO: Add this function to MPS dispatch key so that we avoid declaring it in
+# native_functions.yaml
+# https://github.com/pytorch/pytorch/issues/77394
+- func: _mps_linear(Tensor self, Tensor weight, Tensor? bias=None) -> Tensor
+  python_module: nn
+  dispatch:
+    MPS: _mps_linear
+
 - func: mkldnn_linear(Tensor self, Tensor weight, Tensor? bias=None) -> Tensor
   python_module: nn
   dispatch:
@@ -2592,6 +2740,18 @@
   dispatch:
     MkldnnCPU: mkldnn_linear_backward
 
+- func: _mps_linear_backward_input(int[] input_size, Tensor grad_output, Tensor weight) -> Tensor
+  dispatch:
+    MPS: _mps_linear_backward_input
+
+- func: _mps_linear_backward_weights(Tensor grad_output, Tensor input, Tensor weight, bool bias_defined) -> (Tensor, Tensor)
+  dispatch:
+    MPS: _mps_linear_backward_weights
+
+- func: mps_linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    MPS: mps_linear_backward
+
 - func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
 
 - func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
@@ -2622,6 +2782,7 @@
   dispatch:
     CPU, Meta: linspace_out
     CUDA: linspace_cuda_out
+    MPS: linspace_out_mps
 
 - func: log(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2639,6 +2800,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: log_out
+    MPS: log_out_mps
 
 - func: log10(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2658,6 +2820,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: log10_out
+    MPS: log10_out_mps
 
 - func: log1p(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2681,6 +2844,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: log1p_out
+    MPS: log1p_out_mps
     SparseCPU, SparseCUDA: log1p_sparse_out
     SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
 
@@ -2700,12 +2864,14 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: log2_out
+    MPS: log2_out_mps
 
 - func: logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: logaddexp_out
+    MPS: logaddexp_out_mps
 
 - func: logaddexp(Tensor self, Tensor other) -> Tensor
   variants: method, function
@@ -2718,6 +2884,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: logaddexp2_out
+    MPS: logaddexp2_out_mps
 
 - func: logaddexp2(Tensor self, Tensor other) -> Tensor
   variants: method, function
@@ -2791,6 +2958,11 @@
 - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
 
+- func: log_softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: log_softmax_out
+
 - func: log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
 
@@ -2802,6 +2974,7 @@
   dispatch:
     CPU: log_softmax_cpu_out
     CUDA: log_softmax_cuda_out
+    MPS: log_softmax_mps_out
 
 - func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
   structured_delegate: _log_softmax_backward_data.out
@@ -2811,6 +2984,7 @@
   dispatch:
     CPU: log_softmax_backward_cpu_out
     CUDA: log_softmax_backward_cuda_out
+    MPS: log_softmax_backward_mps_out
 
 - func: _logcumsumexp(Tensor self, int dim) -> Tensor
   dispatch:
@@ -2922,6 +3096,7 @@
   - dim -> int dim
   dispatch:
     CPU, CUDA: max_out
+    MPS: max_out_mps
 
 - func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
@@ -2937,10 +3112,10 @@
 
 - func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: amax
+  structured_delegate: amax.out
 
 - func: amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
   dispatch:
     CPU, CUDA: amax_out
 
@@ -2951,6 +3126,17 @@
 
 - func: max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
 
+# TODO: Add this function to MPS dispatch key so that we avoid declaring it in
+# native_functions.yaml
+# https://github.com/pytorch/pytorch/issues/77394
+- func: _mps_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
+  dispatch:
+    MPS: _mps_max_pool2d
+
+- func: mps_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
+  dispatch:
+    MPS: mps_max_pool2d_backward
+
 - func: mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
   dispatch:
     MkldnnCPU: mkldnn_max_pool2d
@@ -2974,6 +3160,7 @@
 - func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
   dispatch:
     QuantizedCPU: quantized_max_pool2d
+    QuantizedCUDA: quantized_max_pool2d_cudnn
 
 - func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
 
@@ -2997,6 +3184,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: mean_out
+    MPS: mean_out_mps
     QuantizedCPU: mean_out_quantized_cpu
 
 - func: mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -3069,6 +3257,7 @@
   - dim -> int dim
   dispatch:
     CPU, CUDA: min_out
+    MPS: min_out_mps
 
 - func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
@@ -3079,13 +3268,24 @@
 
 - func: amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
   variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: amin
+  structured_delegate: amin.out
 
 - func: amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
   dispatch:
     CPU, CUDA: amin_out
 
+# TODO: Add this function to MPS dispatch key so that we avoid declaring it in
+# native_functions.yaml
+# https://github.com/pytorch/pytorch/issues/77394
+- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+  dispatch:
+    MPS: _mps_convolution
+
+- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    MPS: mps_convolution_backward
+
 - func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: mkldnn_convolution
@@ -3130,10 +3330,12 @@
   dispatch:
     CPU: mm_out_cpu
     CUDA: mm_out_cuda
+    MPS: mm_out_mps
     SparseCPU, SparseCUDA: _sparse_mm_out
     SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
 
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
+  python_module: sparse
 
 - func: _sparse_sparse_matmul(Tensor self, Tensor other) -> Tensor
   dispatch:
@@ -3165,8 +3367,10 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse
+    SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr
     MkldnnCPU: mkldnn_mul
     ZeroTensor: mul_zerotensor
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
 
 - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3174,7 +3378,9 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse_
+    SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr_
     MkldnnCPU: mkldnn_mul_
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
 
 - func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3182,8 +3388,10 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: mul_out
+    MPS: mul_out_mps
     SparseCPU: mul_out_sparse_cpu
     SparseCUDA: mul_out_sparse_cuda
+    SparseCsrCPU, SparseCsrCUDA: mul_out_sparse_csr
     MkldnnCPU: mkldnn_mul_out
 
 # For C++ only, until we have conversion from C++ numbers to Tensor
@@ -3192,12 +3400,15 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: mul
+    SparseCsrCPU, SparseCsrCUDA: mul_scalar_sparse_csr
 
 - func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
     CompositeExplicitAutograd: mul_
+    SparseCsrCPU, SparseCsrCUDA: mul__scalar_sparse_csr
+  autogen: mul.Scalar_out
 
 # multiply, alias for mul
 - func: multiply.Tensor(Tensor self, Tensor other) -> Tensor
@@ -3246,6 +3457,12 @@
     CPU: narrow_copy_dense_cpu
     SparseCPU, SparseCUDA: narrow_copy_sparse
     CompositeExplicitAutograd: narrow_copy_dense
+  tags: view_copy
+
+- func: narrow_copy.SymInt(Tensor self, int dim, int start, SymInt length) -> Tensor
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: narrow_copy_symint
 
 - func: narrow_copy.out(Tensor self, int dim, int start, int length, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -3265,11 +3482,13 @@
   dispatch:
     CPU: batch_norm_cpu
     CUDA: batch_norm_cuda
+    MPS: batch_norm_mps
     MkldnnCPU: mkldnn_batch_norm
 
 - func: native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))
   dispatch:
     CUDA: batch_norm_cuda_out
+    MPS: batch_norm_mps_out
 
 - func: batch_norm_stats(Tensor input, float eps) -> (Tensor, Tensor)
   dispatch:
@@ -3296,6 +3515,7 @@
   dispatch:
     CPU: batch_norm_backward_cpu
     CUDA: batch_norm_backward_cuda
+    MPS: batch_norm_backward_mps
     MkldnnCPU: mkldnn_batch_norm_backward
 
 - func: batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor)
@@ -3363,6 +3583,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: permute
+    MPS: permute_mps
 
 - func: movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)
   variants: function, method
@@ -3403,8 +3624,14 @@
   variants: function, method
 
 - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
+  dispatch:
+    CPU: pixel_shuffle_cpu
+    CompositeExplicitAutograd: math_pixel_shuffle
 
 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
+  dispatch:
+    CPU: pixel_unshuffle_cpu
+    CompositeExplicitAutograd: math_pixel_unshuffle
 
 - func: channel_shuffle(Tensor self, int groups) -> Tensor
   dispatch:
@@ -3420,6 +3647,7 @@
   variants: method
   dispatch:
     CUDA: is_pinned_cuda
+    MPS: is_pinned_mps
     CompositeExplicitAutograd: is_pinned_default
 
 # TODO: add a copy kwarg that guarantees that the tensor is put into fresh
@@ -3431,6 +3659,7 @@
 - func: _pin_memory(Tensor self, Device? device=None) -> Tensor
   dispatch:
     CUDA: _pin_memory_cuda
+    MPS: _pin_memory_mps
 
 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
   variants: function, method
@@ -3566,6 +3795,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: reciprocal_out
+    MPS: reciprocal_out_mps
 
 - func: neg(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -3589,6 +3819,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: neg_out
+    MPS: neg_out_mps
     SparseCPU, SparseCUDA: neg_out_sparse
     SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
 
@@ -3605,6 +3836,7 @@
   variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
   dispatch:
     CompositeExplicitAutograd: repeat
+    MPS: repeat_mps
 
 - func: repeat_interleave.Tensor(Tensor repeats, *, int? output_size=None) -> Tensor
   variants: function
@@ -3631,7 +3863,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor: _reshape_alias
+    CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS: _reshape_alias
   # We don't need to support mkldnn since this is handled explicitly by the reshape operator.
 
 - func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
@@ -3668,6 +3900,7 @@
   dispatch:
     CPU: round_out
     CUDA: round_out
+    MPS: round_out_mps
     SparseCPU, SparseCUDA: round_sparse_out
     SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
 
@@ -3700,16 +3933,21 @@
   variants: function, method
   dispatch:
     CPU, CUDA: relu
+    MPS: relu_mps
     MkldnnCPU: mkldnn_relu
     QuantizedCPU: relu_quantized_cpu
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
 
 - func: relu_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
     CPU, CUDA: relu_
+    MPS: relu_mps_
     MkldnnCPU: mkldnn_relu_
     QuantizedCPU: relu_quantized_cpu_
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
+  autogen: relu.out
 
 - func: relu6(Tensor self) -> Tensor
   python_module: nn
@@ -3720,16 +3958,18 @@
|
|
3720
3958
|
- func: prelu(Tensor self, Tensor weight) -> Tensor
|
3721
3959
|
variants: function, method
|
3722
3960
|
dispatch:
|
3961
|
+
MkldnnCPU: mkldnn_prelu
|
3723
3962
|
CPU: prelu_cpu
|
3724
3963
|
CUDA: prelu_cuda
|
3725
3964
|
|
3726
3965
|
- func: prelu_backward(Tensor grad_output, Tensor self, Tensor weight) -> (Tensor, Tensor)
|
3727
3966
|
variants: function, method
|
3728
3967
|
dispatch:
|
3968
|
+
MkldnnCPU: mkldnn_prelu_backward
|
3729
3969
|
CPU: prelu_backward_cpu
|
3730
3970
|
CUDA: prelu_backward_cuda
|
3731
3971
|
|
3732
|
-
- func: gelu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
3972
|
+
- func: gelu.out(Tensor self, *, str approximate='none', Tensor(a!) out) -> Tensor(a!)
|
3733
3973
|
structured: True
|
3734
3974
|
structured_inherits: TensorIteratorBase
|
3735
3975
|
device_check: NoCheck # TensorIterator
|
@@ -3737,24 +3977,34 @@
|
|
3737
3977
|
dispatch:
|
3738
3978
|
CPU: gelu_out_cpu
|
3739
3979
|
CUDA: gelu_out_cuda
|
3980
|
+
MPS: gelu_out_mps
|
3981
|
+
|
3982
|
+
- func: gelu_(Tensor(a!) self, *, str approximate='none') -> Tensor(a!)
|
3983
|
+
structured_delegate: gelu.out
|
3984
|
+
device_check: NoCheck # TensorIterator
|
3985
|
+
python_module: nn
|
3986
|
+
dispatch:
|
3987
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
|
3740
3988
|
|
3741
|
-
- func: gelu(Tensor self) -> Tensor
|
3989
|
+
- func: gelu(Tensor self, *, str approximate='none') -> Tensor
|
3742
3990
|
structured_delegate: gelu.out
|
3743
3991
|
device_check: NoCheck # TensorIterator
|
3744
3992
|
python_module: nn
|
3745
3993
|
dispatch:
|
3746
3994
|
MkldnnCPU: mkldnn_gelu
|
3747
3995
|
QuantizedCPU: gelu_quantized_cpu
|
3996
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu
|
3748
3997
|
|
3749
|
-
- func: gelu_backward.grad_input(Tensor
|
3998
|
+
- func: gelu_backward.grad_input(Tensor grad_output, Tensor self, *, str approximate='none', Tensor(a!) grad_input) -> Tensor(a!)
|
3750
3999
|
structured: True
|
3751
4000
|
structured_inherits: TensorIteratorBase
|
3752
4001
|
python_module: nn
|
3753
4002
|
dispatch:
|
3754
4003
|
CPU: gelu_backward_out_cpu
|
3755
4004
|
CUDA: gelu_backward_out_cuda
|
4005
|
+
MPS: gelu_backward_out_mps
|
3756
4006
|
|
3757
|
-
- func: gelu_backward(Tensor
|
4007
|
+
- func: gelu_backward(Tensor grad_output, Tensor self, *, str approximate='none') -> Tensor
|
3758
4008
|
structured_delegate: gelu_backward.grad_input
|
3759
4009
|
python_module: nn
|
3760
4010
|
dispatch:
|
@@ -3804,6 +4054,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: rsqrt_out
+    MPS: rsqrt_out_mps

 - func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)
   variants: function, method
@@ -3816,6 +4067,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: select
+    SparseCsrCPU, SparseCsrCUDA: select_sparse_csr

 - func: select_backward(Tensor grad_output, int[] input_sizes, int dim, int index) -> Tensor
   variants: function
@@ -3839,6 +4091,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CompositeExplicitAutograd: celu_
+  autogen: celu.out

 - func: silu(Tensor self) -> Tensor
   structured_delegate: silu.out
@@ -3858,6 +4111,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: silu_out
+    MPS: silu_out_mps

 - func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -3865,6 +4119,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: silu_backward_out
+    MPS: silu_backward_out_mps

 - func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: silu_backward.grad_input
@@ -3918,6 +4173,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sigmoid_out
+    MPS: sigmoid_out_mps

 - func: logit(Tensor self, float? eps=None) -> Tensor
   variants: function, method
@@ -3955,6 +4211,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sin_out
+    MPS: sin_out_mps
     SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
     SparseCPU, SparseCUDA: sin_sparse_out

@@ -3994,6 +4251,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sinh_out
+    MPS: sinh_out_mps
     SparseCPU, SparseCUDA: sinh_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out

@@ -4080,6 +4338,11 @@
 - func: softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method

+- func: softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: softmax_out
+
 - func: softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method

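The new `softmax.int_out` overload only adds an out-variant of the existing functional form, which is unchanged. For orientation, the generated method looks like this; a sketch, assuming the usual codegen mapping from `variants: function, method`:

    x = Torch.tensor([1.0, 2.0, 3.0])
    probs = x.softmax(0)   # generated from softmax.int; entries sum to 1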
@@ -4093,6 +4356,7 @@
   dispatch:
     CPU: softmax_cpu_out
     CUDA: softmax_cuda_out
+    MPS: softmax_mps_out

 - func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
   structured_delegate: _softmax_backward_data.out
@@ -4102,6 +4366,7 @@
   dispatch:
     CPU: softmax_backward_cpu_out
     CUDA: softmax_backward_cuda_out
+    MPS: softmax_backward_mps_out

 - func: unsafe_split.Tensor(Tensor self, int split_size, int dim=0) -> Tensor[]
   variants: function, method
@@ -4117,6 +4382,10 @@
   dispatch:
     CompositeExplicitAutograd: split

+- func: split.sizes(Tensor(a -> *) self, int[] split_size, int dim=0) -> Tensor(a)[]
+  variants: function, method
+  device_guard: False
+
 - func: unsafe_split_with_sizes(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]
   variants: function, method
   device_check: NoCheck
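`split.sizes` gives `split` a native overload for an array of section sizes, which previously had to be routed to `split_with_sizes` by the caller. A sketch, assuming the generated Ruby method accepts both forms:

    x = Torch.arange(5)
    x.split(2)        # split.Tensor: chunks of size 2 => sizes [2, 2, 1]
    x.split([2, 3])   # split.sizes: explicit section sizes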
@@ -4154,7 +4423,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: squeeze
+    CompositeExplicitAutograd: squeeze
     QuantizedCPU, QuantizedCUDA: squeeze_quantized

 - func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
@@ -4162,7 +4431,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: squeeze
+    CompositeExplicitAutograd: squeeze
     QuantizedCPU, QuantizedCUDA: squeeze_quantized

 - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
@@ -4232,12 +4501,13 @@

 - func: dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)

-# The signature is designed to be consistent with librosa except that it is
-# missing the `pad_mode` and `center` arguments, which are taken care of at
-# `torch.functional.py`. They shall be moved here once we have mapping between
-# Python strings and C++ Enum in codegen.
+# Overload without center & pad mode, needed for forward-compatibility
 - func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
   variants: function, method
+  cpp_no_default_args: ['hop_length', 'win_length', 'window', 'normalized']
+
+- func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
+  variants: function, method

 - func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor
   variants: function, method
@@ -4258,6 +4528,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: sum
+    SparseCsrCPU, SparseCsrCUDA: sum_csr

 - func: sum.dim_IntList(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   structured_delegate: sum.IntList_out
@@ -4273,21 +4544,17 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: sum_out
+    MPS: sum_out_mps

 - func: sum.DimnameList_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator

-- func: nansum(Tensor self, *, ScalarType? dtype=None) -> Tensor
+- func: nansum(Tensor self, int[1] dim=[], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
   dispatch:
     CPU, CUDA: nansum

-- func: nansum.dim_IntList(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
-  variants: function, method
-  dispatch:
-    CPU, CUDA: nansum
-
-- func: nansum.IntList_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+- func: nansum.out(Tensor self, int[1] dim=[], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: nansum_out

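`nansum` collapses its two overloads into one schema where `dim=[]` defaults to a full reduction, mirroring `sum`. A sketch of the semantics, assuming the generated method variant:

    x = Torch.tensor([[1.0, Float::NAN], [2.0, 3.0]])
    x.nansum       # full reduction, NaN treated as zero => 6.0
    x.nansum([0])  # reduction over dim 0 => [3.0, 3.0]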
@@ -4318,6 +4585,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sqrt_out
+    MPS: sqrt_out_mps
     SparseCPU, SparseCUDA: sqrt_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out

@@ -4330,8 +4598,6 @@
   variants: function, method

 - func: square.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
-  dispatch:
-    CPU, CUDA: square_out

 - func: std(Tensor self, bool unbiased=True) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -4346,6 +4612,7 @@
   variants: function, method
   dispatch:
     CPU, CUDA: std
+    MPS: std_mps

 - func: std_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
   device_check: NoCheck # TensorIterator
@@ -4397,6 +4664,7 @@
   variants: function, method
   dispatch:
     CPU, CUDA: prod
+    MPS: prod_mps

 - func: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   structured_delegate: prod.int_out
@@ -4408,6 +4676,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: prod_out
+    MPS: prod_out_mps

 - func: prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -4453,6 +4722,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: tan_out
+    MPS: tan_out_mps
     SparseCPU, SparseCUDA: tan_sparse_out
     SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out

@@ -4481,6 +4751,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: tanh_out
+    MPS: tanh_out_mps
     SparseCPU, SparseCUDA: tanh_sparse_out
     SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out

@@ -4511,12 +4782,14 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: threshold_out
+    MPS: threshold_out_mps

 - func: threshold_backward.grad_input(Tensor grad_output, Tensor self, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: threshold_backward_out
+    MPS: threshold_backward_out_mps

 - func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
   variants: function
@@ -4558,6 +4831,7 @@
   device_guard: False
   dispatch:
     MkldnnCPU: mkldnn_transpose_
+  autogen: _mkldnn_transpose.out

 - func: one_hot(Tensor self, int num_classes=-1) -> Tensor
   python_module: nn
@@ -4595,6 +4869,28 @@

 - func: trapz.dx(Tensor y, *, float dx=1, int dim=-1) -> Tensor

+# Fused implementation detail for transformers. Adds in-projection bias to QKV and divides Q by sqrt(D/num_heads).
+- func: _transform_bias_rescale_qkv(Tensor qkv, Tensor qkv_bias, int num_heads) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CPU, NestedTensorCPU: transform_bias_rescale_qkv_cpu
+    CUDA, NestedTensorCUDA: transform_bias_rescale_qkv_cuda
+
+- func: _nested_tensor_from_mask(Tensor t, Tensor mask) -> Tensor
+  dispatch:
+    CPU, CUDA: NestedTensor_nested_tensor_from_mask
+
+- func: _nested_from_padded(Tensor padded, Tensor cpu_nested_shape_example, bool fuse_transform_0213=False) -> Tensor
+  device_check: NoCheck # cpu_nested_shape_example will always be on CPU
+  dispatch:
+    CPU: nested_from_padded_generic
+    CUDA: nested_from_padded_cuda
+
+# _nested_from_padded is not usable from Python, so
+# _nested_from_padded_and_nested_example is available for testing.
+- func: _nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _trilinear
@@ -4625,6 +4921,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: trunc_out
+    MPS: trunc_out_mps
     SparseCPU, SparseCUDA: trunc_sparse_out
     SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out

@@ -4686,7 +4983,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: unsqueeze
+    CompositeExplicitAutograd: unsqueeze
     SparseCPU, SparseCUDA: unsqueeze_sparse
     QuantizedCPU, QuantizedCUDA: unsqueeze_quantized

@@ -4713,6 +5010,7 @@
   variants: function, method
   dispatch:
     CPU, CUDA: var
+    MPS: var_mps

 - func: var.out(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -4764,12 +5062,18 @@
   device_check: NoCheck
   device_guard: False

-# we define both of these because 'where' does the broadcast and '_s_where' doesn't;
-# this allows us to implicitly calculate the broadcast derivative, while only dealing with the
-# _s_where derivative.
 - func: where.self(Tensor condition, Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CPU, CUDA: where
+    MPS: where_mps
+
+- func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  dispatch:
+    CPU, CUDA: where_self_out
+    MPS: where_self_out_mps

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
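With `_s_where` gone (its removal is in the next hunk), `where.self` now carries its own dispatch entries plus a proper out-variant. The user-visible behavior is unchanged; a sketch using the generated function variant:

    cond = Torch.tensor([true, false, true])
    a = Torch.tensor([1, 2, 3])
    b = Torch.tensor([10, 20, 30])
    Torch.where(cond, a, b)   # => [1, 20, 3]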
@@ -4784,11 +5088,6 @@
   device_check: NoCheck # TensorIterator
   variants: function

-- func: _s_where(Tensor condition, Tensor self, Tensor other) -> Tensor
-  variants: function
-  dispatch:
-    CPU, CUDA: _s_where
-
 - func: norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor
   variants: function

@@ -4797,15 +5096,17 @@
 - func: _weight_norm(Tensor v, Tensor g, int dim=0) -> Tensor
   variants: function

-- func: _weight_norm_cuda_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)
+- func: _weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)
   variants: function
   dispatch:
+    CPU: weight_norm_cpu
     CUDA: weight_norm_cuda

-- func: _weight_norm_cuda_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
+- func: _weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
   variants: function
   dispatch:
-    CUDA: weight_norm_cuda_backward
+    CPU: weight_norm_backward_cpu
+    CUDA: weight_norm_backward_cuda

 - func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
   variants: function
@@ -4887,6 +5188,16 @@
     SparseCPU: _sparse_sum_backward_cpu
     SparseCUDA: _sparse_sum_backward_cuda

+- func: _sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  dispatch:
+    SparseCsrCPU: _sparse_csr_sum_cpu
+    SparseCsrCUDA: _sparse_csr_sum_cuda
+
+- func: _sparse_csr_prod.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  dispatch:
+    SparseCsrCPU: _sparse_csr_prod_cpu
+    SparseCsrCUDA: _sparse_csr_prod_cuda
+
 - func: _sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   python_module: sparse
   variants: function
@@ -4962,6 +5273,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: norm_out
+    MPS: norm_out_mps

 # These four redispatch in their implementation, so OK to be CompositeImplicitAutograd
 - func: norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
@@ -4987,24 +5299,31 @@
   dispatch:
     CPU, CUDA: frexp_out

+# Deprecated (v.1.12)
 - func: frobenius_norm(Tensor self) -> Tensor
   variants: function

+# Deprecated (v.1.12)
 - func: frobenius_norm.dim(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
   variants: function

+# Deprecated (v.1.12)
 - func: frobenius_norm.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function

+# Deprecated (v.1.12)
 - func: nuclear_norm(Tensor self, bool keepdim=False) -> Tensor
   variants: function

+# Deprecated (v.1.12)
 - func: nuclear_norm.out(Tensor self, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function

+# Deprecated (v.1.12)
 - func: nuclear_norm.dim(Tensor self, int[2] dim, bool keepdim=False) -> Tensor
   variants: function

+# Deprecated (v.1.12)
 - func: nuclear_norm.dim_out(Tensor self, int[2] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function

@@ -5013,7 +5332,7 @@
   dispatch:
     CompositeExplicitAutograd: clone
     SparseCPU, SparseCUDA: clone_sparse
-    SparseCsrCPU, SparseCsrCUDA: clone_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA: clone_sparse_compressed
     MkldnnCPU: mkldnn_clone
     QuantizedCPU, QuantizedCUDA: quantized_clone
@@ -5025,22 +5344,27 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: resize_as_
+  autogen: resize_as.functional, resize_as.out

 - func: resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
-  variants: function
+  variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: resize_as_sparse_
     SparseCsrCPU, SparseCsrCUDA: resize_as_sparse_csr_
+  autogen: resize_as_sparse.functional, resize_as_sparse.out

 - func: zero_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
     CPU, CUDA: zero_
+    MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA: zero_sparse_
+    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
+  autogen: zero.functional, zero.out

 - func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5048,6 +5372,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sub_out
+    MPS: sub_out_mps
     SparseCPU, SparseCUDA: sub_out_sparse

 - func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
@@ -5056,6 +5381,7 @@
   structured_delegate: sub.out
   dispatch:
     SparseCPU, SparseCUDA: sub_sparse
+    ZeroTensor: sub_zerotensor

 - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5076,6 +5402,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: sub_
+  autogen: sub.Scalar_out

 # subtract, alias for sub
 - func: subtract.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -5125,7 +5452,7 @@

 # Functionally the same as addmm, but we give it a different derivative formula
 # that doesn't propagate gradients to non-present entries on sparse.
-- func: _sparse_addmm(Tensor self, Tensor sparse, Tensor dense, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+- func: _sparse_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   python_module: sparse
   dispatch:
     CompositeExplicitAutograd: _sparse_addmm
@@ -5134,21 +5461,24 @@
   python_module: sparse
   dispatch:
     SparseCsrCUDA: sparse_sampled_addmm_out_sparse_csr_cuda
+    SparseCsrCPU: sparse_sampled_addmm_out_sparse_csr_cpu

 - func: sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   python_module: sparse
   dispatch:
     SparseCsrCUDA: sparse_sampled_addmm_sparse_csr_cuda
+    SparseCsrCPU: sparse_sampled_addmm_sparse_csr_cpu

 - func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   structured: True
   dispatch:
     CPU: addmm_out_cpu
     CUDA: addmm_out_cuda
+    MPS: addmm_out_mps
     SparseCPU: addmm_out_sparse_dense_cpu
     SparseCUDA: addmm_out_sparse_dense_cuda
-    SparseCsrCPU: addmm_out_sparse_csr_dense_cpu
-    SparseCsrCUDA: addmm_out_sparse_csr_dense_cuda
+    SparseCsrCPU: addmm_out_sparse_compressed_cpu
+    SparseCsrCUDA: addmm_out_sparse_compressed_cuda

 - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   structured_delegate: addmm.out
@@ -5156,7 +5486,7 @@
   dispatch:
     SparseCPU: addmm_sparse_dense_cpu
     SparseCUDA: addmm_sparse_dense_cuda
-    SparseCsrCPU, SparseCsrCUDA: addmm_sparse_csr_dense
+    SparseCsrCPU, SparseCsrCUDA: addmm_sparse_compressed_dense

 - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
   structured_delegate: addmm.out
@@ -5167,6 +5497,16 @@
     SparseCPU: s_addmm_sparse_dense_cpu_
     SparseCUDA: s_addmm_sparse_dense_cuda_

+- func: _addmm_activation.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  dispatch:
+    CPU: addmm_activation_out_cpu
+    CUDA: addmm_activation_out_cuda
+
+- func: _addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor
+  structured_delegate: _addmm_activation.out
+  variants: function, method
+
 # NOTE [ Sparse: autograd and API ]
 #
 #
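`_addmm_activation` fuses the matmul-add with a trailing ReLU (or GELU when `use_gelu=True`), so the intermediate result never materializes. Roughly the same result composed from public ops, as a sketch of the semantics rather than the fused kernel:

    # beta * input + alpha * (mat1 . mat2), then ReLU
    def addmm_relu(input, mat1, mat2, beta: 1, alpha: 1)
      (input * beta + mat1.mm(mat2) * alpha).relu
    end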
@@ -5278,11 +5618,23 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_csr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_csc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_bsr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_bsc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

+- func: sparse_compressed_tensor.comp_plain_value(Tensor compressed_indices, Tensor plain_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_csr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_csc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_bsc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

 - func: sparse_coo_tensor.size(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

@@ -5294,7 +5646,11 @@

 - func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size) -> ()

+- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()
 - func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()

 - func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
@@ -5309,26 +5665,34 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_resize_
+  autogen: sparse_resize.functional, sparse_resize.out

 - func: sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_resize_and_clear_
+  autogen: sparse_resize_and_clear.functional, sparse_resize_and_clear.out

 - func: sparse_mask(Tensor self, Tensor mask) -> Tensor
   variants: method
   dispatch:
     SparseCPU: sparse_mask_cpu
     SparseCUDA: sparse_mask_cuda
+    SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_csr

 - func: _to_cpu(Tensor[] tensors) -> Tensor[]
   variants: function

 - func: to_dense(Tensor self, ScalarType? dtype=None) -> Tensor
   variants: method
+
+# Special case of to_dense with custom derivative
+- func: _to_dense(Tensor self, ScalarType? dtype=None) -> Tensor
+  variants: method
   dispatch:
-    SparseCPU, SparseCUDA: sparse_to_dense
+    SparseCPU, SparseCUDA: sparse_to_dense
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_dense
     MkldnnCPU: mkldnn_to_dense

 - func: to_dense_backward(Tensor grad, Tensor input) -> Tensor
@@ -5414,6 +5778,7 @@
     SparseCPU, SparseCUDA: _coalesced_sparse_
   device_check: NoCheck
   device_guard: False
+  autogen: _coalesced.functional, _coalesced.out

 - func: indices(Tensor(a) self) -> Tensor(a)
   variants: method
@@ -5444,6 +5809,20 @@
   device_check: NoCheck
   device_guard: False

+- func: ccol_indices(Tensor(a) self) -> Tensor(a)
+  variants: method
+  dispatch:
+    SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+  device_check: NoCheck
+  device_guard: False
+
+- func: row_indices(Tensor(a) self) -> Tensor(a)
+  variants: method
+  dispatch:
+    SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+  device_check: NoCheck
+  device_guard: False
+
 - func: hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     SparseCPU: hspmm_out_sparse_cpu
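These hunks generalize the CSR-only constructors and accessors to all four compressed layouts (CSR, CSC, BSR, BSC). A hypothetical Ruby usage, assuming the constructors are surfaced the way the `sparse_csr_tensor` schema suggests; treat the commented names as assumptions, not a documented torch-rb API:

    crow_indices = Torch.tensor([0, 1, 2])
    col_indices  = Torch.tensor([0, 1])
    values       = Torch.tensor([1.0, 2.0])
    # csr = Torch.sparse_csr_tensor(crow_indices, col_indices, values, [2, 2])
    # CSR exposes crow_indices/col_indices; CSC now gets ccol_indices/row_indices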
@@ -5459,11 +5838,13 @@
   variants: function
   dispatch:
     SparseCPU, SparseCUDA: copy_sparse_
+  autogen: copy_sparse_to_sparse.functional, copy_sparse_to_sparse.out

 - func: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: unbind
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind

 - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
   variants: function, method
@@ -5472,11 +5853,41 @@
   variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse

 - func: to_sparse(Tensor self) -> Tensor
   variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+
+- func: to_sparse_csr(Tensor self) -> Tensor
+  variants: method
+  dispatch:
+    CPU, CUDA: dense_to_sparse_csr
+    SparseCPU, SparseCUDA: coo_to_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
+
+- func: to_sparse_csc(Tensor self) -> Tensor
+  variants: method
+  dispatch:
+    CPU, CUDA: dense_to_sparse_csc
+    SparseCPU, SparseCUDA: coo_to_sparse_csc
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
+
+- func: to_sparse_bsr(Tensor self, int[2] blocksize) -> Tensor
+  variants: method
+  dispatch:
+    CPU, CUDA: dense_to_sparse_bsr
+    SparseCPU, SparseCUDA: coo_to_sparse_bsr
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
+
+- func: to_sparse_bsc(Tensor self, int[2] blocksize) -> Tensor
+  variants: method
+  dispatch:
+    CPU, CUDA: dense_to_sparse_bsc
+    SparseCPU, SparseCUDA: coo_to_sparse_bsc
+    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc

 - func: to_mkldnn(Tensor self, ScalarType? dtype=None) -> Tensor
   variants: method
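The conversion hunk adds method-variant entry points between dense, COO, and the compressed layouts. A sketch, assuming torch-rb generates these method variants as it does for the rest of the file (blocksize applies to the block layouts only):

    dense = Torch.eye(3)
    coo = dense.to_sparse          # COO, as before
    csr = dense.to_sparse_csr      # dense_to_sparse_csr kernel
    # bsr = dense.to_sparse_bsr([1, 1])  # hypothetical: 1x1-block BSR
    back = csr.to_dense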
@@ -5636,7 +6047,7 @@
   dispatch:
     CPU: fused_moving_avg_obs_fake_quant_cpu
     CUDA: fused_moving_avg_obs_fake_quant_cuda
-
+  autogen: _fused_moving_avg_obs_fq_helper.functional, _fused_moving_avg_obs_fq_helper.out

 - func: _choose_qparams_per_tensor(Tensor self, bool reduce_range=False) -> (float, int)
   variants: function
@@ -5722,16 +6133,33 @@
   dispatch:
     CPU: _local_scalar_dense_cpu
     CUDA: _local_scalar_dense_cuda
+    MPS: _local_scalar_dense_mps
   variants: function

+# MPS LSTM implementation
+
+- func: _lstm_mps(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    MPS: _lstm_mps
+
+- func: lstm_mps_backward(Tensor grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])
+  dispatch:
+    MPS: lstm_mps_backward
+
+
 # Fused RNN kernels
 - func: _thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor, Tensor)
   dispatch:
     CUDA: _thnn_fused_lstm_cell_cuda

-- func: _thnn_fused_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+# NB: The composite version of this function below is a simple wrapper that duplicates some of the outputs
+# It is necessary to avoid triggering TensorImpl use count checks in debug mode
+# NB: this function is NOT differentiable
+- func: _thnn_fused_lstm_cell_backward_impl(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor)
   dispatch:
-    CUDA: _thnn_fused_lstm_cell_backward_cuda
+    CUDA: _thnn_fused_lstm_cell_backward_impl_cuda
+
+- func: _thnn_fused_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)

 - func: _thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)

@@ -5812,36 +6240,55 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: set_
+    CPU, CUDA, Meta, MPS: set_
+  autogen: set.source_Storage_functional, set.source_Storage_out

 - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!)
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU: set_storage_cpu_
+    CPU, Meta: set_storage_cpu_
     CUDA: set_storage_cuda_
+    MPS: set_storage_mps_
     QuantizedCPU, QuantizedCUDA: set_storage_quantized_
+  autogen: set.source_Storage_storage_offset_functional, set.source_Storage_storage_offset_out
+
+- func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!)
+  variants: method
+  device_check: NoCheck
+  device_guard: False

 - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: set_tensor_
+    CPU, CUDA, Meta, MPS: set_tensor_
+  autogen: set.source_Tensor_functional, set.source_Tensor_out

 - func: set_(Tensor(a!) self) -> Tensor(a!)
   variants: method
   dispatch:
     CPU: set_cpu_
     CUDA: set_cuda_
+    Meta: set_meta_
+    MPS: set_mps_
+  autogen: set.functional, set.out
+
+- func: lift(Tensor self) -> Tensor
+  variants: method
+  dispatch:
+    # Not making it CompositeImplicitAutograd because lift
+    # should be a primitive w.r.t. functorch
+    CompositeExplicitAutograd: lift

 - func: is_set_to(Tensor self, Tensor tensor) -> bool
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: is_set_to
+    CPU, CUDA, MPS: is_set_to

 - func: masked_fill_.Scalar(Tensor(a!) self, Tensor mask, Scalar value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5849,6 +6296,8 @@
   dispatch:
     CPU: masked_fill__cpu
     CUDA: masked_fill__cuda
+    MPS: masked_fill__mps
+  autogen: masked_fill.Scalar_out

 - func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -5862,6 +6311,8 @@
   dispatch:
     CPU: masked_fill__cpu
     CUDA: masked_fill__cuda
+    MPS: masked_fill__mps
+  autogen: masked_fill.Tensor_out

 - func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -5874,23 +6325,29 @@
   dispatch:
     CPU: masked_scatter__cpu
     CUDA: masked_scatter__cuda
+  autogen: masked_scatter.out

 - func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: masked_scatter

-- func: _masked_softmax(Tensor self, Tensor mask) -> Tensor
+- func: _masked_softmax(Tensor self, Tensor mask, int? dim=None) -> Tensor
   dispatch:
     CUDA: masked_softmax_cuda
     CPU: masked_softmax_cpu

+- func: _masked_softmax_backward(Tensor grad_output, Tensor output, Tensor mask, int? dim=None) -> Tensor
+  dispatch:
+    CUDA: masked_softmax_backward_cuda
+    CPU: masked_softmax_backward_cpu
+
 - func: view(Tensor(a) self, int[] size) -> Tensor(a)
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
+    ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, MPS: view
     MkldnnCPU: mkldnn_view

 # Warning: If you want to change the name or overload name of this
@@ -5909,7 +6366,8 @@
 - func: put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)
   variants: method
   dispatch:
-    CPU, CUDA: put_
+    CPU, CUDA, MPS: put_
+  autogen: put.out

 - func: put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor
   variants: function, method
@@ -5934,12 +6392,30 @@
 - func: index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
   variants: function, method

+- func: index_reduce.out(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  variants: function
+  precomputed:
+  - dim -> int dim
+  dispatch:
+    CPU: index_reduce_cpu_out
+    CUDA: index_reduce_cuda_out
+
+- func: index_reduce_(Tensor(a!) self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor(a!)
+  structured_delegate: index_reduce.out
+  variants: method
+
+- func: index_reduce(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor
+  structured_delegate: index_reduce.out
+  variants: function, method
+
 - func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
     CPU: index_fill_
     CUDA: index_fill_
+  autogen: index_fill.int_Scalar_out

 - func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
   device_check: NoCheck # TensorIterator
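`index_reduce` is new: it scatters rows of `source` into `self` along `dim` at positions given by `index`, combining collisions with the named reduction; `include_self: false` excludes the destination's existing values from the reduction. A sketch of the semantics, with a hypothetical binding name following torch-rb's usual underscore-to-bang convention for in-place ops:

    x = Torch.ones([3, 2])
    index = Torch.tensor([0, 0, 2])
    source = Torch.tensor([[2.0, 2.0], [3.0, 3.0], [4.0, 4.0]])
    # x.index_reduce!(0, index, source, "prod", include_self: false)
    # row 0 becomes 2*3 = 6, row 2 becomes 4, row 1 is untouched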
@@ -5952,6 +6428,7 @@
   variants: method
   dispatch:
     CPU, CUDA: index_fill_
+  autogen: index_fill.int_Tensor_out

 - func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -5988,6 +6465,7 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_src_out
+    MPS: scatter_src_out_mps

 - func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
   structured_delegate: scatter.value_out
@@ -6002,6 +6480,7 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_value_out
+    MPS: scatter_value_out_mps

 - func: scatter.reduce(Tensor self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor
   structured_delegate: scatter.reduce_out
@@ -6016,6 +6495,7 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_reduce_out
+    MPS: scatter_reduce_out_mps

 - func: scatter.value_reduce(Tensor self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor
   structured_delegate: scatter.value_reduce_out
@@ -6030,6 +6510,7 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_value_reduce_out
+    MPS: scatter_value_reduce_out_mps

 - func: scatter.dimname_src(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
   variants: function, method
@@ -6050,14 +6531,24 @@
   variants: function
   dispatch:
     CPU, CUDA: scatter_add
+    MPS: scatter_add_mps_out

 - func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
   variants: function, method

-- func: scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor
+- func: scatter_reduce.two(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor
+  structured_delegate: scatter_reduce.two_out
   variants: function, method
+
+- func: scatter_reduce_.two(Tensor(a!) self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor(a!)
+  structured_delegate: scatter_reduce.two_out
+  variants: method
+
+- func: scatter_reduce.two_out(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  variants: function
   dispatch:
-    CPU: scatter_reduce_two_cpu
+    CPU, CUDA: scatter_reduce_two

 - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   structured_delegate: eq.Scalar_out
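`scatter_reduce.two` is reworked: it now takes a `src` tensor, gains in-place and out variants through the structured-kernel machinery, and replaces `output_size` with `include_self`. A semantic sketch, with a hypothetical call shape mirroring the schema:

    src = Torch.tensor([1.0, 2.0, 3.0, 4.0])
    index = Torch.tensor([0, 1, 0, 1])
    base = Torch.zeros([2])
    # base.scatter_reduce(0, index, src, "sum")  # => [1+3, 2+4] = [4.0, 6.0]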
@@ -6093,6 +6584,12 @@
   dispatch:
     CompositeExplicitAutograd: bitwise_and

+- func: bitwise_and.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
+  device_check: NoCheck # TensorIterator
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: bitwise_and
+
 - func: bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
@@ -6141,6 +6638,12 @@
   device_check: NoCheck # TensorIterator
   variants: method, function

+- func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
+  device_check: NoCheck # TensorIterator
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: bitwise_or
+
 - func: bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
@@ -6189,6 +6692,12 @@
   device_check: NoCheck # TensorIterator
   variants: method, function

+- func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
+  device_check: NoCheck # TensorIterator
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: bitwise_xor
+
 - func: bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
@@ -6236,12 +6745,14 @@
   variants: method
   dispatch:
     CPU, CUDA: __ilshift__
+  autogen: __lshift__.Scalar_out

 - func: __ilshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
     CPU, CUDA: __ilshift__
+  autogen: __lshift__.Tensor_out

 - func: bitwise_left_shift.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -6264,25 +6775,25 @@
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: bitwise_left_shift
+    CompositeExplicitAutograd: bitwise_left_shift

 - func: bitwise_left_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: bitwise_left_shift_
+    CompositeExplicitAutograd: bitwise_left_shift_

 - func: bitwise_left_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
-    CPU, CUDA: bitwise_left_shift_out
+    CompositeExplicitAutograd: bitwise_left_shift_out

 - func: bitwise_left_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
-    CPU, CUDA: bitwise_left_shift
+    CompositeExplicitAutograd: bitwise_left_shift

 - func: __rshift__.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -6301,12 +6812,14 @@
   variants: method
   dispatch:
     CPU, CUDA: __irshift__
+  autogen: __rshift__.Scalar_out

 - func: __irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
     CPU, CUDA: __irshift__
+  autogen: __rshift__.Tensor_out

 - func: bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -6329,25 +6842,25 @@
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: bitwise_right_shift
+    CompositeExplicitAutograd: bitwise_right_shift

 - func: bitwise_right_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: bitwise_right_shift_
+    CompositeExplicitAutograd: bitwise_right_shift_

 - func: bitwise_right_shift.Tensor_Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
-    CPU, CUDA: bitwise_right_shift_out
+    CompositeExplicitAutograd: bitwise_right_shift_out

 - func: bitwise_right_shift.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
-    CPU, CUDA: bitwise_right_shift
+    CompositeExplicitAutograd: bitwise_right_shift

 - func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
   structured_delegate: tril.out
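The shift hunks move the Tensor/Scalar dispatch entries to `CompositeExplicitAutograd` kernels and add `Scalar_Tensor` overloads for `bitwise_and`/`or`/`xor`, so scalar-first calls no longer need special casing. Expected behavior of the generated methods, as a sketch:

    x = Torch.tensor([1, 2, 4])
    x.bitwise_left_shift(1)    # => [2, 4, 8]
    x.bitwise_right_shift(1)   # => [0, 1, 2]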
@@ -6376,15 +6889,18 @@
|
|
6376
6889
|
variants: method
|
6377
6890
|
dispatch:
|
6378
6891
|
CPU, CUDA: addbmm_
|
6892
|
+
MPS: addbmm_mps_
|
6379
6893
|
|
6380
6894
|
- func: addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
|
6381
6895
|
dispatch:
|
6382
6896
|
CPU, CUDA: addbmm_out
|
6897
|
+
MPS: addbmm_out_mps
|
6383
6898
|
|
6384
6899
|
- func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
|
6385
6900
|
variants: method, function
|
6386
6901
|
dispatch:
|
6387
6902
|
CPU, CUDA: addbmm
|
6903
|
+
MPS: addbmm_mps
|
6388
6904
|
|
6389
6905
|
- func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
|
6390
6906
|
device_check: NoCheck # TensorIterator
|
@@ -6392,6 +6908,8 @@
|
|
6392
6908
|
dispatch:
|
6393
6909
|
CPU, CUDA: random_
|
6394
6910
|
Meta: random_meta_
|
6911
|
+
MPS: random_mps_
|
6912
|
+
autogen: random.from_functional, random.from_out
|
6395
6913
|
|
6396
6914
|
- func: random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)
|
6397
6915
|
device_check: NoCheck # TensorIterator
|
@@ -6399,6 +6917,8 @@
|
|
6399
6917
|
dispatch:
|
6400
6918
|
CPU, CUDA: random_
|
6401
6919
|
Meta: random_meta_
|
6920
|
+
MPS: random_mps_
|
6921
|
+
autogen: random.to_functional, random.to_out
|
6402
6922
|
|
6403
6923
|
- func: random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)
|
6404
6924
|
device_check: NoCheck # TensorIterator
|
@@ -6406,31 +6926,37 @@
|
|
6406
6926
|
dispatch:
|
6407
6927
|
CPU, CUDA: random_
|
6408
6928
|
Meta: random_meta_
|
6929
|
+
autogen: random.functional, random.out
|
6409
6930
|
|
6410
6931
|
- func: uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)
|
6411
6932
|
device_check: NoCheck # TensorIterator
|
6412
6933
|
variants: method
|
6413
6934
|
dispatch:
|
6414
6935
|
CPU, CUDA: uniform_
|
6936
|
+
MPS: uniform_mps_
|
6415
6937
|
Meta: uniform_meta_
|
6938
|
+
autogen: uniform.functional, uniform.out
|
6416
6939
|
|
6417
6940
|
- func: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!)
|
6418
6941
|
device_check: NoCheck # TensorIterator
|
6419
6942
|
variants: method
|
6420
6943
|
dispatch:
|
6421
6944
|
CPU, CUDA: cauchy_
|
6945
|
+
autogen: cauchy.functional, cauchy.out
|
6422
6946
|
|
6423
6947
|
- func: log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)
|
6424
6948
|
device_check: NoCheck # TensorIterator
|
6425
6949
|
variants: method
|
6426
6950
|
dispatch:
|
6427
6951
|
CPU, CUDA: log_normal_
|
6952
|
+
autogen: log_normal.functional, log_normal.out
|
6428
6953
|
|
6429
6954
|
- func: exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)
|
6430
6955
|
device_check: NoCheck # TensorIterator
|
6431
6956
|
variants: method
|
6432
6957
|
dispatch:
|
6433
6958
|
CPU, CUDA: exponential_
|
6959
|
+
autogen: exponential.functional, exponential.out
|
6434
6960
|
|
6435
6961
|
- func: geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)
|
6436
6962
|
device_check: NoCheck # TensorIterator
|
@@ -6439,11 +6965,13 @@
|
|
6439
6965
|
CPU, CUDA: geometric_
|
6440
6966
|
|
6441
6967
|
# wrappers for TH functions
|
6968
|
+
autogen: geometric.functional, geometric.out
|
6442
6969
|
|
6443
6970
|
- func: diag.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
|
6444
6971
|
dispatch:
|
6445
6972
|
CPU: diag_cpu_out
|
6446
6973
|
CUDA: diag_cuda_out
|
6974
|
+
MPS: diag_mps_out
|
6447
6975
|
|
6448
6976
|
- func: diag(Tensor self, int diagonal=0) -> Tensor
|
6449
6977
|
variants: method, function
|
@@ -6465,6 +6993,7 @@
|
|
6465
6993
|
dispatch:
|
6466
6994
|
CPU: triu_cpu
|
6467
6995
|
CUDA: triu_cuda
|
6996
|
+
MPS: triu_mps_out
|
6468
6997
|
|
6469
6998
|
- func: triu(Tensor self, int diagonal=0) -> Tensor
|
6470
6999
|
structured_delegate: triu.out
|
@@ -6475,6 +7004,7 @@
|
|
6475
7004
|
dispatch:
|
6476
7005
|
CPU: tril_cpu
|
6477
7006
|
CUDA: tril_cuda
|
7007
|
+
MPS: tril_mps_out
|
6478
7008
|
|
6479
7009
|
- func: tril(Tensor self, int diagonal=0) -> Tensor
|
6480
7010
|
structured_delegate: tril.out
|
@@ -6507,6 +7037,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: ne_Scalar_out
+    MPS: ne_scalar_out_mps
     QuantizedCPU: ne_out_quantized_cpu
 
 - func: ne.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6522,6 +7053,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: ne_Tensor_out
+    MPS: ne_tensor_out_mps
     QuantizedCPU: ne_out_quantized_cpu
 
 - func: ne.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6568,6 +7100,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: eq_Scalar_out
+    MPS: eq_scalar_out_mps
     QuantizedCPU: eq_out_quantized_cpu
 
 - func: eq.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6583,6 +7116,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: eq_Tensor_out
+    MPS: eq_tensor_out_mps
     QuantizedCPU: eq_out_quantized_cpu
 
 - func: eq.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6598,6 +7132,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: ge_Scalar_out
+    MPS: ge_scalar_out_mps
     QuantizedCPU: ge_out_quantized_cpu
 
 - func: ge.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6613,6 +7148,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: ge_Tensor_out
+    MPS: ge_tensor_out_mps
     QuantizedCPU: ge_out_quantized_cpu
 
 - func: ge.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6659,6 +7195,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: le_Scalar_out
+    MPS: le_scalar_out_mps
     QuantizedCPU: le_out_quantized_cpu
 
 - func: le.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6674,6 +7211,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: le_Tensor_out
+    MPS: le_tensor_out_mps
     QuantizedCPU: le_out_quantized_cpu
 
 - func: le.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6720,6 +7258,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: gt_Scalar_out
+    MPS: gt_scalar_out_mps
     QuantizedCPU: gt_out_quantized_cpu
 
 - func: gt.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6735,6 +7274,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: gt_Tensor_out
+    MPS: gt_tensor_out_mps
     QuantizedCPU: gt_out_quantized_cpu
 
 - func: gt.Tensor(Tensor self, Tensor other) -> Tensor
@@ -6781,6 +7321,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: lt_Scalar_out
+    MPS: lt_scalar_out_mps
     QuantizedCPU: lt_out_quantized_cpu
 
 - func: lt.Scalar(Tensor self, Scalar other) -> Tensor
@@ -6796,6 +7337,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: lt_Tensor_out
+    MPS: lt_tensor_out_mps
     QuantizedCPU: lt_out_quantized_cpu
 
 - func: lt.Tensor(Tensor self, Tensor other) -> Tensor
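All twelve comparison out-kernels above (ne, eq, ge, le, gt, lt in Scalar and Tensor flavors) gain MPS dispatch entries. The user-facing behavior is unchanged: each returns a boolean tensor. A quick torch-rb illustration (CPU shown; the same calls should route to the new kernels on an `mps` device):

```ruby
require "torch"

a = Torch.tensor([1, 2, 3])
b = Torch.tensor([3, 2, 1])

a.eq(b)  # => tensor of [false, true, false]
a.lt(b)  # => tensor of [true, false, false]
a.ge(2)  # Scalar overload => tensor of [false, true, true]
```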
@@ -6854,15 +7396,18 @@
   dispatch:
     CPU, QuantizedCPU: index_select_out_cpu_
     CUDA, QuantizedCUDA: index_select_out_cuda
+    MPS: index_select_out_mps
 
 - func: index_select(Tensor self, int dim, Tensor index) -> Tensor
   variants: method, function
   dispatch:
     CPU: index_select_cpu_
     QuantizedCPU: index_select_quantized_cpu_
-    CUDA
-
-
+    CUDA: index_select_cuda
+    QuantizedCUDA: index_select_quantized_cuda
+    SparseCPU: index_select_sparse_cpu
+    SparseCUDA: index_select_sparse_cuda
+    MPS: index_select_mps
 
 - func: index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)
 
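(The content of the second and third removed dispatch lines did not survive extraction; the hunk splits the combined CUDA/quantized/sparse entries into per-backend kernels and adds MPS.) Usage from Ruby is unchanged:

```ruby
require "torch"

x = Torch.tensor([[1, 2], [3, 4], [5, 6]])
idx = Torch.tensor([0, 2])
x.index_select(0, idx)  # rows 0 and 2 => [[1, 2], [5, 6]]
```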
@@ -6911,6 +7456,7 @@
   structured: True
   dispatch:
     CPU, CUDA: gather_out
+    MPS: gather_out_mps
 
 - func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor
   variants: method, function
@@ -6934,6 +7480,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: addcmul_out
+    MPS: addcmul_out_mps
 
 - func: addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
   structured_delegate: addcmul.out
@@ -6951,6 +7498,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: addcdiv_out
+    MPS: addcdiv_out_mps
 
 - func: addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
   structured_delegate: addcdiv.out
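addcmul computes `self + value * tensor1 * tensor2` elementwise, and addcdiv the analogous `self + value * tensor1 / tensor2`; both now have MPS out-kernels. A hedged torch-rb sketch (the `value:` keyword is assumed from the schema's keyword-only parameter):

```ruby
require "torch"

x  = Torch.ones(3)
t1 = Torch.tensor([1.0, 2.0, 3.0])
t2 = Torch.tensor([4.0, 5.0, 6.0])

x.addcmul(t1, t2, value: 0.5)  # => [3.0, 6.0, 10.0]  (1 + 0.5 * t1 * t2)
x.addcdiv(t1, t2, value: 2.0)  # => 1 + 2.0 * t1 / t2
```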
@@ -6998,10 +7546,13 @@
 
 - func: linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor
   python_module: linalg
-  variants:
+  variants: function
   dispatch:
     CPU, CUDA: linalg_solve_triangular
 
+- func: linalg_vander(Tensor x, *, int? N=None) -> Tensor
+  python_module: linalg
+
 - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
   dispatch:
     CompositeExplicitAutograd: symeig_out
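linalg_vander is a new composite op (no dispatch keys needed) that builds a Vandermonde matrix: row i is [1, x_i, x_i**2, ..., x_i**(N-1)]. Since torch-rb generates bindings from this file, it should come through automatically; the equivalent built by hand, as a sketch:

```ruby
require "torch"

x = Torch.tensor([1.0, 2.0, 3.0])
n = 4
# Column j holds x**j, so V[i, j] = x[i]**j
cols = n.times.map { |j| x.pow(j) }
v = Torch.stack(cols, 1)
# => [[1, 1, 1, 1], [1, 2, 4, 8], [1, 3, 9, 27]]
```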
@@ -7079,21 +7630,6 @@
     CPU: _cholesky_solve_helper_cpu
     CUDA: _cholesky_solve_helper_cuda
 
-- func: solve(Tensor self, Tensor A) -> (Tensor solution, Tensor LU)
-  variants: function, method
-  dispatch:
-    CompositeExplicitAutograd: solve
-
-- func: solve.solution(Tensor self, Tensor A, *, Tensor(a!) solution, Tensor(b!) lu) -> (Tensor(a!) solution, Tensor(b!) LU)
-  dispatch:
-    CompositeExplicitAutograd: solve_out
-
-- func: _solve_helper(Tensor self, Tensor A) -> (Tensor, Tensor)
-  variants: function
-  dispatch:
-    CPU: _solve_helper_cpu
-    CUDA: _solve_helper_cuda
-
 - func: cholesky_inverse(Tensor self, bool upper=False) -> Tensor
   variants: method, function
   dispatch:
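This hunk deletes the long-deprecated `solve` family, which PyTorch 1.12 removed in favor of `linalg_solve`, so torch-rb 0.11 no longer generates those bindings. A hedged migration sketch, assuming the generated `Torch::Linalg.solve` binding (note the argument order is (A, B), the reverse of the old solve(B, A)):

```ruby
require "torch"

a = Torch.tensor([[3.0, 1.0], [1.0, 2.0]])
b = Torch.tensor([[9.0], [8.0]])

# Old (removed): solution, lu = Torch.solve(b, a)
x = Torch::Linalg.solve(a, b)  # solves A x = B
```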
@@ -7144,13 +7680,14 @@
   dispatch:
     CPU, CUDA: lu_solve
 
+# lu_unpack
 - func: lu_unpack(Tensor LU_data, Tensor LU_pivots, bool unpack_data=True, bool unpack_pivots=True) -> (Tensor P, Tensor L, Tensor U)
+  structured_delegate: lu_unpack.out
   variants: function
-  dispatch:
-    CPU, CUDA: lu_unpack
 
 - func: lu_unpack.out(Tensor LU_data, Tensor LU_pivots, bool unpack_data=True, bool unpack_pivots=True, *, Tensor(a!) P, Tensor(b!) L, Tensor(c!) U) -> (Tensor(a!) P, Tensor(b!) L, Tensor(c!) U)
   variants: function
+  structured: True
   dispatch:
     CPU, CUDA: lu_unpack_out
 
@@ -7274,6 +7811,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sign_out
+    MPS: sign_out_mps
     SparseCPU, SparseCUDA: sign_sparse_out
     SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
 
@@ -7305,6 +7843,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: atan2_out
+    MPS: atan2_mps_out
 
 - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -7391,6 +7930,12 @@
   dispatch:
     CPU: histogramdd_cpu
 
+- func: histogramdd(Tensor self, int[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)
+
+- func: histogramdd.int_bins(Tensor self, int bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)
+
+- func: histogramdd.TensorList_bins(Tensor self, Tensor[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)
+
 - func: fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
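Three new composite overloads of histogramdd land here (schema-only, no dispatch keys), computing multi-dimensional histograms and returning the counts plus one bin-edge tensor per dimension. A hedged sketch, assuming codegen exposes it as `Torch.histogramdd`:

```ruby
require "torch"

points = Torch.rand(100, 2)                      # 100 samples in 2-D
hist, edges = Torch.histogramdd(points, [5, 5])  # 5 bins per dimension
hist.shape    # => [5, 5] counts
edges.length  # => 2 (one edge tensor per dimension)
```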
@@ -7528,6 +8073,7 @@
   variants: method, function
   dispatch:
     CPU, CUDA: min
+    MPS: min_mps
     QuantizedCPU: min_quantized_cpu
 
 - func: fmin(Tensor self, Tensor other) -> Tensor
@@ -7547,6 +8093,7 @@
   variants: method, function
   dispatch:
     CPU, CUDA: max
+    MPS: max_mps
     QuantizedCPU: max_quantized_cpu
 
 - func: fmax(Tensor self, Tensor other) -> Tensor
@@ -7572,6 +8119,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: maximum_out
+    MPS: maximum_out_mps
 
 # binary max, alias of maximum
 # NOTE: max is not an alias for maximum, since there is also unary max
@@ -7593,6 +8141,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: minimum_out
+    MPS: minimum_out_mps
 
 # binary min, alias for minimum
 # NOTE: min is not an alias for minimum, since there is also unary min
@@ -7626,27 +8175,23 @@
 - func: sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   device_check: NoCheck # TensorIterator
   dispatch:
-
-    CUDA: sort_out_cuda
+    CompositeExplicitAutograd: sort_out
 
 - func: sort.values_stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
+  structured: True
   dispatch:
-    CPU:
-    CUDA: sort_out_stable_cuda
+    CPU, CUDA: sort_stable_out
 
 - func: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-
-    CUDA: sort_cuda
-    QuantizedCPU: sort_quantized_cpu
+    CompositeExplicitAutograd: sort
 
 - func: sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
+  structured_delegate: sort.values_stable
   variants: method, function
   dispatch:
-    CPU: sort_cpu_stable
-    CUDA: sort_stable_cuda
     QuantizedCPU: sort_quantized_cpu_stable
 
 - func: sort.dimname_values(Tensor self, Dimname dim, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
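(Some removed dispatch lines above lost their text during extraction and are left blank.) The net effect: sort becomes a structured op, the per-backend CPU/CUDA kernels collapse into a shared `sort_stable_out`, and the plain overloads become CompositeExplicitAutograd wrappers. The caller-visible API is unchanged; a hedged torch-rb sketch (keyword support assumed from torch-rb's argument parser, which mirrors PyTorch's):

```ruby
require "torch"

t = Torch.tensor([3, 1, 2])
values, indices = t.sort                  # ascending by default
values, indices = t.sort(descending: true)

# The stable overload preserves the order of equal elements:
values, indices = Torch.tensor([2, 1, 2, 1]).sort(stable: true)
```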
@@ -7676,6 +8221,7 @@
   dispatch:
     CPU: topk_out_cpu
     CUDA: topk_out_cuda
+    MPS: topk_out_mps
 
 - func: topk(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)
   variants: method, function
@@ -7693,6 +8239,7 @@
   structured: True
   dispatch:
     CPU, CUDA: all_all_out
+    MPS: all_all_out_mps
 
 - func: any(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -7706,6 +8253,7 @@
   structured: True
   dispatch:
     CPU, CUDA: any_all_out
+    MPS: any_all_out_mps
 
 - func: renorm.out(Tensor self, Scalar p, int dim, Scalar maxnorm, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
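topk and the full-tensor all/any reductions gain MPS kernels here; behavior is unchanged:

```ruby
require "torch"

t = Torch.tensor([1, 5, 2, 4, 3])
values, indices = t.topk(2)
values   # => tensor of [5, 4]
indices  # => tensor of [1, 3]
```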
@@ -7728,7 +8276,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA: unfold
+    CPU, CUDA, Meta: unfold
     QuantizedCPU, QuantizedCUDA: unfold
 
 - func: unfold_backward(Tensor grad_in, int[] input_sizes, int dim, int size, int step) -> Tensor
@@ -7749,6 +8297,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: pow_Tensor_Tensor_out
+    MPS: pow_tensor_tensor_out_mps
 
 - func: pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -7772,6 +8321,7 @@
   dispatch:
     CPU, CUDA: pow_Tensor_Scalar_out
     SparseCPU, SparseCUDA: pow_out_sparse_scalar
+    MPS: pow_tensor_scalar_out_mps
 
 - func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -7815,32 +8365,46 @@
   variants: method
   dispatch:
     CPU, CUDA: normal_
+    MPS: normal_mps_
     Meta: normal_meta_
     SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
+  autogen: normal.functional, normal.out
 
 - func: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: normal_out
+    MPS: normal_mps_out
+    Meta: normal_out_meta
 
 - func: normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor
   dispatch:
     CPU, CUDA: normal
+    #MPS: normal_mps
+    Meta: normal_meta
 
 - func: normal.float_Tensor_out(float mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: normal_out
+    Meta: normal_out_meta
+    MPS: normal_mps_out
 
 - func: normal.float_Tensor(float mean, Tensor std, *, Generator? generator=None) -> Tensor
   dispatch:
     CPU, CUDA: normal
+    Meta: normal_meta
+    #MPS: normal_mps
 
 - func: normal.Tensor_Tensor_out(Tensor mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: normal_out
+    Meta: normal_out_meta
+    MPS: normal_mps_out
 
 - func: normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor
   dispatch:
     CPU, CUDA: normal
+    Meta: normal_meta
+    #MPS: normal_mps
 
 - func: normal.float_float(float mean, float std, int[] size, *, Generator? generator=None, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 
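The normal.* family gains Meta kernels and MPS out-kernels; the commented `#MPS: normal_mps` lines record kernels that upstream left disabled for the non-out variants. Call shapes from Ruby, assumed from the schemas above rather than verified:

```ruby
require "torch"

# float mean and std with an explicit shape
Torch.normal(0.0, 1.0, [2, 3])

# tensor mean / tensor std, sampled elementwise
mean = Torch.zeros(5)
std  = Torch.ones(5)
Torch.normal(mean, std)
```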
@@ -7851,32 +8415,30 @@
   dispatch:
     CompositeExplicitAutograd: alias
 
-- func: _index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
-  dispatch:
-    CPU: _index_copy_impl_
-    CUDA: _index_copy_impl_
-
 - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+  autogen: _amp_foreach_non_finite_check_and_unscale.functional, _amp_foreach_non_finite_check_and_unscale.out
 
 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+  autogen: _amp_update_scale.functional, _amp_update_scale.out
 
-- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
-  dispatch:
-    CPU: _cat_cpu
-    CUDA: cat_cuda
-    QuantizedCPU: cat_quantized_cpu
+#- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
+#  dispatch:
+#    CPU: _cat_cpu
+#    CUDA: cat_cuda
+#    MPS: cat_mps
+#    QuantizedCPU: cat_quantized_cpu
 
-- func: _cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
-  dispatch:
-    CPU: _cat_out_cpu
-    CUDA: cat_out_cuda
-    QuantizedCPU: cat_out_quantized_cpu
+#- func: _cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+#  dispatch:
+#    CPU: _cat_out_cpu
+#    CUDA: cat_out_cuda
+#    QuantizedCPU: cat_out_quantized_cpu
 
 - func: _foreach_add.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
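The long run of small hunks that follows adds an `autogen:` line to every `_foreach_*` op so torchgen can emit functional and out= variants (needed by functionalization); the dispatch entries themselves are untouched. The `_foreach` ops are private fused helpers that apply one operation across a list of tensors, semantically a map, as in this sketch:

```ruby
require "torch"

tensors = [Torch.tensor([1.0, 2.0]), Torch.tensor([3.0, 4.0])]

# Semantically, _foreach_exp(tensors) is a fused version of:
exps = tensors.map(&:exp)

# and _foreach_add_.Scalar(tensors, 1) mutates each element in place:
tensors.each { |t| t.add!(1) }
```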
@@ -7891,6 +8453,7 @@
   dispatch:
     CPU: foreach_tensor_add_scalar_kernel_slow_
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
+  autogen: _foreach_add.Scalar_functional, _foreach_add.Scalar_out
 
 - func: _foreach_sub.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7905,6 +8468,7 @@
   dispatch:
     CPU: foreach_tensor_sub_scalar_kernel_slow_
     CUDA: foreach_tensor_sub_scalar_kernel_cuda_
+  autogen: _foreach_sub.Scalar_functional, _foreach_sub.Scalar_out
 
 - func: _foreach_mul.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7919,6 +8483,7 @@
   dispatch:
     CPU: foreach_tensor_mul_scalar_kernel_slow_
     CUDA: foreach_tensor_mul_scalar_kernel_cuda_
+  autogen: _foreach_mul.Scalar_functional, _foreach_mul.Scalar_out
 
 - func: _foreach_div.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7933,6 +8498,7 @@
   dispatch:
     CPU: foreach_tensor_div_scalar_kernel_slow_
     CUDA: foreach_tensor_div_scalar_kernel_cuda_
+  autogen: _foreach_div.Scalar_functional, _foreach_div.Scalar_out
 
 - func: _foreach_add.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7947,6 +8513,7 @@
   dispatch:
     CPU: foreach_tensor_add_list_kernel_slow_
     CUDA: foreach_tensor_add_list_kernel_cuda_
+  autogen: _foreach_add.List_functional, _foreach_add.List_out
 
 - func: _foreach_sub.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7961,6 +8528,7 @@
   dispatch:
     CPU: foreach_tensor_sub_list_kernel_slow_
     CUDA: foreach_tensor_sub_list_kernel_cuda_
+  autogen: _foreach_sub.List_functional, _foreach_sub.List_out
 
 - func: _foreach_mul.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7975,6 +8543,7 @@
   dispatch:
     CPU: foreach_tensor_mul_list_kernel_slow_
     CUDA: foreach_tensor_mul_list_kernel_cuda_
+  autogen: _foreach_mul.List_functional, _foreach_mul.List_out
 
 - func: _foreach_div.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -7989,6 +8558,7 @@
   dispatch:
     CPU: foreach_tensor_div_list_kernel_slow_
     CUDA: foreach_tensor_div_list_kernel_cuda_
+  autogen: _foreach_div.List_functional, _foreach_div.List_out
 
 - func: _foreach_add.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8003,6 +8573,7 @@
   dispatch:
     CPU: foreach_tensor_add_scalarlist_kernel_slow_
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
+  autogen: _foreach_add.ScalarList_functional, _foreach_add.ScalarList_out
 
 - func: _foreach_sub.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8017,6 +8588,7 @@
   dispatch:
     CPU: foreach_tensor_sub_scalarlist_kernel_slow_
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
+  autogen: _foreach_sub.ScalarList_functional, _foreach_sub.ScalarList_out
 
 - func: _foreach_div.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8031,6 +8603,7 @@
   dispatch:
     CPU: foreach_tensor_div_scalarlist_kernel_slow_
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
+  autogen: _foreach_div.ScalarList_functional, _foreach_div.ScalarList_out
 
 - func: _foreach_mul.ScalarList(Tensor[] tensors, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8045,6 +8618,7 @@
   dispatch:
     CPU: foreach_tensor_mul_scalarlist_kernel_slow_
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
+  autogen: _foreach_mul.ScalarList_functional, _foreach_mul.ScalarList_out
 
 - func: _foreach_exp(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8059,6 +8633,7 @@
   dispatch:
     CPU: foreach_tensor_zero_slow_
     CUDA: foreach_tensor_zero_cuda_
+  autogen: _foreach_zero.functional, _foreach_zero.out
 
 - func: _foreach_exp_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8066,6 +8641,7 @@
   dispatch:
     CPU: foreach_tensor_exp_slow_
     CUDA: foreach_tensor_exp_cuda_
+  autogen: _foreach_exp.functional, _foreach_exp.out
 
 - func: _foreach_sqrt(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8080,6 +8656,7 @@
   dispatch:
     CPU: foreach_tensor_sqrt_slow_
     CUDA: foreach_tensor_sqrt_cuda_
+  autogen: _foreach_sqrt.functional, _foreach_sqrt.out
 
 - func: _foreach_abs(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8094,6 +8671,7 @@
   dispatch:
     CPU: foreach_tensor_abs_slow_
     CUDA: foreach_tensor_abs_cuda_
+  autogen: _foreach_abs.functional, _foreach_abs.out
 
 - func: _foreach_acos(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8108,6 +8686,7 @@
   dispatch:
     CPU: foreach_tensor_acos_slow_
     CUDA: foreach_tensor_acos_cuda_
+  autogen: _foreach_acos.functional, _foreach_acos.out
 
 - func: _foreach_asin(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8122,6 +8701,7 @@
   dispatch:
     CPU: foreach_tensor_asin_slow_
     CUDA: foreach_tensor_asin_cuda_
+  autogen: _foreach_asin.functional, _foreach_asin.out
 
 - func: _foreach_atan(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8136,6 +8716,7 @@
   dispatch:
     CPU: foreach_tensor_atan_slow_
     CUDA: foreach_tensor_atan_cuda_
+  autogen: _foreach_atan.functional, _foreach_atan.out
 
 - func: _foreach_ceil(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8150,6 +8731,7 @@
   dispatch:
     CPU: foreach_tensor_ceil_slow_
     CUDA: foreach_tensor_ceil_cuda_
+  autogen: _foreach_ceil.functional, _foreach_ceil.out
 
 - func: _foreach_cos(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8164,6 +8746,7 @@
   dispatch:
     CPU: foreach_tensor_cos_slow_
     CUDA: foreach_tensor_cos_cuda_
+  autogen: _foreach_cos.functional, _foreach_cos.out
 
 - func: _foreach_cosh(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8178,6 +8761,7 @@
   dispatch:
     CPU: foreach_tensor_cosh_slow_
     CUDA: foreach_tensor_cosh_cuda_
+  autogen: _foreach_cosh.functional, _foreach_cosh.out
 
 - func: _foreach_erf(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8192,6 +8776,7 @@
   dispatch:
     CPU: foreach_tensor_erf_slow_
     CUDA: foreach_tensor_erf_cuda_
+  autogen: _foreach_erf.functional, _foreach_erf.out
 
 - func: _foreach_erfc(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8206,6 +8791,7 @@
   dispatch:
     CPU: foreach_tensor_erfc_slow_
     CUDA: foreach_tensor_erfc_cuda_
+  autogen: _foreach_erfc.functional, _foreach_erfc.out
 
 - func: _foreach_expm1(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8220,6 +8806,7 @@
   dispatch:
     CPU: foreach_tensor_expm1_slow_
     CUDA: foreach_tensor_expm1_cuda_
+  autogen: _foreach_expm1.functional, _foreach_expm1.out
 
 - func: _foreach_floor(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8234,6 +8821,7 @@
   dispatch:
     CPU: foreach_tensor_floor_slow_
     CUDA: foreach_tensor_floor_cuda_
+  autogen: _foreach_floor.functional, _foreach_floor.out
 
 - func: _foreach_log(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8248,6 +8836,7 @@
   dispatch:
     CPU: foreach_tensor_log_slow_
     CUDA: foreach_tensor_log_cuda_
+  autogen: _foreach_log.functional, _foreach_log.out
 
 - func: _foreach_log10(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8262,6 +8851,7 @@
   dispatch:
     CPU: foreach_tensor_log10_slow_
     CUDA: foreach_tensor_log10_cuda_
+  autogen: _foreach_log10.functional, _foreach_log10.out
 
 - func: _foreach_log1p(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8276,6 +8866,7 @@
   dispatch:
     CPU: foreach_tensor_log1p_slow_
     CUDA: foreach_tensor_log1p_cuda_
+  autogen: _foreach_log1p.functional, _foreach_log1p.out
 
 - func: _foreach_log2(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8290,6 +8881,7 @@
   dispatch:
     CPU: foreach_tensor_log2_slow_
     CUDA: foreach_tensor_log2_cuda_
+  autogen: _foreach_log2.functional, _foreach_log2.out
 
 - func: _foreach_neg(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8304,6 +8896,7 @@
   dispatch:
     CPU: foreach_tensor_neg_slow_
     CUDA: foreach_tensor_neg_cuda_
+  autogen: _foreach_neg.functional, _foreach_neg.out
 
 - func: _foreach_tan(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8318,6 +8911,7 @@
   dispatch:
     CPU: foreach_tensor_tan_slow_
     CUDA: foreach_tensor_tan_cuda_
+  autogen: _foreach_tan.functional, _foreach_tan.out
 
 - func: _foreach_tanh(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8332,6 +8926,7 @@
   dispatch:
     CPU: foreach_tensor_tanh_slow_
     CUDA: foreach_tensor_tanh_cuda_
+  autogen: _foreach_tanh.functional, _foreach_tanh.out
 
 - func: _foreach_sin(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8346,6 +8941,7 @@
   dispatch:
     CPU: foreach_tensor_sin_slow_
     CUDA: foreach_tensor_sin_cuda_
+  autogen: _foreach_sin.functional, _foreach_sin.out
 
 - func: _foreach_sinh(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8360,6 +8956,7 @@
   dispatch:
     CPU: foreach_tensor_sinh_slow_
     CUDA: foreach_tensor_sinh_cuda_
+  autogen: _foreach_sinh.functional, _foreach_sinh.out
 
 - func: _foreach_round(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8374,6 +8971,7 @@
   dispatch:
     CPU: foreach_tensor_round_slow_
     CUDA: foreach_tensor_round_cuda_
+  autogen: _foreach_round.functional, _foreach_round.out
 
 - func: _foreach_lgamma(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8388,6 +8986,7 @@
   dispatch:
     CPU: foreach_tensor_lgamma_slow_
     CUDA: foreach_tensor_lgamma_cuda_
+  autogen: _foreach_lgamma.functional, _foreach_lgamma.out
 
 - func: _foreach_frac(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8402,6 +9001,7 @@
   dispatch:
     CPU: foreach_tensor_frac_slow_
     CUDA: foreach_tensor_frac_cuda_
+  autogen: _foreach_frac.functional, _foreach_frac.out
 
 - func: _foreach_reciprocal(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8416,6 +9016,7 @@
   dispatch:
     CPU: foreach_tensor_reciprocal_slow_
     CUDA: foreach_tensor_reciprocal_cuda_
+  autogen: _foreach_reciprocal.functional, _foreach_reciprocal.out
 
 - func: _foreach_sigmoid(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8430,6 +9031,7 @@
   dispatch:
     CPU: foreach_tensor_sigmoid_slow_
     CUDA: foreach_tensor_sigmoid_cuda_
+  autogen: _foreach_sigmoid.functional, _foreach_sigmoid.out
 
 - func: _foreach_trunc(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8444,6 +9046,7 @@
   dispatch:
     CPU: foreach_tensor_trunc_slow_
     CUDA: foreach_tensor_trunc_cuda_
+  autogen: _foreach_trunc.functional, _foreach_trunc.out
 
 - func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8451,6 +9054,7 @@
   dispatch:
     CPU: foreach_tensor_addcdiv_scalar_slow_
    CUDA: foreach_tensor_addcdiv_scalar_cuda_
+  autogen: _foreach_addcdiv.Scalar_functional, _foreach_addcdiv.Scalar_out
 
 - func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8458,6 +9062,7 @@
   dispatch:
     CPU: foreach_tensor_addcmul_scalar_slow_
     CUDA: foreach_tensor_addcmul_scalar_cuda_
+  autogen: _foreach_addcmul.Scalar_functional, _foreach_addcmul.Scalar_out
 
 - func: _foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8465,6 +9070,7 @@
   dispatch:
     CPU: foreach_tensor_addcdiv_scalarlist_slow_
     CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
+  autogen: _foreach_addcdiv.ScalarList_functional, _foreach_addcdiv.ScalarList_out
 
 - func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8472,6 +9078,7 @@
   dispatch:
     CPU: foreach_tensor_addcmul_scalarlist_slow_
     CUDA: foreach_tensor_addcmul_scalarlist_cuda_
+  autogen: _foreach_addcmul.ScalarList_functional, _foreach_addcmul.ScalarList_out
 
 - func: _foreach_addcdiv.Scalar(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -8584,25 +9191,29 @@
 
 - func: mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
+  structured: True
+  structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
     CPU, CUDA: mse_loss_out
+    MPS: mse_loss_out_mps
 
 - func: mse_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
   device_check: NoCheck # TensorIterator
+  structured_delegate: mse_loss.out
   python_module: nn
-  dispatch:
-    CPU, CUDA: mse_loss
 
 - func: mse_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU, CUDA: mse_loss_backward_out
+    MPS: mse_loss_backward_out_mps
 
 - func: mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: mse_loss_backward
+    MPS: mse_loss_backward_mps
 
 - func: l1_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
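mse_loss becomes a structured op (the forward now delegates to the structured out kernel, so the standalone CPU/CUDA entries disappear) and gains MPS forward and backward kernels. From Ruby it is still the familiar functional, assuming torch-rb's `Torch::NN::Functional` module:

```ruby
require "torch"

pred   = Torch.tensor([2.5, 0.0, 2.0])
target = Torch.tensor([3.0, -0.5, 2.0])

loss = Torch::NN::Functional.mse_loss(pred, target)
# mean reduction by default, i.e. ((pred - target) ** 2).mean
```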
@@ -8693,6 +9304,7 @@
   dispatch:
     CPU: nll_loss_forward_out_cpu
     CUDA: nll_loss_forward_out_cuda
+    MPS: nll_loss_forward_out_mps
 
 - func: nll_loss_forward(Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index) -> (Tensor output, Tensor total_weight)
   python_module: nn
@@ -8704,6 +9316,7 @@
   dispatch:
     CPU: nll_loss_backward_out_cpu
     CUDA: nll_loss_backward_out_cuda
+    MPS: nll_loss_backward_out_mps
 
 - func: nll_loss_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight) -> Tensor
   python_module: nn
@@ -8720,24 +9333,28 @@
   dispatch:
     CPU: nll_loss2d_forward_out_cpu
     CUDA: nll_loss2d_forward_out_cuda
+    MPS: nll_loss2d_forward_out_mps
 
 - func: nll_loss2d_forward(Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index) -> (Tensor output, Tensor total_weight)
   python_module: nn
   dispatch:
     CPU: nll_loss2d_forward_cpu
     CUDA: nll_loss2d_forward_cuda
+    MPS: nll_loss2d_forward_mps
 
 - func: nll_loss2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: nll_loss2d_backward_out_cpu
     CUDA: nll_loss2d_backward_out_cuda
+    MPS: nll_loss2d_backward_out_mps
 
 - func: nll_loss2d_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight) -> Tensor
   python_module: nn
   dispatch:
     CPU: nll_loss2d_backward_cpu
     CUDA: nll_loss2d_backward_cuda
+    MPS: nll_loss2d_backward_mps
 
 - func: smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -8746,6 +9363,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: smooth_l1_loss_out
+    MPS: smooth_l1_loss_out_mps
 
 - func: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -8757,6 +9375,7 @@
   dispatch:
     CPU: smooth_l1_loss_backward_out
     CUDA: smooth_l1_loss_backward_out
+    MPS: smooth_l1_loss_backward_out_mps
 
 - func: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor
   python_module: nn
@@ -8810,6 +9429,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: elu_out
+    MPS: elu_out_mps
 
 - func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
   structured_delegate: elu.out
@@ -8822,6 +9442,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: elu_backward_out
+    MPS: elu_backward_out_mps
 
 - func: elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result) -> Tensor
   structured_delegate: elu_backward.grad_input
@@ -8858,6 +9479,16 @@
     CPU: glu_backward_cpu
     CUDA: glu_backward_cuda
 
+- func: glu_jvp(Tensor glu, Tensor x, Tensor dx, int dim) -> Tensor
+  python_module: nn
+  dispatch:
+    CPU, CUDA: glu_jvp
+
+- func: glu_backward_jvp(Tensor grad_x, Tensor grad_glu, Tensor x, Tensor dgrad_glu, Tensor dx, int dim) -> Tensor
+  python_module: nn
+  dispatch:
+    CPU, CUDA: glu_backward_jvp
+
 - func: hardsigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
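glu_jvp and glu_backward_jvp are new internal kernels supporting forward-mode autodiff (JVPs) through GLU. GLU splits its input in half along `dim` and computes `a * sigmoid(b)`; the equivalent forward by hand, as a sketch:

```ruby
require "torch"

x = Torch.randn(4, 6)
a, b = x.chunk(2, 1)        # split into two halves along dim 1
glu = a * Torch.sigmoid(b)  # same result as the native glu op on dim 1
```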
@@ -8894,31 +9525,33 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardtanh_out
+    CPU, CUDA, MPS: hardtanh_out
     QuantizedCPU: hardtanh_out_quantized_cpu
 
 - func: hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardtanh
+    CPU, CUDA, MPS: hardtanh
     QuantizedCPU: hardtanh_quantized_cpu
 
 - func: hardtanh_backward.grad_input(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU, CUDA: hardtanh_backward_out
+    MPS: hardtanh_backward_out_mps
 
 - func: hardtanh_backward(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val) -> Tensor
   python_module: nn
   dispatch:
     CPU, CUDA: hardtanh_backward
+    MPS: hardtanh_backward_mps
 
 - func: hardtanh_(Tensor(a!) self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardtanh_
+    CPU, CUDA, MPS: hardtanh_
     QuantizedCPU: hardtanh_quantized_cpu_
 
 - func: hardswish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -8951,6 +9584,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: leaky_relu_out
+    MPS: leaky_relu_out_mps
     QuantizedCPU: leaky_relu_out_quantized_cpu
 
 - func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor
@@ -8966,6 +9600,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: leaky_relu_backward_out
+    MPS: leaky_relu_backward_out_mps
 
 - func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
   structured_delegate: leaky_relu_backward.grad_input
@@ -9088,6 +9723,7 @@
   dispatch:
     CPU: adaptive_avg_pool2d_out_cpu
     CUDA: adaptive_avg_pool2d_out_cuda
+    MPS: adaptive_avg_pool2d_out_mps
     MkldnnCPU: mkldnn_adaptive_avg_pool2d_out
 
 - func: adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
@@ -9105,13 +9741,16 @@
   dispatch:
     CPU: adaptive_avg_pool2d_cpu
     CUDA: adaptive_avg_pool2d_cuda
+    MPS: adaptive_avg_pool2d_mps
     QuantizedCPU: adaptive_avg_pool2d_quantized_cpu
+    QuantizedCUDA: adaptive_avg_pool2d_quantized_cuda
 
 - func: _adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
     CPU: adaptive_avg_pool2d_backward_cpu
     CUDA: adaptive_avg_pool2d_backward_cuda
+    MPS: adaptive_avg_pool2d_backward_mps
 
 - func: adaptive_avg_pool3d.out(Tensor self, int[3] output_size, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -9148,6 +9787,7 @@
   dispatch:
     CPU: adaptive_max_pool2d_out_cpu
     CUDA: adaptive_max_pool2d_out_cuda
+    MPS: adaptive_max_pool2d_out_mps
 
 # Return: (Tensor output, Tensor indices)
 - func: adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor)
@@ -9160,6 +9800,7 @@
   dispatch:
     CPU: adaptive_max_pool2d_backward_out_cpu
     CUDA: adaptive_max_pool2d_backward_out_cuda
+    MPS: adaptive_max_pool2d_backward_out_mps
 
 - func: adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor
   python_module: nn
@@ -9199,6 +9840,7 @@
   dispatch:
     CPU: avg_pool2d_out_cpu
     CUDA: avg_pool2d_out_cuda
+    MPS: avg_pool2d_out_mps
     MkldnnCPU: mkldnn_avg_pool2d_out
 
 - func: avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
@@ -9214,6 +9856,7 @@
   dispatch:
     CPU: avg_pool2d_backward_out_cpu
     CUDA: avg_pool2d_backward_out_cuda
+    MPS: avg_pool2d_backward_out_mps
     MkldnnCPU: mkldnn_avg_pool2d_backward_out
 
 - func: avg_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor
@@ -9282,6 +9925,7 @@
   precomputed:
   - kernel_size -> int poolSizeT, int poolSizeH, int poolSizeW
   - output_size -> int outputT, int outputH, int outputW
+  - int numBatch, int numPlanes, int inputT, int inputH, int inputW
   dispatch:
     CPU: fractional_max_pool3d_out_cpu
     CUDA: fractional_max_pool3d_out_cuda
@@ -9310,6 +9954,7 @@
   dispatch:
     CPU: max_pool2d_with_indices_out_cpu
     CUDA: max_pool2d_with_indices_out_cuda
+    MPS: max_pool2d_with_indices_out_mps
 
 # Return: (Tensor output, Tensor indices)
 - func: max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
@@ -9322,6 +9967,7 @@
   dispatch:
     CPU: max_pool2d_with_indices_backward_out_cpu
     CUDA: max_pool2d_with_indices_backward_out_cuda
+    MPS: max_pool2d_with_indices_backward_out_mps
 
 - func: max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices) -> Tensor
   python_module: nn
@@ -9365,18 +10011,6 @@
     CPU: max_unpooling2d_forward_cpu
     CUDA: max_unpooling2d_forward_cuda
 
-- func: max_unpool2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor indices, int[2] output_size, *, Tensor(a!) grad_input) -> Tensor(a!)
-  python_module: nn
-  dispatch:
-    CPU: max_unpooling2d_backward_out_cpu
-    CUDA: max_unpooling2d_backward_out_cuda
-
-- func: max_unpool2d_backward(Tensor grad_output, Tensor self, Tensor indices, int[2] output_size) -> Tensor
-  python_module: nn
-  dispatch:
-    CPU: max_unpooling2d_backward_cpu
-    CUDA: max_unpooling2d_backward_cuda
-
 - func: max_unpool3d.out(Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   dispatch:
@@ -9389,30 +10023,18 @@
     CPU: max_unpooling3d_forward_cpu
     CUDA: max_unpooling3d_forward_cuda
 
-- func: max_unpool3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
-  python_module: nn
-  dispatch:
-    CPU: max_unpooling3d_backward_out_cpu
-    CUDA: max_unpooling3d_backward_out_cuda
-
-- func: max_unpool3d_backward(Tensor grad_output, Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding) -> Tensor
-  python_module: nn
-  dispatch:
-    CPU: max_unpooling3d_backward_cpu
-    CUDA: max_unpooling3d_backward_cuda
-
 - func: reflection_pad1d.out(Tensor self, int[2] padding, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   structured: True
   dispatch:
-    CPU, QuantizedCPU: reflection_pad1d_out_cpu
+    CPU: reflection_pad1d_out_cpu
+    QuantizedCPU: reflection_pad1d_out_quantized_cpu
     CUDA: reflection_pad1d_out_cuda
+    MPS: reflection_pad1d_out_mps
 
 - func: reflection_pad1d(Tensor self, int[2] padding) -> Tensor
   python_module: nn
   structured_delegate: reflection_pad1d.out
-  dispatch:
-    QuantizedCPU: reflection_pad1d_cpu
 
 - func: reflection_pad1d_backward.grad_input(Tensor grad_output, Tensor self, int[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -9420,6 +10042,7 @@
   dispatch:
     CPU: reflection_pad1d_backward_out_cpu
     CUDA: reflection_pad1d_backward_out_cuda
+    MPS: reflection_pad1d_backward_out_mps
 
 - func: reflection_pad1d_backward(Tensor grad_output, Tensor self, int[2] padding) -> Tensor
   python_module: nn
@@ -9430,24 +10053,29 @@
   dispatch:
     CPU, QuantizedCPU: reflection_pad2d_out_cpu
     CUDA: reflection_pad2d_out_cuda
+    MPS: reflection_pad2d_out_mps
 
 - func: reflection_pad2d(Tensor self, int[4] padding) -> Tensor
   python_module: nn
   dispatch:
-    CPU, QuantizedCPU: reflection_pad2d_cpu
+    CPU: reflection_pad2d_cpu
+    QuantizedCPU: reflection_pad2d_quantized_cpu
     CUDA: reflection_pad2d_cuda
+    MPS: reflection_pad2d_mps
 
 - func: reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: reflection_pad2d_backward_out_cpu
     CUDA: reflection_pad2d_backward_out_cuda
+    MPS: reflection_pad2d_backward_out_mps
 
 - func: reflection_pad2d_backward(Tensor grad_output, Tensor self, int[4] padding) -> Tensor
   python_module: nn
   dispatch:
     CPU: reflection_pad2d_backward_cpu
     CUDA: reflection_pad2d_backward_cuda
+    MPS: reflection_pad2d_backward_mps
 
 - func: reflection_pad3d.out(Tensor self, int[6] padding, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -9455,6 +10083,7 @@
   dispatch:
     CPU: reflection_pad3d_out_cpu
     CUDA: reflection_pad3d_out_cuda
+    MPS: reflection_pad3d_out_mps
 
 - func: reflection_pad3d(Tensor self, int[6] padding) -> Tensor
   python_module: nn
@@ -9466,6 +10095,7 @@
   dispatch:
     CPU: reflection_pad3d_backward_out_cpu
     CUDA: reflection_pad3d_backward_out_cuda
+    MPS: reflection_pad3d_backward_out_mps
 
 - func: reflection_pad3d_backward(Tensor grad_output, Tensor self, int[6] padding) -> Tensor
   python_module: nn
@@ -9477,6 +10107,7 @@
   dispatch:
     CPU: replication_pad1d_out_cpu
     CUDA: replication_pad1d_out_cuda
+    MPS: replication_pad1d_out_mps
 
 - func: replication_pad1d(Tensor self, int[2] padding) -> Tensor
   python_module: nn
@@ -9488,6 +10119,7 @@
   dispatch:
     CPU: replication_pad1d_backward_out_cpu
     CUDA: replication_pad1d_backward_out_cuda
+    MPS: replication_pad1d_backward_out_mps
 
 - func: replication_pad1d_backward(Tensor grad_output, Tensor self, int[2] padding) -> Tensor
   python_module: nn
@@ -9499,6 +10131,7 @@
   dispatch:
     CPU: replication_pad2d_out_cpu
     CUDA: replication_pad2d_out_cuda
+    MPS: replication_pad2d_out_mps
 
 - func: replication_pad2d(Tensor self, int[4] padding) -> Tensor
   python_module: nn
@@ -9509,12 +10142,14 @@
   dispatch:
     CPU: replication_pad2d_backward_out_cpu
     CUDA: replication_pad2d_backward_out_cuda
+    MPS: replication_pad2d_backward_out_mps
 
 - func: replication_pad2d_backward(Tensor grad_output, Tensor self, int[4] padding) -> Tensor
   python_module: nn
   dispatch:
     CPU: replication_pad2d_backward_cpu
     CUDA: replication_pad2d_backward_cuda
+    MPS: replication_pad2d_backward_mps
 
 - func: replication_pad3d.out(Tensor self, int[6] padding, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -9522,6 +10157,7 @@
   dispatch:
     CPU: replication_pad3d_out_cpu
     CUDA: replication_pad3d_out_cuda
+    MPS: replication_pad3d_out_mps
 
 - func: replication_pad3d(Tensor self, int[6] padding) -> Tensor
   python_module: nn
@@ -9532,12 +10168,23 @@
   dispatch:
     CPU: replication_pad3d_backward_out_cpu
     CUDA: replication_pad3d_backward_out_cuda
+    MPS: replication_pad3d_backward_out_mps
 
 - func: replication_pad3d_backward(Tensor grad_output, Tensor self, int[6] padding) -> Tensor
   python_module: nn
   dispatch:
     CPU: replication_pad3d_backward_cpu
     CUDA: replication_pad3d_backward_cuda
+    MPS: replication_pad3d_backward_mps
+
+- func: _pad_circular(Tensor self, int[] pad) -> Tensor
+  python_module: nn
+
+- func: _pad_enum(Tensor self, int[] pad, int mode, float? value=None) -> Tensor
+  python_module: nn
+
+- func: pad(Tensor self, int[] pad, str mode="constant", float? value=None) -> Tensor
+  python_module: nn
 
 - func: upsample_linear1d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
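`pad` (with the `_pad_circular` and `_pad_enum` helpers) moves into native_functions.yaml as a real operator instead of pure Python, so bindings can be generated directly. A hedged torch-rb sketch using the existing functional entry point; the `mode:`/`value:` keywords are assumed from the schema:

```ruby
require "torch"

x = Torch.ones(2, 2)
# The pad list pads the last dimension first: [left, right]
Torch::NN::Functional.pad(x, [1, 1], mode: "constant", value: 0)
# => 2x4 tensor with a zero column on each side
```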
@@ -9694,6 +10341,7 @@
   dispatch:
     CPU: upsample_bilinear2d_out_cpu
     CUDA: upsample_bilinear2d_out_cuda
+    MPS: upsample_bilinear2d_out_mps
 
 - func: upsample_bilinear2d(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -9707,6 +10355,7 @@
   dispatch:
     CPU: upsample_bilinear2d_backward_out_cpu
     CUDA: upsample_bilinear2d_backward_out_cuda
+    MPS: upsample_bilinear2d_backward_out_mps
 
 - func: upsample_bilinear2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -9850,6 +10499,7 @@
   dispatch:
     CPU: upsample_nearest2d_out_cpu
     CUDA: upsample_nearest2d_out_cuda
+    MPS: upsample_nearest2d_out_mps
 
 - func: _upsample_nearest_exact2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -9857,6 +10507,7 @@
   dispatch:
     CPU: _upsample_nearest_exact2d_out_cpu
     CUDA: _upsample_nearest_exact2d_out_cuda
+    MPS: _upsample_nearest_exact2d_out_mps
 
 - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -9876,6 +10527,7 @@
   dispatch:
     CPU: upsample_nearest2d_backward_out_cpu
     CUDA: upsample_nearest2d_backward_out_cuda
+    MPS: upsample_nearest2d_backward_out_mps
 
 - func: _upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -9883,6 +10535,7 @@
   dispatch:
     CPU: _upsample_nearest_exact2d_backward_out_cpu
     CUDA: _upsample_nearest_exact2d_backward_out_cuda
+    MPS: _upsample_nearest_exact2d_backward_out_mps
 
 - func: upsample_nearest2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -9946,6 +10599,7 @@
|
|
9946
10599
|
structured_inherits: TensorIteratorBase
|
9947
10600
|
dispatch:
|
9948
10601
|
CPU, CUDA: sigmoid_backward_out
|
10602
|
+
MPS: sigmoid_backward_out_mps
|
9949
10603
|
|
9950
10604
|
- func: sigmoid_backward(Tensor grad_output, Tensor output) -> Tensor
|
9951
10605
|
python_module: nn
|
@@ -9968,6 +10622,7 @@
|
|
9968
10622
|
structured_inherits: TensorIteratorBase
|
9969
10623
|
dispatch:
|
9970
10624
|
CPU, CUDA: tanh_backward_out
|
10625
|
+
MPS: tanh_backward_out_mps
|
9971
10626
|
|
9972
10627
|
- func: tanh_backward(Tensor grad_output, Tensor output) -> Tensor
|
9973
10628
|
python_module: nn
|
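
All of the MPS: *_mps dispatch lines above (replication padding, bilinear and nearest upsampling, sigmoid/tanh backward) register Metal kernels, so these ops can execute on Apple-silicon GPUs when the underlying LibTorch is built with MPS enabled. A hedged sketch of what that looks like from torch-rb; whether to("mps") succeeds depends entirely on your LibTorch build:

    require "torch"

    x = Torch.rand(1, 3, 8, 8).to("mps") # raises if LibTorch lacks MPS support

    # replication padding now has a dedicated MPS kernel (see the hunks above)
    y = Torch::NN::Functional.pad(x, [1, 1, 1, 1], mode: "replicate")
    puts y.device # expect an mps device
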
@@ -10233,6 +10888,19 @@
   dispatch:
     CPU, CUDA: special_ndtri_out

+- func: special_log_ndtr(Tensor self) -> Tensor
+  structured_delegate: special_log_ndtr.out
+  python_module: special
+  variants: function
+
+- func: special_log_ndtr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  structured_inherits: TensorIteratorBase
+  python_module: special
+  variants: function
+  dispatch:
+    CPU, CUDA: special_log_ndtr_out
+
 - func: special_expm1(Tensor self) -> Tensor
   python_module: special
   variants: function
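
special_log_ndtr is new in this release: it evaluates log(ndtr(x)), the log of the standard normal CDF, in a numerically stable way, where taking the log of ndtr directly underflows for large negative x. The python_module: special tag means torch-rb's codegen places it with the other special functions; treating Torch::Special.log_ndtr and Torch::Special.ndtr as the generated names is an assumption here:

    require "torch"

    x = Torch.tensor([-20.0, 0.0, 5.0])

    puts Torch::Special.log_ndtr(x).inspect # stable even at x = -20
    puts Torch::Special.ndtr(x).log.inspect # the naive version underflows to -inf first
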
@@ -10486,7 +11154,7 @@

 - func: special_polygamma(int n, Tensor self) -> Tensor
   python_module: special
-  variants: function
+  variants: function

 - func: special_polygamma.out(int n, Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: special
@@ -10782,11 +11450,15 @@
 - func: linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor
   python_module: linalg
   variants: function
+  structured_delegate: linalg_cross.out
   dispatch:
-    CPU, CUDA: linalg_cross
+    ZeroTensor: linalg_cross_zerotensor

 - func: linalg_cross.out(Tensor self, Tensor other, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  structured: True
+  precomputed:
+  - dim -> int dim
   dispatch:
     CPU, CUDA: linalg_cross_out

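
linalg_cross is migrated to a structured kernel here: the functional form now delegates to linalg_cross.out via structured_delegate, shape checking and output allocation live in one meta function, and the precomputed: block canonicalizes dim once before the kernel body runs. Call-site behavior is unchanged; a sketch assuming the generated torch-rb binding is Torch::Linalg.cross:

    require "torch"

    a = Torch.tensor([[1.0, 0.0, 0.0]])
    b = Torch.tensor([[0.0, 1.0, 0.0]])

    # cross product along the last dimension (the default dim of -1)
    p Torch::Linalg.cross(a, b) # => [[0.0, 0.0, 1.0]]
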
@@ -10811,6 +11483,20 @@
   dispatch:
     CPU, CUDA: linalg_lu_factor_ex_out

+# linalg.lu
+- func: linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)
+  python_module: linalg
+  structured_delegate: linalg_lu.out
+  variants: function
+
+- func: linalg_lu.out(Tensor A, *, bool pivot=True, Tensor(a!) P, Tensor(b!) L, Tensor(c!) U) -> (Tensor(a!) P, Tensor(b!) L, Tensor(c!) U)
+  python_module: linalg
+  variants: function
+  structured: True
+  dispatch:
+    CPU, CUDA: linalg_lu_out
+
+# linalg.det
 - func: linalg_det(Tensor self) -> Tensor
   python_module: linalg
   variants: function
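
linalg_lu is a new public factorization that returns an explicit P, L, U triple (unlike linalg_lu_factor, which packs L and U into a single tensor plus pivots). A sketch assuming torch-rb exposes the generated binding as Torch::Linalg.lu; multiplying the factors back together is a quick sanity check:

    require "torch"

    a = Torch.rand(3, 3)
    p_mat, l, u = Torch::Linalg.lu(a)

    # P * L * U should reproduce A up to floating-point error
    err = (p_mat.matmul(l).matmul(u) - a).abs.max
    puts err.item
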
@@ -10832,6 +11518,38 @@
   dispatch:
     CPU, CUDA: _det_lu_based_helper_backward_helper

+- func: linalg_ldl_factor_ex(Tensor self, *, bool hermitian=False, bool check_errors=False) -> (Tensor LD, Tensor pivots, Tensor info)
+  structured_delegate: linalg_ldl_factor_ex.out
+  python_module: linalg
+  variants: function
+
+- func: linalg_ldl_factor_ex.out(Tensor self, *, bool hermitian=False, bool check_errors=False, Tensor(a!) LD, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LD, Tensor(b!) pivots, Tensor(c!) info)
+  structured: True
+  python_module: linalg
+  variants: function
+  dispatch:
+    CPU, CUDA: linalg_ldl_factor_ex_out
+
+- func: linalg_ldl_factor(Tensor self, *, bool hermitian=False) -> (Tensor LD, Tensor pivots)
+  python_module: linalg
+  variants: function
+
+- func: linalg_ldl_factor.out(Tensor self, *, bool hermitian=False, Tensor(a!) LD, Tensor(b!) pivots) -> (Tensor(a!) LD, Tensor(b!) pivots)
+  python_module: linalg
+  variants: function
+
+- func: linalg_ldl_solve(Tensor LD, Tensor pivots, Tensor B, *, bool hermitian=False) -> Tensor
+  structured_delegate: linalg_ldl_solve.out
+  python_module: linalg
+  variants: function
+
+- func: linalg_ldl_solve.out(Tensor LD, Tensor pivots, Tensor B, *, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  python_module: linalg
+  variants: function
+  dispatch:
+    CPU, CUDA: linalg_ldl_solve_out
+
 - func: linalg_lstsq(Tensor self, Tensor b, float? rcond=None, *, str? driver=None) -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)
   python_module: linalg
   variants: function
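
The LDL block adds a factor/solve pair for symmetric (or, with hermitian=True, Hermitian) indefinite matrices: linalg_ldl_factor produces the packed LD factor and pivots, and linalg_ldl_solve consumes them to solve AX = B without refactorizing. A sketch assuming the generated torch-rb names Torch::Linalg.ldl_factor and Torch::Linalg.ldl_solve:

    require "torch"

    m = Torch.rand(3, 3)
    a = m + m.t # symmetrize so the LDL factorization applies
    b = Torch.rand(3, 1)

    ld, pivots = Torch::Linalg.ldl_factor(a)
    x = Torch::Linalg.ldl_solve(ld, pivots, b)

    puts (a.matmul(x) - b).abs.max.item # ~0
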
@@ -10901,7 +11619,7 @@
   python_module: linalg
   variants: function

-- func: linalg_eigvalsh.out(Tensor self, str UPLO=
+- func: linalg_eigvalsh.out(Tensor self, str UPLO="L", *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
   dispatch:
     CPU, CUDA: linalg_eigvalsh_out
@@ -10922,6 +11640,7 @@
   dispatch:
     CPU: _linalg_inv_out_helper_cpu
     CUDA: _linalg_inv_out_helper_cuda
+  autogen: _linalg_inv_out_helper.functional, _linalg_inv_out_helper.out

 - func: linalg_inv_ex(Tensor self, *, bool check_errors=False) -> (Tensor inverse, Tensor info)
   python_module: linalg
@@ -10978,11 +11697,11 @@
 - func: linalg_vector_norm(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   python_module: linalg
   variants: function
-  dispatch:
-    CPU, CUDA: linalg_vector_norm
+  structured_delegate: linalg_vector_norm.out

 - func: linalg_vector_norm.out(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  structured: True
   dispatch:
     CPU, CUDA: linalg_vector_norm_out

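
linalg_vector_norm gets the same structured-kernel treatment: the direct CPU/CUDA dispatch on the functional form is replaced by structured_delegate, so the functional and out variants share one implementation. Results are identical to before; a sketch assuming Torch::Linalg.vector_norm is the generated torch-rb binding (ord passed positionally):

    require "torch"

    x = Torch.tensor([3.0, -4.0])

    puts Torch::Linalg.vector_norm(x).item    # => 5.0 (L2 norm by default)
    puts Torch::Linalg.vector_norm(x, 1).item # => 7.0 (L1 norm)
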
@@ -11106,13 +11825,13 @@
   python_module: linalg
   variants: function

-- func: linalg_qr(Tensor
+- func: linalg_qr(Tensor A, str mode='reduced') -> (Tensor Q, Tensor R)
   python_module: linalg
   variants: function
   dispatch:
     CompositeExplicitAutograd: linalg_qr

-- func: linalg_qr.out(Tensor
+- func: linalg_qr.out(Tensor A, str mode='reduced', *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)
   python_module: linalg
   variants: function
   dispatch:
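
The linalg_qr change renames the input from self to A, part of PyTorch 1.12's alignment of linalg signatures with the Array API; positional callers see no difference. A sketch assuming Torch::Linalg.qr is the generated torch-rb binding:

    require "torch"

    a = Torch.rand(4, 3)
    q, r = Torch::Linalg.qr(a) # mode defaults to "reduced"

    p q.shape # => [4, 3]
    p r.shape # => [3, 3]
    puts (q.matmul(r) - a).abs.max.item # ~0
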
@@ -11232,3 +11951,447 @@
 - func: unflatten_dense_tensors(Tensor flat, Tensor[] tensors) -> Tensor[]
   variants: function
   python_module: nn
+
+- func: nested_tensor(Tensor[] list, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  variants: function
+
+- func: _fw_primal_copy(Tensor self, int level) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _fw_primal_copy
+  tags: view_copy
+
+- func: _make_dual_copy(Tensor primal, Tensor tangent, int level) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _make_dual_copy
+  tags: view_copy
+
+- func: view_as_real_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_as_real_copy
+  tags: view_copy
+
+- func: view_as_complex_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_as_complex_copy
+  tags: view_copy
+
+- func: _conj_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _conj_copy
+  tags: view_copy
+
+- func: _neg_view_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _neg_view_copy
+  tags: view_copy
+
+- func: as_strided_copy(Tensor self, int[] size, int[] stride, int? storage_offset=None) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: as_strided_copy
+  tags: view_copy
+
+- func: _sparse_broadcast_to_copy(Tensor self, int[] size) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _sparse_broadcast_to_copy
+  tags: view_copy
+
+- func: diagonal_copy(Tensor self, int offset=0, int dim1=0, int dim2=1) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: diagonal_copy
+  tags: view_copy
+
+- func: expand_copy(Tensor self, int[] size, *, bool implicit=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: expand_copy
+  tags: view_copy
+
+- func: expand_copy.SymInt(Tensor self, SymInt[] size, *, bool implicit=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: expand_copy_SymInt
+  tags: view_copy
+
+- func: permute_copy(Tensor self, int[] dims) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: permute_copy
+  tags: view_copy
+
+- func: _reshape_alias_copy(Tensor self, int[] size, int[] stride) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _reshape_alias_copy
+  tags: view_copy
+
+- func: select_copy.int(Tensor self, int dim, int index) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: select_copy_int
+  tags: view_copy
+
+- func: detach_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: detach_copy
+  tags: view_copy
+
+- func: slice_copy.Tensor(Tensor self, int dim=0, int? start=None, int? end=None, int step=1) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: slice_copy_Tensor
+  tags: view_copy
+
+- func: split_copy.Tensor(Tensor self, int split_size, int dim=0) -> Tensor[]
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: split_copy_Tensor
+  tags: view_copy
+
+- func: split_with_sizes_copy(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: split_with_sizes_copy
+  tags: view_copy
+
+- func: squeeze_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: squeeze_copy
+  tags: view_copy
+
+- func: squeeze_copy.dim(Tensor self, int dim) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: squeeze_copy_dim
+  tags: view_copy
+
+- func: t_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: t_copy
+  tags: view_copy
+
+- func: transpose_copy.int(Tensor self, int dim0, int dim1) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: transpose_copy_int
+  tags: view_copy
+
+- func: unsqueeze_copy(Tensor self, int dim) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unsqueeze_copy
+  tags: view_copy
+
+- func: _indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _indices_copy
+  tags: view_copy
+
+- func: _values_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _values_copy
+  tags: view_copy
+
+- func: indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: indices_copy
+  tags: view_copy
+
+- func: values_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: values_copy
+  tags: view_copy
+
+- func: crow_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: crow_indices_copy
+  tags: view_copy
+
+- func: col_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: col_indices_copy
+  tags: view_copy
+
+- func: ccol_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: ccol_indices_copy
+  tags: view_copy
+
+- func: row_indices_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: row_indices_copy
+  tags: view_copy
+
+- func: unbind_copy.int(Tensor self, int dim=0) -> Tensor[]
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unbind_copy_int
+  tags: view_copy
+
+- func: view_copy(Tensor self, int[] size) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_copy
+  tags: view_copy
+
+- func: view_copy.dtype(Tensor self, ScalarType dtype) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_copy_dtype
+  tags: view_copy
+
+- func: unfold_copy(Tensor self, int dimension, int size, int step) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unfold_copy
+  tags: view_copy
+
+- func: alias_copy(Tensor self) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: alias_copy
+  tags: view_copy
+
+- func: _fw_primal_copy.out(Tensor self, int level, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _fw_primal_copy_out
+
+
+- func: _make_dual_copy.out(Tensor primal, Tensor tangent, int level, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _make_dual_copy_out
+
+
+- func: view_as_real_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_as_real_copy_out
+
+
+- func: view_as_complex_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_as_complex_copy_out
+
+
+- func: _conj_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _conj_copy_out
+
+
+- func: _neg_view_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _neg_view_copy_out
+
+
+- func: as_strided_copy.out(Tensor self, int[] size, int[] stride, int? storage_offset=None, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: as_strided_copy_out
+
+
+- func: _sparse_broadcast_to_copy.out(Tensor self, int[] size, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _sparse_broadcast_to_copy_out
+
+
+- func: diagonal_copy.out(Tensor self, int offset=0, int dim1=0, int dim2=1, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: diagonal_copy_out
+
+
+- func: expand_copy.SymInt_out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: expand_copy_SymInt_out
+
+
+- func: expand_copy.out(Tensor self, int[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: expand_copy_out
+
+
+- func: permute_copy.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: permute_copy_out
+
+
+- func: _reshape_alias_copy.out(Tensor self, int[] size, int[] stride, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _reshape_alias_copy_out
+
+
+- func: select_copy.int_out(Tensor self, int dim, int index, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: select_copy_int_out
+
+
+- func: detach_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: detach_copy_out
+
+
+- func: slice_copy.Tensor_out(Tensor self, int dim=0, int? start=None, int? end=None, int step=1, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: slice_copy_Tensor_out
+
+
+- func: split_copy.Tensor_out(Tensor self, int split_size, int dim=0, *, Tensor(a!)[] out) -> ()
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: split_copy_Tensor_out
+
+
+- func: split_with_sizes_copy.out(Tensor self, int[] split_sizes, int dim=0, *, Tensor(a!)[] out) -> ()
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: split_with_sizes_copy_out
+
+
+- func: squeeze_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: squeeze_copy_out
+
+
+- func: squeeze_copy.dim_out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: squeeze_copy_dim_out
+
+
+- func: t_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: t_copy_out
+
+
+- func: transpose_copy.int_out(Tensor self, int dim0, int dim1, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: transpose_copy_int_out
+
+
+- func: unsqueeze_copy.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unsqueeze_copy_out
+
+
+- func: _indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _indices_copy_out
+
+
+- func: _values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _values_copy_out
+
+
+- func: indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: indices_copy_out
+
+
+- func: values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: values_copy_out
+
+
+- func: crow_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: crow_indices_copy_out
+
+
+- func: col_indices_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: col_indices_copy_out
+
+
+- func: unbind_copy.int_out(Tensor self, int dim=0, *, Tensor(a!)[] out) -> ()
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unbind_copy_int_out
+
+
+- func: view_copy.out(Tensor self, int[] size, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_copy_out
+
+
+- func: view_copy.dtype_out(Tensor self, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: view_copy_dtype_out
+
+
+- func: unfold_copy.out(Tensor self, int dimension, int size, int step, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: unfold_copy_out
+
+
+- func: alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: alias_copy_out
+
+- func: to_padded_tensor(Tensor self, float padding, int[]? output_size=None) -> Tensor
+  variants: method
+  dispatch:
+    NestedTensorCPU: NestedTensor_to_padded_tensor_generic
+    NestedTensorCUDA: NestedTensor_to_padded_tensor_cuda
+
+- func: _nested_tensor_layer_norm(Tensor self, Tensor? weight, Tensor? bias, float eps) -> Tensor
+  variants: method
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_layer_norm
+
+# Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
+- func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None) -> Tensor
+  variants: function
+  dispatch:
+    CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_encoder_layer_forward
+
+- func: _native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True) -> (Tensor, Tensor)
+  variants: function
+  dispatch:
+    CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: native_multi_head_attention
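
The long run of *_copy entries gives every view operation an out-of-place twin tagged view_copy: where transpose returns a tensor aliasing the original storage, transpose_copy returns an independent tensor, which is what functionalization passes and backends without native view support rely on. The nested_tensor, to_padded_tensor, and _transformer_encoder_layer_fwd entries back PyTorch 1.12's NestedTensor and BetterTransformer work; the underscore-prefixed ones are internal. A sketch of the view-vs-copy distinction, assuming the generated name Torch.transpose_copy is exposed on the Torch module in torch-rb:

    require "torch"

    x = Torch.zeros(2, 3)

    view = x.transpose(0, 1)             # aliases x's storage
    copy = Torch.transpose_copy(x, 0, 1) # assumed generated name; owns its data

    x[0, 0] = 1.0
    puts view[0, 0].item # => 1.0 (views see the write)
    puts copy[0, 0].item # => 0.0 (copies do not)
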