torch-rb 0.20.0 → 0.22.0
This diff shows the changes between publicly available versions of the package, as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +13 -12
- data/codegen/generate_functions.rb +5 -1
- data/codegen/native_functions.yaml +513 -384
- data/ext/torch/device.cpp +3 -0
- data/ext/torch/ext.cpp +5 -2
- data/ext/torch/ivalue.cpp +2 -0
- data/ext/torch/nn.cpp +3 -1
- data/ext/torch/ruby_arg_parser.cpp +7 -3
- data/ext/torch/ruby_arg_parser.h +5 -2
- data/ext/torch/templates.h +19 -37
- data/ext/torch/tensor.cpp +11 -8
- data/ext/torch/torch.cpp +6 -3
- data/ext/torch/utils.h +6 -2
- data/lib/torch/nn/conv1d.rb +11 -3
- data/lib/torch/nn/conv2d.rb +11 -3
- data/lib/torch/nn/conv3d.rb +11 -3
- data/lib/torch/nn/convnd.rb +1 -1
- data/lib/torch/nn/embedding.rb +10 -3
- data/lib/torch/nn/embedding_bag.rb +10 -3
- data/lib/torch/nn/functional.rb +20 -6
- data/lib/torch/nn/functional_attention.rb +30 -15
- data/lib/torch/nn/multihead_attention.rb +17 -7
- data/lib/torch/nn/rnn_base.rb +10 -3
- data/lib/torch/nn/transformer.rb +19 -10
- data/lib/torch/nn/transformer_decoder_layer.rb +7 -4
- data/lib/torch/nn/transformer_encoder_layer.rb +7 -4
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +1 -2
- metadata +3 -3
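Most of the churn in this release is in `codegen/native_functions.yaml`, the operator manifest vendored from PyTorch that `codegen/generate_functions.rb` (also touched in this release) reads when generating torch-rb's bindings. Each entry pairs a `func:` schema with a `dispatch:` table mapping dispatch keys (device/layout backends) to C++ kernel names. For orientation, here is a representative entry as it stands after this diff, reassembled from the `abs` hunks below; indentation is approximate and the trailing comments are explanatory glosses, not part of the file:

    - func: abs(Tensor self) -> Tensor
      variants: function, method   # roughly: exposed as both a module function and a tensor method
      dispatch:
        CompositeExplicitAutograd: abs   # fallback kernel when no more specific key matches
        SparseCPU, SparseCUDA, SparseMPS: abs_sparse
        SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr
        NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs
      tags: [core, pointwise]

Nearly all of the hunks below follow a few recurring patterns: folding separate `MPS: *_mps` kernels into the shared `CPU, CUDA` dispatch line, adding MTIA and XPU keys, extending sparse dispatch with `SparseMPS`/`SparseCsrMPS`, inserting `NestedTensorHPU` into nested-tensor dispatch lists, and declaring new schemas (e.g. `baddbmm.dtype`, `bmm.dtype`, `mm.dtype`, `_fused_rms_norm`, `randint_like.Tensor`, `cudnn_batch_norm.out`).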
@@ -288,14 +288,16 @@
   dispatch:
     CPU: native_dropout_cpu
     CUDA: native_dropout_cuda
-    NestedTensorCPU, NestedTensorCUDA: native_dropout_nested
+    MPS: native_dropout_mps
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_nested
   tags: [nondeterministic_seeded, core]
   autogen: native_dropout.out

 - func: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
   dispatch:
-    CPU, NestedTensorCPU, NestedTensorCUDA: native_dropout_backward
+    CPU, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_backward
     CUDA: native_dropout_backward_cuda
+    MPS: native_dropout_backward_mps
   autogen: native_dropout_backward.out
   tags: pointwise
@@ -340,9 +342,9 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: abs
-    SparseCPU, SparseCUDA: abs_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
+    SparseCPU, SparseCUDA, SparseMPS: abs_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs
   tags: [core, pointwise]

 - func: abs_(Tensor(a!) self) -> Tensor(a!)
@@ -350,17 +352,16 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: abs_
-    SparseCPU, SparseCUDA: abs_sparse_
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
+    SparseCPU, SparseCUDA, SparseMPS: abs_sparse_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs_

 - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: abs_out
-    MPS: abs_out_mps
-    SparseCPU, SparseCUDA: abs_sparse_out
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
+    CPU, CUDA, MPS, MTIA: abs_out
+    SparseCPU, SparseCUDA, SparseMPS: abs_sparse_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr_out
   tags: pointwise

 # Note [Adding an alias]
@@ -429,18 +430,18 @@
   variants: function, method
   structured_delegate: sgn.out
   dispatch:
-    SparseCPU, SparseCUDA: sgn_sparse
+    SparseCPU, SparseCUDA, SparseMPS: sgn_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn
   tags: pointwise

 - func: sgn_(Tensor(a!) self) -> Tensor(a!)
   variants: method
   structured_delegate: sgn.out
   dispatch:
-    SparseCPU, SparseCUDA: sgn_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: sgn_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn_
   tags: pointwise

 - func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -449,7 +450,7 @@
   dispatch:
     CPU, CUDA: sgn_out
     MPS: sgn_out_mps
-    SparseCPU, SparseCUDA: sgn_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: sgn_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_out
   tags: pointwise

@@ -477,7 +478,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: _conj_physical
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: conj_physical_sparse_csr
   autogen: _conj_physical.out

 - func: conj_physical(Tensor self) -> Tensor
@@ -488,8 +489,8 @@
   dispatch:
     CPU, CUDA: conj_physical_out
     MPS: conj_physical_out_mps
-    SparseCPU, SparseCUDA: conj_physical_out_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_out
+    SparseCPU, SparseCUDA, SparseMPS: conj_physical_out_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: conj_physical_sparse_csr_out
   tags: pointwise

 - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
@@ -527,8 +528,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: acos_out
-    MPS: acos_out_mps
+    CPU, CUDA, MPS: acos_out
   tags: pointwise

 # arccos, alias of acos
@@ -556,11 +556,11 @@
   structured_delegate: add.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: add_sparse
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: add_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
     MkldnnCPU: mkldnn_add
     ZeroTensor: add_zerotensor
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add_Tensor
   tags: [core, pointwise]

 - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
@@ -568,10 +568,10 @@
   variants: method
   structured_delegate: add.out
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: add_sparse_
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: add_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
     MkldnnCPU: mkldnn_add_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add__Tensor
   tags: pointwise

 - func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -584,10 +584,12 @@
   dispatch:
     SparseCPU, SparseMeta: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
+    SparseMPS: add_out_sparse_mps
     SparseCsrCPU, SparseCsrMeta: add_out_sparse_compressed_cpu
     SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
+    MTIA: add_out_mtia
   tags: pointwise

 - func: _add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
@@ -703,7 +705,7 @@
   structured_delegate: all.out
   variants: function, method
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_all


 - func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
@@ -720,6 +722,7 @@
   dispatch:
     CPU, CUDA: all_out
     MPS: all_out_mps
+    MTIA: all_out_mtia

 - func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -809,6 +812,7 @@
     CPU, Meta: arange_out
     CUDA: arange_cuda_out
     MPS: arange_mps_out
+    MTIA: arange_mtia_out
   cpp_no_default_args: ['step']

 # This function is a temporary hack to allow tracing of arange like constructs with dynamic
@@ -873,7 +877,7 @@
   variants: function, method
   structured_delegate: asinh.out
   dispatch:
-    SparseCPU, SparseCUDA: asinh_sparse
+    SparseCPU, SparseCUDA, SparseMPS: asinh_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr
   tags: [core, pointwise]

@@ -881,7 +885,7 @@
   variants: function, method
   structured_delegate: asinh.out
   dispatch:
-    SparseCPU, SparseCUDA: asinh_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: asinh_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_
   tags: pointwise

@@ -891,7 +895,7 @@
   dispatch:
     CPU, CUDA: asinh_out
     MPS: asinh_out_mps
-    SparseCPU, SparseCUDA: asinh_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: asinh_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_out
   tags: pointwise

@@ -908,7 +912,7 @@
   structured_delegate: atanh.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: atanh_sparse
+    SparseCPU, SparseCUDA, SparseMPS: atanh_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr
   tags: [core, pointwise]

@@ -916,7 +920,7 @@
   structured_delegate: atanh.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: atanh_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: atanh_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_
   tags: pointwise

@@ -926,7 +930,7 @@
   dispatch:
     CPU, CUDA: atanh_out
     MPS: atanh_out_mps
-    SparseCPU, SparseCUDA: atanh_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: atanh_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_out
   tags: pointwise
 # arctanh, alias for atanh
@@ -942,9 +946,8 @@
 - func: as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)
   variants: function, method
   dispatch:
-    ZeroTensor, CPU, CUDA: as_strided_tensorimpl
+    ZeroTensor, CPU, CUDA, MTIA, MPS: as_strided_tensorimpl
     Meta: as_strided_tensorimpl_meta_symint
-    MPS: as_strided_tensorimpl_mps
     QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
   device_check: NoCheck
   device_guard: False
@@ -964,7 +967,7 @@
   variants: function, method
   structured_delegate: asin.out
   dispatch:
-    SparseCPU, SparseCUDA: asin_sparse
+    SparseCPU, SparseCUDA, SparseMPS: asin_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr
   tags: [core, pointwise]

@@ -973,7 +976,7 @@
   variants: function, method
   structured_delegate: asin.out
   dispatch:
-    SparseCPU, SparseCUDA: asin_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: asin_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_
   tags: pointwise
@@ -982,9 +985,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: asin_out
-    MPS: asin_out_mps
-    SparseCPU, SparseCUDA: asin_sparse_out
+    CPU, CUDA, MPS: asin_out
+    SparseCPU, SparseCUDA, SparseMPS: asin_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
   tags: pointwise

@@ -1002,7 +1004,7 @@
   structured_delegate: atan.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: atan_sparse
+    SparseCPU, SparseCUDA, SparseMPS: atan_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr
   tags: [core, pointwise]

@@ -1011,7 +1013,7 @@
   structured_delegate: atan.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: atan_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: atan_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_
   tags: pointwise

@@ -1020,9 +1022,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: atan_out
-    MPS: atan_out_mps
-    SparseCPU, SparseCUDA: atan_sparse_out
+    CPU, CUDA, MPS: atan_out
+    SparseCPU, SparseCUDA, SparseMPS: atan_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
   tags: pointwise
@@ -1071,8 +1072,19 @@
     CUDA: baddbmm_out_cuda
     MPS: baddbmm_out_mps
     XPU: baddbmm_out_xpu
+    MTIA: baddbmm_out_mtia
     SparseCsrCUDA: baddbmm_out_sparse_csr_cuda

+- func: baddbmm.dtype(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _baddbmm_dtype_cuda
+
+- func: baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CUDA: _baddbmm_out_dtype_cuda
+
 - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
     CompositeExplicitAutograd: bartlett_window
@@ -1185,7 +1197,7 @@
     CompositeExplicitAutograd: binary_cross_entropy_with_logits
   autogen: binary_cross_entropy_with_logits.out

-- func: bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor
+- func: bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor
   variants: function, method
   dispatch:
     CPU: _bincount_cpu
@@ -1211,8 +1223,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: bitwise_not_out
-    MPS: bitwise_not_out_mps
+    CPU, CUDA, MPS, MTIA: bitwise_not_out
   tags: pointwise

 - func: copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -1262,7 +1273,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: logical_not
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_logical_not
   tags: [core, pointwise]

 - func: logical_not_(Tensor(a!) self) -> Tensor(a!)
@@ -1270,13 +1281,13 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: logical_not_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_logical_not_
   tags: pointwise

 - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: logical_not_out
+    CPU, CUDA, MTIA: logical_not_out
     MPS: logical_not_out_mps
   tags: pointwise

@@ -1318,7 +1329,7 @@
 - func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: logical_and_out
+    CPU, CUDA, MTIA: logical_and_out
     MPS: logical_and_out_mps
   tags: pointwise

@@ -1339,7 +1350,7 @@
 - func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: logical_or_out
+    CPU, CUDA, MTIA: logical_or_out
     MPS: logical_or_out_mps
   tags: pointwise

@@ -1371,10 +1382,21 @@
     CUDA: bmm_out_cuda
     MPS: bmm_out_mps
     XPU: bmm_out_xpu
+    MTIA: bmm_out_mtia
     SparseCPU: bmm_out_sparse_cpu
     SparseCUDA: bmm_out_sparse_cuda
     SparseCsrCUDA: bmm_out_sparse_csr_cuda

+- func: bmm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _bmm_dtype_cuda
+
+- func: bmm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CUDA: _bmm_out_dtype_cuda
+
 - func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck
   device_guard: False
@@ -1394,7 +1416,7 @@
   dispatch:
     SparseCPU, SparseCUDA: cat_sparse
     QuantizedCPU: cat_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: cat_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: cat_nested
   tags: core

 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -1440,7 +1462,7 @@
   structured_delegate: ceil.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: ceil_sparse
+    SparseCPU, SparseCUDA, SparseMPS: ceil_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr
   tags: [core, pointwise]

@@ -1449,7 +1471,7 @@
   structured_delegate: ceil.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: ceil_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: ceil_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_
   tags: pointwise

@@ -1459,7 +1481,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA, MPS: ceil_out
-    SparseCPU, SparseCUDA: ceil_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: ceil_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
   tags: pointwise

@@ -1482,7 +1504,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: chunk
-    NestedTensorCPU, NestedTensorCUDA: chunk_nested_tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: chunk_nested_tensor

 - func: tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[]
   variants: function, method
@@ -1529,7 +1551,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: clamp_out
+    CPU, CUDA, MTIA: clamp_out
     MPS: clamp_out_mps
   tags: pointwise

@@ -1569,7 +1591,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: clamp_max_out
+    CPU, CUDA, MTIA: clamp_max_out
     MPS: clamp_max_out_mps
   tags: pointwise

@@ -1609,7 +1631,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: clamp_min_out
+    CPU, CUDA, MTIA: clamp_min_out
     MPS: clamp_min_out_mps
   tags: pointwise
@@ -1658,8 +1680,7 @@

 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: complex_out
-    MPS: complex_out_mps
+    CPU, CUDA, MPS: complex_out

 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1668,8 +1689,7 @@

 - func: polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: polar_out
-    MPS: polar_out_mps
+    CPU, CUDA, MPS: polar_out

 - func: constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor
   variants: function
@@ -1781,7 +1801,7 @@
     SparseCPU, SparseCUDA: copy_sparse_wrapper_
     CompositeExplicitAutograd: copy_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
-    NestedTensorCPU, NestedTensorCUDA: copy_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: copy_nested_
   autogen: copy.out

 - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
@@ -1801,7 +1821,7 @@
   variants: function, method
   structured_delegate: cos.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_cos
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_cos
   tags: [core, pointwise]

 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -1815,8 +1835,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: cos_out
-    MPS: cos_out_mps
+    CPU, CUDA, MPS, MTIA: cos_out
   tags: pointwise

 - func: cosh(Tensor self) -> Tensor
@@ -1836,8 +1855,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: cosh_out
-    MPS: cosh_out_mps
+    CPU, CUDA, MPS: cosh_out
   tags: pointwise

 - func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
@@ -1876,7 +1894,10 @@
 - func: cudnn_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor, Tensor)
   dispatch:
     CUDA: cudnn_batch_norm
-  autogen: cudnn_batch_norm.out
+
+- func: cudnn_batch_norm.out(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))
+  dispatch:
+    CUDA: cudnn_batch_norm_out

 # NB: You can only use this if you used cudnn_batch_norm training=True
 - func: cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)
@@ -1951,6 +1972,7 @@
   dispatch:
     CPU: cummax_helper_cpu
     CUDA: cummax_helper_cuda
+    MPS: cummax_helper_mps

 - func: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
@@ -1975,6 +1997,7 @@
   dispatch:
     CPU: cummin_helper_cpu
     CUDA: cummin_helper_cuda
+    MPS: cummin_helper_mps

 - func: cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor
   variants: function
@@ -2139,7 +2162,7 @@
   dispatch:
     SparseCPU, SparseCUDA: div_sparse
     ZeroTensor: div_zerotensor
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_div_Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Tensor
   tags: [core, pointwise]

 - func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -2155,8 +2178,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: div_out
-    MPS: div_out_mps
+    CPU, CUDA, MPS, MTIA: div_out
     SparseCPU, SparseCUDA: div_out_sparse_zerodim
   tags: pointwise

@@ -2181,8 +2203,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: div_out_mode
-    MPS: div_out_mode_mps
+    CPU, CUDA, MPS: div_out_mode
     SparseCPU, SparseCUDA: div_out_sparse_zerodim
   tags: pointwise

@@ -2192,7 +2213,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: div
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_div_Scalar
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Scalar
   tags: [core, pointwise]

 - func: div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -2292,7 +2313,7 @@
 - func: embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: embedding_symint
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_embedding
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_embedding
   autogen: embedding.out
   tags: core

@@ -2388,7 +2409,7 @@
     MPS: empty_mps
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
-    SparseCPU, SparseCUDA: empty_sparse
+    SparseCPU, SparseCUDA, SparseMPS: empty_sparse
     SparseMeta: empty_sparse_symint
     SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
     SparseCsrMeta: empty_sparse_compressed_symint
@@ -2498,7 +2519,7 @@
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: empty_like_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out

 - func: empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -2516,7 +2537,7 @@
   structured_delegate: erf.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: erf_sparse
+    SparseCPU, SparseCUDA, SparseMPS: erf_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr
   tags: [core, pointwise]

@@ -2525,7 +2546,7 @@
   structured_delegate: erf.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: erf_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: erf_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_
   tags: pointwise
@@ -2534,9 +2555,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: erf_out
-    MPS: erf_out_mps
-    SparseCPU, SparseCUDA: erf_sparse_out
+    CPU, CUDA, MPS, MTIA: erf_out
+    SparseCPU, SparseCUDA, SparseMPS: erf_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
   tags: pointwise
@@ -2557,7 +2577,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: erfc_out
+    CPU, CUDA, MPS: erfc_out
   tags: pointwise

 - func: exp(Tensor self) -> Tensor
@@ -2577,7 +2597,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA, MPS: exp_out
+    CPU, CUDA, MPS, MTIA: exp_out
   tags: pointwise

 - func: exp2(Tensor self) -> Tensor
@@ -2594,8 +2614,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: exp2_out
-    MPS: exp2_out_mps
+    CPU, CUDA, MPS: exp2_out
   tags: pointwise

 - func: expm1(Tensor self) -> Tensor
@@ -2603,7 +2622,7 @@
   structured_delegate: expm1.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: expm1_sparse
+    SparseCPU, SparseCUDA, SparseMPS: expm1_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr
   tags: [core, pointwise]

@@ -2612,7 +2631,7 @@
   structured_delegate: expm1.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: expm1_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: expm1_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_
   tags: pointwise
@@ -2621,9 +2640,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: expm1_out
-    MPS: expm1_out_mps
-    SparseCPU, SparseCUDA: expm1_sparse_out
+    CPU, CUDA, MPS: expm1_out
+    SparseCPU, SparseCUDA, SparseMPS: expm1_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
   tags: pointwise
@@ -2703,7 +2721,7 @@
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: fill_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
   autogen: fill.Scalar_out

 - func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
@@ -2714,7 +2732,7 @@
     MPS: fill_tensor_mps_
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
-    NestedTensorCPU, NestedTensorCUDA: fill_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
   autogen: fill.Tensor_out

 - func: floor(Tensor self) -> Tensor
@@ -2722,7 +2740,7 @@
   structured_delegate: floor.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: floor_sparse
+    SparseCPU, SparseCUDA, SparseMPS: floor_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr
   tags: [core, pointwise]

@@ -2731,7 +2749,7 @@
   structured_delegate: floor.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: floor_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: floor_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_
   tags: pointwise

@@ -2741,7 +2759,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA, MPS: floor_out
-    SparseCPU, SparseCUDA: floor_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: floor_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
   tags: pointwise

@@ -2749,23 +2767,20 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: floor_divide
-    MPS: floor_divide_mps
+    CPU, CUDA, MPS, MTIA: floor_divide
     SparseCPU, SparseCUDA: floor_divide_sparse

 - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: floor_divide_
-    MPS: floor_divide_mps_
+    CPU, CUDA, MPS: floor_divide_
     SparseCPU, SparseCUDA: floor_divide_sparse_

 - func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: floor_divide_out
-    MPS: floor_divide_out_mps
+    CPU, CUDA, MPS: floor_divide_out
     SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim

 - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
@@ -2786,7 +2801,7 @@
   structured_delegate: frac.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: frac_sparse
+    SparseCPU, SparseCUDA, SparseMPS: frac_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr
   tags: pointwise

@@ -2795,7 +2810,7 @@
   structured_delegate: frac.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: frac_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: frac_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_
   tags: pointwise

@@ -2806,7 +2821,7 @@
   dispatch:
     CPU, CUDA: frac_out
     MPS: frac_out_mps
-    SparseCPU, SparseCUDA: frac_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: frac_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_out
   tags: pointwise

@@ -2919,6 +2934,7 @@
   dispatch:
     CPU: grid_sampler_3d_cpu
     CUDA: grid_sampler_3d_cuda
+    MPS: grid_sampler_3d_mps
   autogen: grid_sampler_3d.out

 # `grid_sampler_3d_backward` takes in `output_mask` to optimize performance for
@@ -3100,6 +3116,7 @@
     - dim -> int dim
   dispatch:
     CPU, CUDA: index_copy_out
+    MPS: index_copy_out_mps

 - func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
   variants: method
@@ -3170,7 +3187,7 @@
   variants: function
   structured: True
   dispatch:
-    CPU, CUDA: isin_Tensor_Scalar_out
+    CPU, CUDA, MPS: isin_Tensor_Scalar_out

 - func: isin.Tensor_Scalar(Tensor elements, Scalar test_element, *, bool assume_unique=False, bool invert=False) -> Tensor
   variants: function
@@ -3181,6 +3198,7 @@
   structured: True
   dispatch:
     CPU, CUDA: isin_Scalar_Tensor_out
+    MPS: isin_Scalar_Tensor_out_mps

 - func: isin.Scalar_Tensor(Scalar element, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor
   variants: function
@@ -3191,9 +3209,9 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA, MPS: isnan
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isnan
-    SparseCPU, SparseCUDA: isnan_sparse
+    CPU, CUDA, MPS, MTIA: isnan
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isnan
+    SparseCPU, SparseCUDA, SparseMPS: isnan_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
   autogen: isnan.out
   tags: [core, pointwise]
@@ -3243,7 +3261,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: nested_is_same_size
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_is_same_size
     CompositeExplicitAutograd: is_same_size

 - func: is_signed(Tensor self) -> bool
@@ -3265,20 +3283,21 @@

 - func: kron.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

-- func: kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
+- func: kthvalue(Tensor self, SymInt k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: kthvalue

-- func: kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
+- func: kthvalue.values(Tensor self, SymInt k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   dispatch:
     CPU: kthvalue_out_cpu
     CUDA: kthvalue_out_cuda
+    MPS: kthvalue_out_mps

-- func: kthvalue.dimname(Tensor self, int k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+- func: kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method

-- func: kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
+- func: kthvalue.dimname_out(Tensor self, SymInt k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)

 - func: layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor
   dispatch:
@@ -3290,7 +3309,7 @@
     CUDA: layer_norm_cuda
     MPS: layer_norm_mps
     CompositeExplicitAutograd: math_native_layer_norm
-    NestedTensorCPU, NestedTensorCUDA: nested_layer_norm
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_layer_norm
   autogen: native_layer_norm.out
   tags: core

@@ -3299,7 +3318,7 @@
     CPU: layer_norm_backward_cpu
     CUDA: layer_norm_backward_cuda
     MPS: layer_norm_backward_mps
-    NestedTensorCPU, NestedTensorCUDA: layer_norm_backward_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: layer_norm_backward_nested
   autogen: native_layer_norm_backward.out
   tags: core

@@ -3307,37 +3326,47 @@
   dispatch:
     CompositeImplicitAutograd: rms_norm_symint

+- func: _fused_rms_norm(Tensor input, int[] normalized_shape, Tensor? weight, float? eps) -> (Tensor, Tensor)
+  dispatch:
+    CUDA: _fused_rms_norm_cuda
+    MPS: _fused_rms_norm_mps
+    CompositeImplicitAutograd: rms_norm_composite
+
+- func: _fused_rms_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor rstd, Tensor? weight, bool[2] output_mask) -> (Tensor, Tensor)
+  dispatch:
+    CUDA: _fused_rms_norm_backward_cuda
+
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: nan_to_num
-    SparseCPU, SparseCUDA: nan_to_num_sparse
+    SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse
   tags: pointwise

 - func: nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: nan_to_num_
-    SparseCPU, SparseCUDA: nan_to_num_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse_
   tags: pointwise

 - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: nan_to_num_out
+    CPU, CUDA, MTIA: nan_to_num_out
     MPS: nan_to_num_out_mps
-    SparseCPU, SparseCUDA: nan_to_num_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse_out
   tags: pointwise

 - func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
   python_module: nn
   dispatch:
     CompositeImplicitAutograd: linear
-    NestedTensorCPU, NestedTensorCUDA: nested_linear
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_linear
     MPS: _mps_linear

 - func: linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: nested_linear_backward
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_linear_backward
     MPS: mps_linear_backward
   autogen: linear_backward.out
@@ -3371,7 +3400,7 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, bool split_k_one_kernel=True) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, int split_k_mode=-1) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm
   tags: needs_fixed_stride_order
@@ -3421,10 +3450,14 @@

 - func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor

-- func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
+- func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor? bias) -> Tensor
+
+- func: fbgemm_linear_fp16_weight_fp32_activation.out(Tensor input, Tensor packed_weight, Tensor? bias, Tensor(a!) output) -> Tensor

 - func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor

+- func: fbgemm_linear_fp16_weight.out(Tensor input, Tensor packed_weight, Tensor bias, Tensor(a!) output) -> Tensor
+
 - func: fbgemm_pack_quantized_matrix(Tensor input) -> Tensor

 - func: fbgemm_pack_quantized_matrix.KN(Tensor input, int K, int N) -> Tensor
@@ -3496,8 +3529,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: log_out
-    MPS: log_out_mps
+    CPU, CUDA, MPS, MTIA: log_out
   tags: pointwise

 - func: log10(Tensor self) -> Tensor
@@ -3517,8 +3549,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: log10_out
-    MPS: log10_out_mps
+    CPU, CUDA, MPS: log10_out
   tags: pointwise

 - func: log1p(Tensor self) -> Tensor
@@ -3526,7 +3557,7 @@
   structured_delegate: log1p.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: log1p_sparse
+    SparseCPU, SparseCUDA, SparseMPS: log1p_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr
   tags: [core, pointwise]

@@ -3535,7 +3566,7 @@
   structured_delegate: log1p.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: log1p_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: log1p_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_
   tags: pointwise
@@ -3544,9 +3575,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: log1p_out
-    MPS: log1p_out_mps
-    SparseCPU, SparseCUDA: log1p_sparse_out
+    CPU, CUDA, MPS: log1p_out
+    SparseCPU, SparseCUDA, SparseMPS: log1p_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
   tags: pointwise
@@ -3567,8 +3597,7 @@
|
|
3567
3597
|
structured: True
|
3568
3598
|
structured_inherits: TensorIteratorBase
|
3569
3599
|
dispatch:
|
3570
|
-
CPU, CUDA: log2_out
|
3571
|
-
MPS: log2_out_mps
|
3600
|
+
CPU, CUDA, MPS, MTIA: log2_out
|
3572
3601
|
tags: pointwise
|
3573
3602
|
|
3574
3603
|
- func: logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -3715,6 +3744,7 @@
|
|
3715
3744
|
dispatch:
|
3716
3745
|
CPU: log_softmax_cpu_out
|
3717
3746
|
CUDA: log_softmax_cuda_out
|
3747
|
+
MTIA: log_softmax_mtia_out
|
3718
3748
|
MPS: log_softmax_mps_out
|
3719
3749
|
|
3720
3750
|
- func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
|
@@ -3725,17 +3755,20 @@
|
|
3725
3755
|
dispatch:
|
3726
3756
|
CPU: log_softmax_backward_cpu_out
|
3727
3757
|
CUDA: log_softmax_backward_cuda_out
|
3758
|
+
MTIA: log_softmax_backward_mtia_out
|
3728
3759
|
MPS: log_softmax_backward_mps_out
|
3729
3760
|
|
3730
3761
|
- func: _logcumsumexp(Tensor self, int dim) -> Tensor
|
3731
3762
|
dispatch:
|
3732
3763
|
CPU: _logcumsumexp_cpu
|
3733
3764
|
CUDA: _logcumsumexp_cuda
|
3765
|
+
MPS: _logcumsumexp_mps
|
3734
3766
|
|
3735
3767
|
- func: _logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
|
3736
3768
|
dispatch:
|
3737
3769
|
CPU: _logcumsumexp_out_cpu
|
3738
3770
|
CUDA: _logcumsumexp_out_cuda
|
3771
|
+
MPS: _logcumsumexp_out_mps
|
3739
3772
|
|
3740
3773
|
- func: logcumsumexp(Tensor self, int dim) -> Tensor
|
3741
3774
|
variants: function, method
|
@@ -3776,17 +3809,17 @@
|
|
3776
3809
|
variants: function, method
|
3777
3810
|
dispatch:
|
3778
3811
|
CompositeImplicitAutograd: matmul
|
3779
|
-
NestedTensorCPU, NestedTensorCUDA: matmul_nested
|
3812
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_nested
|
3780
3813
|
|
3781
3814
|
- func: matmul_backward(Tensor grad, Tensor self, Tensor other, bool[2] mask) -> (Tensor, Tensor)
|
3782
3815
|
dispatch:
|
3783
|
-
NestedTensorCPU, NestedTensorCUDA: matmul_backward_nested
|
3816
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_backward_nested
|
3784
3817
|
autogen: matmul_backward.out
|
3785
3818
|
|
3786
3819
|
- func: matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
3787
3820
|
dispatch:
|
3788
3821
|
CompositeImplicitAutograd: matmul_out
|
3789
|
-
NestedTensorCPU, NestedTensorCUDA: matmul_out_nested
|
3822
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_out_nested
|
3790
3823
|
|
3791
3824
|
# Alias to linalg.matrix_power
|
3792
3825
|
- func: matrix_power(Tensor self, int n) -> Tensor
|
@@ -3848,7 +3881,7 @@
|
|
3848
3881
|
precomputed:
|
3849
3882
|
- dim -> int dim
|
3850
3883
|
dispatch:
|
3851
|
-
CPU, CUDA: max_out
|
3884
|
+
CPU, CUDA, MTIA: max_out
|
3852
3885
|
MPS: max_out_mps
|
3853
3886
|
|
3854
3887
|
- func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
@@ -4004,6 +4037,7 @@
|
|
4004
4037
|
dispatch:
|
4005
4038
|
CPU: nanmedian_cpu
|
4006
4039
|
CUDA: nanmedian_cuda
|
4040
|
+
MPS: nanmedian_mps
|
4007
4041
|
autogen: nanmedian.out
|
4008
4042
|
|
4009
4043
|
- func: nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
@@ -4015,6 +4049,7 @@
|
|
4015
4049
|
dispatch:
|
4016
4050
|
CPU: nanmedian_out_cpu
|
4017
4051
|
CUDA: nanmedian_out_cuda
|
4052
|
+
MPS: nanmedian_out_mps
|
4018
4053
|
|
4019
4054
|
- func: nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
4020
4055
|
variants: function, method
|
@@ -4035,7 +4070,7 @@
|
|
4035
4070
|
precomputed:
|
4036
4071
|
- dim -> int dim
|
4037
4072
|
dispatch:
|
4038
|
-
CPU, CUDA: min_out
|
4073
|
+
CPU, CUDA, MTIA: min_out
|
4039
4074
|
MPS: min_out_mps
|
4040
4075
|
|
4041
4076
|
- func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
@@ -4143,20 +4178,31 @@
|
|
4143
4178
|
dispatch:
|
4144
4179
|
CPU: mm_out_cpu
|
4145
4180
|
CUDA: mm_out_cuda
|
4181
|
+
MTIA: mm_out_mtia
|
4146
4182
|
MPS: mm_out_mps
|
4147
4183
|
XPU: mm_out_xpu
|
4148
4184
|
SparseCPU, SparseCUDA: _sparse_mm_out
|
4149
4185
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out
|
4150
4186
|
|
4187
|
+
- func: mm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
|
4188
|
+
dispatch:
|
4189
|
+
CUDA: _mm_dtype_cuda
|
4190
|
+
|
4191
|
+
- func: mm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
|
4192
|
+
dispatch:
|
4193
|
+
CUDA: _mm_dtype_out_cuda
|
4194
|
+
|
4151
4195
|
- func: _int_mm(Tensor self, Tensor mat2) -> Tensor
|
4152
4196
|
dispatch:
|
4153
4197
|
CPU: _int_mm_cpu
|
4154
4198
|
CUDA: _int_mm_cuda
|
4199
|
+
XPU: _int_mm_xpu
|
4155
4200
|
|
4156
4201
|
- func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
|
4157
4202
|
dispatch:
|
4158
4203
|
CPU: _int_mm_out_cpu
|
4159
4204
|
CUDA: _int_mm_out_cuda
|
4205
|
+
XPU: _int_mm_out_xpu
|
4160
4206
|
|
4161
4207
|
- func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
|
4162
4208
|
dispatch:
|
@@ -4168,6 +4214,10 @@
|
|
4168
4214
|
MPS: _weight_int4pack_mm_mps
|
4169
4215
|
CUDA: _weight_int4pack_mm_cuda
|
4170
4216
|
|
4217
|
+
- func: _weight_int4pack_mm_with_scales_and_zeros(Tensor self, Tensor mat2, int qGroupSize, Tensor qScale, Tensor qZeros) -> Tensor
|
4218
|
+
dispatch:
|
4219
|
+
XPU: _weight_int4pack_mm_xpu
|
4220
|
+
|
4171
4221
|
# Split int4 pack weight between cpu and other devices due to
|
4172
4222
|
# https://github.com/pytorch/ao/issues/1117#issuecomment-2451252756.
|
4173
4223
|
- func: _convert_weight_to_int4pack_for_cpu(Tensor self, int innerKTiles) -> Tensor
|
@@ -4189,6 +4239,7 @@
|
|
4189
4239
|
- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
|
4190
4240
|
dispatch:
|
4191
4241
|
CPU: _weight_int8pack_mm_cpu
|
4242
|
+
CUDA: _weight_int8pack_mm_cuda
|
4192
4243
|
MPS: _weight_int8pack_mm_mps
|
4193
4244
|
|
4194
4245
|
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor

@@ -4226,7 +4277,7 @@
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
     MkldnnCPU: mkldnn_mul
     ZeroTensor: mul_zerotensor
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul_Tensor
   tags: [core, pointwise]

 - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)

@@ -4237,7 +4288,7 @@
     SparseCPU, SparseCUDA: mul_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
     MkldnnCPU: mkldnn_mul_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Tensor
   tags: pointwise

 - func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

@@ -4245,8 +4296,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: mul_out
-    MPS: mul_out_mps
+    CPU, CUDA, MPS, MTIA: mul_out
     SparseCPU: mul_out_sparse_cpu
     SparseCUDA: mul_out_sparse_cuda
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr

@@ -4260,7 +4310,7 @@
   dispatch:
     CompositeExplicitAutograd: mul
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul_Scalar
   tags: [core, pointwise]

 - func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)

@@ -4269,7 +4319,7 @@
   dispatch:
     CompositeExplicitAutograd: mul_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Scalar
   autogen: mul.Scalar_out
   tags: pointwise
 # multiply, alias for mul

@@ -4335,7 +4385,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: narrow_symint
-    NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: narrow_nested_symint

 - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
   variants: function, method

@@ -4474,7 +4524,7 @@
     # NB: Although this composite mutates on the inside, it is
     # non-differentiable so NonFunctional doesn't apply
     CompositeExplicitAutograd: ones_like
-    NestedTensorCPU, NestedTensorCUDA: ones_like
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: ones_like
   autogen: ones_like.out

 - func: pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor

@@ -4618,7 +4668,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: rad2deg
-    SparseCPU, SparseCUDA: rad2deg_sparse
+    SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr
   tags: pointwise

@@ -4626,14 +4676,14 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: rad2deg_
-    SparseCPU, SparseCUDA: rad2deg_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_
   tags: pointwise

 - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: rad2deg_out
-    SparseCPU, SparseCUDA: rad2deg_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out
   tags: pointwise

@@ -4641,7 +4691,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: deg2rad
-    SparseCPU, SparseCUDA: deg2rad_sparse
+    SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr
   tags: pointwise

@@ -4649,14 +4699,14 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: deg2rad_
-    SparseCPU, SparseCUDA: deg2rad_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_
   tags: pointwise

 - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: deg2rad_out
-    SparseCPU, SparseCUDA: deg2rad_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_out
   tags: pointwise

@@ -4756,6 +4806,14 @@
     CompositeExplicitAutograd: randint_like
   autogen: randint_like.out

+- func: randint_like.Tensor(Tensor self, Tensor high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+  tags: nondeterministic_seeded
+  dispatch:
+    # NB: Although this composite mutates on the inside, it is
+    # non-differentiable so NonFunctional doesn't apply
+    CompositeExplicitAutograd: randint_like
+  autogen: randint_like.Tensor_out
+
 - func: randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
   tags: nondeterministic_seeded
   dispatch:
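
The `randint_like.Tensor` overload added above accepts a tensor-valued upper bound instead of a scalar `high`. A minimal sketch of what calling it could look like from torch-rb, assuming the gem's codegen picks the overload up through `Torch.randint_like` (the Ruby-side exposure is an assumption, not something this diff confirms):

```ruby
require "torch"

x    = Torch.ones(2, 3, dtype: :int64)
high = Torch.tensor(10)  # tensor-valued bound, accepted by the new overload (assumed exposed)

# Result has x's shape/dtype/device, values drawn from [0, high).
r = Torch.randint_like(x, high)
```
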
@@ -4865,7 +4923,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: reciprocal_out
+    CPU, CUDA, MTIA: reciprocal_out
     MPS: reciprocal_out_mps
   tags: pointwise

@@ -4874,9 +4932,9 @@
   structured_delegate: neg.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: neg_sparse
+    SparseCPU, SparseCUDA, SparseMPS: neg_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg
   tags: [core, pointwise]

 - func: neg_(Tensor(a!) self) -> Tensor(a!)

@@ -4884,9 +4942,9 @@
   structured_delegate: neg.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: neg_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: neg_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg_
   tags: pointwise

 - func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

@@ -4894,9 +4952,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: neg_out
-    MPS: neg_out_mps
-    SparseCPU, SparseCUDA: neg_out_sparse
+    CPU, CUDA, MPS, MTIA: neg_out
+    SparseCPU, SparseCUDA, SparseMPS: neg_out_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
   tags: pointwise
 # Alias for neg

@@ -4957,7 +5014,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS: _reshape_alias
+    CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS, MTIA: _reshape_alias
   # We don't need to support mkldnn since this is handled explicitly by the reshape operator.

 - func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor

@@ -4980,7 +5037,7 @@
   structured_delegate: round.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: round_sparse
+    SparseCPU, SparseCUDA, SparseMPS: round_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr
   tags: [core, pointwise]

@@ -4989,7 +5046,7 @@
   structured_delegate: round.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: round_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: round_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_
   tags: pointwise

@@ -4999,7 +5056,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA, MPS: round_out
-    SparseCPU, SparseCUDA: round_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: round_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
   tags: pointwise

@@ -5037,11 +5094,12 @@
   dispatch:
     CPU, CUDA: relu
     MPS: relu_mps
+    MTIA: relu_mtia
     MkldnnCPU: mkldnn_relu
     QuantizedCPU: relu_quantized_cpu
     QuantizedCUDA: relu_quantized_cuda
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
-    SparseCPU, SparseCUDA: relu_sparse
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu
+    SparseCPU, SparseCUDA, SparseMPS: relu_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
   tags: [core, pointwise]

@@ -5051,11 +5109,12 @@
   dispatch:
     CPU, CUDA: relu_
     MPS: relu_mps_
+    MTIA: relu_mtia_
     MkldnnCPU: mkldnn_relu_
     QuantizedCPU: relu_quantized_cpu_
     QuantizedCUDA: relu_quantized_cuda_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
-    SparseCPU, SparseCUDA: relu_sparse_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu_
+    SparseCPU, SparseCUDA, SparseMPS: relu_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
   autogen: relu.out
   tags: pointwise

@@ -5100,7 +5159,7 @@
   python_module: nn
   dispatch:
     QuantizedCPU: gelu_quantized_cpu_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_gelu_

 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
   structured_delegate: gelu.out

@@ -5110,7 +5169,7 @@
     MkldnnCPU: mkldnn_gelu
     QuantizedCPU: gelu_quantized_cpu
     QuantizedCUDA: gelu_quantized_cuda
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_gelu
   tags: [core, pointwise]

 - func: gelu_backward.grad_input(Tensor grad_output, Tensor self, *, str approximate='none', Tensor(a!) grad_input) -> Tensor(a!)

@@ -5127,7 +5186,7 @@
   python_module: nn
   dispatch:
     MkldnnCPU: mkldnn_gelu_backward
-    NestedTensorCPU, NestedTensorCUDA: gelu_backwards_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: gelu_backwards_nested
   tags: pointwise

 - func: infinitely_differentiable_gelu_backward(Tensor grad, Tensor self) -> Tensor

@@ -5141,7 +5200,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: hardshrink_out
+    CPU, CUDA, MPS: hardshrink_out

 - func: hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor
   structured_delegate: hardshrink.out

@@ -5153,7 +5212,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: hardshrink_backward_out
+    CPU, CUDA, MPS: hardshrink_backward_out

 - func: hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor
   structured_delegate: hardshrink_backward.grad_input

@@ -5176,8 +5235,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: rsqrt_out
-    MPS: rsqrt_out_mps
+    CPU, CUDA, MPS, MTIA: rsqrt_out
   tags: pointwise

 - func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)

@@ -5192,7 +5250,7 @@
   dispatch:
     CompositeExplicitAutograd: select_symint
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: select_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: select_nested
   tags: core

 - func: select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor

@@ -5208,7 +5266,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: _nested_select_backward_symint
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_select_backward_symint

 - func: selu(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator

@@ -5233,14 +5291,14 @@
   structured_delegate: silu.out
   python_module: nn
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_silu
   tags: pointwise

 - func: silu_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: silu.out
   python_module: nn
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_silu_
   tags: pointwise

 - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

@@ -5248,7 +5306,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA: silu_out
+    CPU, CUDA, MTIA: silu_out
     MPS: silu_out_mps
   tags: pointwise

@@ -5266,7 +5324,7 @@
   python_module: nn
   dispatch:
     CompositeImplicitAutograd: math_silu_backward
-    NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: silu_backward_nested
   tags: pointwise

 - func: mish(Tensor self) -> Tensor

@@ -5315,14 +5373,13 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: sigmoid_out
-    MPS: sigmoid_out_mps
+    CPU, CUDA, MPS: sigmoid_out
   tags: pointwise

 - func: logit(Tensor self, float? eps=None) -> Tensor
   variants: function, method
   dispatch:
-    CPU, CUDA: logit
+    CPU, CUDA, MTIA: logit
     MPS: logit_mps
   tags: pointwise

@@ -5344,8 +5401,8 @@
   variants: function, method
   dispatch:
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
-    SparseCPU, SparseCUDA: sin_sparse
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sin
+    SparseCPU, SparseCUDA, SparseMPS: sin_sparse
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sin
   tags: [core, pointwise]

 - func: sin_(Tensor(a!) self) -> Tensor(a!)

@@ -5354,7 +5411,7 @@
   variants: function, method
   dispatch:
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_
-    SparseCPU, SparseCUDA: sin_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: sin_sparse_
   tags: pointwise

 - func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

@@ -5362,10 +5419,9 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: sin_out
-    MPS: sin_out_mps
+    CPU, CUDA, MPS, MTIA: sin_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
-    SparseCPU, SparseCUDA: sin_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: sin_sparse_out
   tags: pointwise

 - func: sinc(Tensor self) -> Tensor

@@ -5390,7 +5446,7 @@
   structured_delegate: sinh.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: sinh_sparse
+    SparseCPU, SparseCUDA, SparseMPS: sinh_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr
   tags: [core, pointwise]

@@ -5399,7 +5455,7 @@
   structured_delegate: sinh.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: sinh_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: sinh_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_
   tags: pointwise

@@ -5408,9 +5464,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: sinh_out
-    MPS: sinh_out_mps
-    SparseCPU, SparseCUDA: sinh_sparse_out
+    CPU, CUDA, MPS: sinh_out
+    SparseCPU, SparseCUDA, SparseMPS: sinh_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out

 # Returns a copy of this `Variable` that is detached from its autograd graph.

@@ -5429,7 +5484,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: detach
-    NestedTensorCPU, NestedTensorCUDA: detach
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: detach

 # Like `detach()`, but modifies this `Variable` in-place. This method may
 # only be called on non-view `Variable`s. You can use `is_view()` to check

@@ -5458,6 +5513,13 @@
   tags: core
   manual_cpp_binding: True

+- func: sym_is_contiguous(Tensor self, MemoryFormat memory_format=contiguous_format) -> SymBool
+  variants: function
+  device_check: NoCheck
+  device_guard: False
+  tags: core
+  manual_cpp_binding: True
+
 - func: sym_numel(Tensor self) -> SymInt
   variants: function
   device_check: NoCheck

@@ -5559,7 +5621,7 @@
   structured_delegate: _softmax.out
   dispatch:
     MkldnnCPU: mkldnn_softmax
-    NestedTensorCPU, NestedTensorCUDA: softmax_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: softmax_nested
   tags: core

 - func: _softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)

@@ -5572,7 +5634,7 @@
 - func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
   structured_delegate: _softmax_backward_data.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: nested_softmax_backward
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_softmax_backward

 - func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True

@@ -5616,7 +5678,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: split_with_sizes
-    NestedTensorCPU, NestedTensorCUDA: split_with_sizes_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: split_with_sizes_nested
   tags: core

 - func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]

@@ -5644,7 +5706,7 @@
   dispatch:
     CompositeExplicitAutograd: squeeze
     QuantizedCPU, QuantizedCUDA: squeeze_quantized
-    NestedTensorCPU, NestedTensorCUDA: squeeze_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_nested

 - func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
   variants: function, method

@@ -5653,7 +5715,7 @@
   dispatch:
     CompositeExplicitAutograd: squeeze
     QuantizedCPU, QuantizedCUDA: squeeze_quantized
-    NestedTensorCPU, NestedTensorCUDA: squeeze_dim_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_dim_nested
   tags: core

 - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)

@@ -5669,7 +5731,7 @@
   dispatch:
     CompositeExplicitAutograd: squeeze
     QuantizedCPU, QuantizedCUDA: squeeze_quantized
-    NestedTensorCPU, NestedTensorCUDA: squeeze_dim_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_dim_nested
   tags: core

 - func: squeeze_(Tensor(a!) self) -> Tensor(a!)

@@ -5831,6 +5893,15 @@
     CPU, CUDA: nansum_out
     MPS: nansum_out_mps

+- func: hash_tensor(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0) -> Tensor
+  variants: function, method
+  structured_delegate: hash_tensor.out
+
+- func: hash_tensor.out(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  dispatch:
+    CPU, CUDA: hash_tensor_out
+
 - func: sum_to_size(Tensor self, SymInt[] size) -> Tensor
   variants: method
   device_check: NoCheck
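
`hash_tensor` above is a new reduction-style operator: it reduces over `dim` (all dimensions by default) and returns a hash per reduced slice, with a structured `hash_tensor.out` variant dispatching to `hash_tensor_out` on CPU and CUDA. A speculative Ruby sketch, assuming torch-rb generates a binding with the schema's name and defaults (not verified for this gem version):

```ruby
require "torch"

t = Torch.arange(12).reshape([3, 4])

# Hash over all elements (dim defaults to [] in the schema, mode to 0).
h_all = Torch.hash_tensor(t)

# Hash each row independently, keeping the reduced dimension.
h_rows = Torch.hash_tensor(t, [1], keepdim: true)
```
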
@@ -5843,8 +5914,8 @@
   structured_delegate: sqrt.out
   variants: function, method
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sqrt
-    SparseCPU, SparseCUDA: sqrt_sparse
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sqrt
+    SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
   tags: [core, pointwise]

@@ -5853,7 +5924,7 @@
   structured_delegate: sqrt.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: sqrt_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_
   tags: pointwise

@@ -5862,8 +5933,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA, MPS: sqrt_out
-    SparseCPU, SparseCUDA: sqrt_sparse_out
+    CPU, CUDA, MPS, MTIA: sqrt_out
+    SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
   tags: pointwise

@@ -6001,7 +6072,7 @@
   structured_delegate: tan.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: tan_sparse
+    SparseCPU, SparseCUDA, SparseMPS: tan_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr
   tags: [core, pointwise]

@@ -6010,7 +6081,7 @@
   structured_delegate: tan.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: tan_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: tan_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_
   tags: pointwise

@@ -6019,9 +6090,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: tan_out
-    MPS: tan_out_mps
-    SparseCPU, SparseCUDA: tan_sparse_out
+    CPU, CUDA, MPS: tan_out
+    SparseCPU, SparseCUDA, SparseMPS: tan_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
   tags: pointwise

@@ -6032,9 +6102,9 @@
   dispatch:
     QuantizedCPU: tanh_quantized_cpu
     MkldnnCPU: mkldnn_tanh
-    SparseCPU, SparseCUDA: tanh_sparse
+    SparseCPU, SparseCUDA, SparseMPS: tanh_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh
   tags: [core, pointwise]

 - func: tanh_(Tensor(a!) self) -> Tensor(a!)

@@ -6043,9 +6113,9 @@
   variants: function, method
   dispatch:
     MkldnnCPU: mkldnn_tanh_
-    SparseCPU, SparseCUDA: tanh_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: tanh_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh_
   tags: pointwise

 - func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

@@ -6053,8 +6123,8 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA, MPS: tanh_out
-    SparseCPU, SparseCUDA: tanh_sparse_out
+    CPU, CUDA, MPS, MTIA: tanh_out
+    SparseCPU, SparseCUDA, SparseMPS: tanh_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
   tags: pointwise

@@ -6102,7 +6172,7 @@
     MkldnnCPU: mkldnn_relu_backward
     SparseCPU, SparseCUDA: threshold_backward_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
-    NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: threshold_backwards_nested
   tags: pointwise

 - func: tile(Tensor self, SymInt[] dims) -> Tensor

@@ -6116,7 +6186,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: transpose
-    NestedTensorCPU, NestedTensorCUDA: transpose_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: transpose_nested

 - func: transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a)
   variants: function, method

@@ -6213,13 +6283,13 @@
 - func: _nested_tensor_size(Tensor self) -> Tensor
   variants: method
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: _nested_tensor_size
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_tensor_size
   autogen: _nested_tensor_size.out

 - func: _nested_tensor_strides(Tensor self) -> Tensor
   variants: method
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: _nested_tensor_strides
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_tensor_strides
   autogen: _nested_tensor_strides.out

 - func: _nested_tensor_storage_offsets(Tensor self) -> Tensor

@@ -6232,7 +6302,7 @@
 # _nested_from_padded_and_nested_example is available for testing.
 - func: _nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
   autogen: _nested_from_padded_and_nested_example.out

 # The input arguments' types to this functions are temporary. When nested tensors switch to using SymInts for their metadata representation

@@ -6326,8 +6396,8 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: trunc_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr
+    SparseCPU, SparseCUDA, SparseMPS: trunc_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr
   tags: [core, pointwise]

 - func: trunc_(Tensor(a!) self) -> Tensor(a!)

@@ -6335,8 +6405,8 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: trunc_sparse_
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_
+    SparseCPU, SparseCUDA, SparseMPS: trunc_sparse_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr_
   tags: pointwise

 - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

@@ -6345,8 +6415,8 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA, MPS: trunc_out
-    SparseCPU, SparseCUDA: trunc_sparse_out
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
+    SparseCPU, SparseCUDA, SparseMPS: trunc_sparse_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr_out
   tags: pointwise
 # Alias for trunc

@@ -6423,7 +6493,7 @@
     CompositeExplicitAutograd: unsqueeze
     SparseCPU, SparseCUDA: unsqueeze_sparse
     QuantizedCPU, QuantizedCUDA: unsqueeze_quantized
-    NestedTensorCPU, NestedTensorCUDA: unsqueeze_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: unsqueeze_nested
   tags: core

 - func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)

@@ -6517,15 +6587,15 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA, MPS: where
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
+    CPU, CUDA, MPS, MTIA: where
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA, MPS: where_self_out
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where_out
+    CPU, CUDA, MPS, MTIA: where_self_out
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_where_out

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function

@@ -6856,11 +6926,11 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: clone
-    SparseCPU, SparseCUDA: clone_sparse
+    SparseCPU, SparseCUDA, SparseMPS: clone_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
     MkldnnCPU: mkldnn_clone
     QuantizedCPU, QuantizedCUDA: quantized_clone
-    NestedTensorCPU, NestedTensorCUDA: clone_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: clone_nested
   autogen: clone.out
   tags: [core, pointwise]

@@ -6891,10 +6961,10 @@
     CPU, CUDA: zero_
     MPS: zero_mps_
     Meta: zero_meta_
-    SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: zero_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
-    NestedTensorCPU, NestedTensorCUDA: zero_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out

 - func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)

@@ -6904,6 +6974,7 @@
   dispatch:
     CPU, CUDA: sub_out
     MPS: sub_out_mps
+    MTIA: sub_out_mtia
     SparseCPU, SparseCUDA: sub_out_sparse
   tags: pointwise

@@ -6914,7 +6985,7 @@
   dispatch:
     SparseCPU, SparseCUDA: sub_sparse
     ZeroTensor: sub_zerotensor
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sub_Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sub_Tensor
   tags: [core, pointwise]

 - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)

@@ -6961,7 +7032,7 @@
   device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
-    CPU, CUDA: rsub
+    CPU, CUDA, MPS, MTIA: rsub
   autogen: rsub.Tensor_out

 - func: heaviside.out(Tensor self, Tensor values, *, Tensor(a!) out) -> Tensor(a!)

@@ -7029,6 +7100,7 @@
     CUDA: addmm_out_cuda
     MPS: addmm_out_mps
     XPU: addmm_out_xpu
+    MTIA: addmm_out_mtia
     SparseCPU: addmm_out_sparse_dense_cpu
     SparseCUDA: addmm_out_sparse_dense_cuda
     SparseCsrCPU: addmm_out_sparse_compressed_cpu

@@ -7043,6 +7115,14 @@
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
   tags: core

+- func: addmm.dtype(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  dispatch:
+    CUDA: _addmm_dtype_cuda
+
+- func: addmm.dtype_out(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: _addmm_dtype_out_cuda
+
 - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
   structured_delegate: addmm.out
   variants: method
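
The `addmm.dtype` overloads above thread an explicit `out_dtype` through the fused multiply-add, so the result dtype can differ from the input dtype; only CUDA kernels (`_addmm_dtype_cuda` / `_addmm_dtype_out_cuda`) are registered. A hedged Ruby sketch — the positional `out_dtype` call shape mirrors the schema, but whether torch-rb resolves this overload through `Torch.addmm` is an assumption:

```ruby
require "torch"

if Torch::CUDA.available?
  bias = Torch.zeros(4, 4, dtype: :float16, device: "cuda")
  a    = Torch.randn(4, 8, dtype: :float16, device: "cuda")
  b    = Torch.randn(8, 4, dtype: :float16, device: "cuda")

  # fp16 inputs, fp32 result via the new out_dtype argument (assumed exposed).
  out = Torch.addmm(bias, a, b, :float32)
end
```
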
@@ -7066,18 +7146,29 @@
 - func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
   variants: function
   dispatch:
+    CPU: _scaled_mm_cpu
     CUDA: _scaled_mm_cuda
+  tags: needs_exact_strides

 - func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
+    CPU: _scaled_mm_out_cpu
     CUDA: _scaled_mm_out_cuda
+  tags: needs_exact_strides


 - func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
   variants: function
   dispatch:
     CUDA: _scaled_grouped_mm_cuda
+  tags: needs_exact_strides
+
+- func: _grouped_mm(Tensor self, Tensor mat2, Tensor? offs=None, Tensor? bias=None, ScalarType? out_dtype=None) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _grouped_mm
+    CUDA: _grouped_mm_cuda

 # NOTE [ Sparse: autograd and API ]
 #
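
`_grouped_mm` above is a new grouped matrix multiply: with a 2-D input, a stacked 3-D weight tensor, and an `offs` tensor marking group boundaries, it performs one matmul per row group in a single call (a CUDA kernel plus a composite fallback). A rough sketch of the semantics in Ruby, assuming the private function is even bound as `Torch._grouped_mm` — underscore-prefixed natives may be skipped by the gem's codegen, so treat every name here as an assumption:

```ruby
require "torch"

x    = Torch.randn(5, 8)     # rows 0...2 form group 0, rows 2...5 form group 1
w    = Torch.randn(2, 8, 4)  # one 8x4 weight matrix per group
offs = Torch.tensor([2, 5])  # end offset of each row group

out = Torch._grouped_mm(x, w, offs)  # roughly: group i's rows times w[i]
```
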
@@ -7233,36 +7324,36 @@
   dispatch:
     CompositeImplicitAutograd: _sparse_coo_tensor_unsafe_symint

-- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None) -> ()
+- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None, bool? check_pinning=None) -> ()

-- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()
-- func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
-- func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
-- func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
-- func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout, bool? check_pinning=None) -> ()
+- func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+- func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+- func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+- func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()

 - func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_sparse
+    SparseCPU, SparseCUDA, SparseMeta, SparseMPS, Meta: new_with_dims_sparse
   autogen: _sparse_coo_tensor_with_dims.out

 - func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_and_tensor_sparse_symint
+    SparseCPU, SparseCUDA, SparseMeta, SparseMPS, Meta: new_with_dims_and_tensor_sparse_symint
   autogen: _sparse_coo_tensor_with_dims_and_tensors.out

 - func: sparse_resize_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: sparse_resize_
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_resize_
   autogen: sparse_resize, sparse_resize.out

 - func: sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: sparse_resize_and_clear_
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_resize_and_clear_
   autogen: sparse_resize_and_clear, sparse_resize_and_clear.out

 - func: sparse_mask(Tensor self, Tensor mask) -> Tensor
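
The `_validate_sparse_*_tensor_args` helpers above gain an optional `check_pinning` flag, and the COO constructors and resize methods pick up the `SparseMPS` key. These validators run when a sparse tensor is assembled from explicit index/value tensors; a small Ruby example of the public entry point that exercises them (torch-rb exposes `Torch.sparse_coo_tensor`; the exact call surface is the gem's, not this diff's):

```ruby
require "torch"

indices = Torch.tensor([[0, 1, 1], [2, 0, 2]])
values  = Torch.tensor([3.0, 4.0, 5.0])

# Construction validates indices/values/size via the internals updated above.
sparse = Torch.sparse_coo_tensor(indices, values, [2, 3])
dense  = sparse.to_dense
```
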
@@ -7288,8 +7379,8 @@
 - func: _to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA: sparse_to_dense
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_dense
+    SparseCPU, SparseCUDA, SparseMPS: sparse_to_dense
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: sparse_compressed_to_dense
     MkldnnCPU: mkldnn_to_dense
   autogen: _to_dense.out

@@ -7298,8 +7389,8 @@
 - func: sparse_dim(Tensor self) -> int
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_dim_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: sparse_dim_sparse_csr
     CompositeExplicitAutograd: sparse_dim_default
   device_check: NoCheck
   device_guard: False

@@ -7315,8 +7406,8 @@
 - func: dense_dim(Tensor self) -> int
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: dense_dim_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: dense_dim_sparse_csr
     CompositeExplicitAutograd: dense_dim_default
   device_check: NoCheck
   device_guard: False

@@ -7332,8 +7423,8 @@
 - func: _nnz(Tensor self) -> int
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _nnz_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -7349,12 +7440,13 @@
   dispatch:
     SparseCPU: _coalesce_sparse_cpu
     SparseCUDA: _coalesce_sparse_cuda
+    SparseMPS: _coalesce_sparse_mps
   autogen: _coalesce.out

 - func: is_coalesced(Tensor self) -> bool
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: is_coalesced_sparse
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: is_coalesced_sparse
     CompositeExplicitAutograd: is_coalesced_default
   device_check: NoCheck
   device_guard: False

@@ -7362,14 +7454,14 @@
 - func: _indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: _indices_sparse
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _indices_sparse
   device_check: NoCheck
   device_guard: False

 - func: _values(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: _values_sparse
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _values_sparse
   device_check: NoCheck
   device_guard: False

@@ -7379,7 +7471,7 @@
 - func: _coalesced_(Tensor(a!) self, bool coalesced) -> Tensor(a!)
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: _coalesced_sparse_
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _coalesced_sparse_
   device_check: NoCheck
   device_guard: False
   autogen: _coalesced, _coalesced.out

@@ -7387,7 +7479,7 @@
 - func: indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: indices_sparse
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: indices_sparse
     CompositeExplicitAutograd: indices_default
   device_check: NoCheck
   device_guard: False

@@ -7395,9 +7487,9 @@
 - func: values(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: values_sparse
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: values_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: values_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
   device_guard: False

@@ -7448,7 +7540,7 @@
   device_check: NoCheck # Allows copy into different device
   variants: function
   dispatch:
-    SparseCPU, SparseCUDA, SparseMeta: copy_sparse_
+    SparseCPU, SparseCUDA, SparseMPS, SparseMeta: copy_sparse_
   autogen: copy_sparse_to_sparse, copy_sparse_to_sparse.out

 # By adding the AutogradNestedTensor this makes this function CompositeImplicit-like for nested tensors

@@ -7456,7 +7548,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: unbind
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_unbind

 - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
   variants: function, method

@@ -7468,9 +7560,9 @@
 - func: _to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
   variants: method
   dispatch:
-    CPU, CUDA: dense_to_sparse
-    SparseCPU, SparseCUDA: sparse_coo_to_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
+    CPU, CUDA, MPS: dense_to_sparse
+    SparseCPU, SparseCUDA, SparseMPS: sparse_coo_to_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta, SparseCsrMPS: sparse_compressed_to_sparse
   autogen: _to_sparse.sparse_dim_out

 - func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor

@@ -7480,8 +7572,8 @@
 - func: _to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
   variants: method
   dispatch:
-    CPU, CUDA: dense_to_sparse
-    SparseCPU, SparseCUDA: sparse_coo_to_sparse
+    CPU, CUDA, MPS: dense_to_sparse
+    SparseCPU, SparseCUDA, SparseMPS: sparse_coo_to_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
   autogen: _to_sparse.out

@@ -7744,7 +7836,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: _to_copy
-    NestedTensorCPU, NestedTensorCUDA: _to_copy_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _to_copy_nested
   autogen: _to_copy.out
   tags: core

@@ -8030,7 +8122,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: masked_fill
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_masked_fill
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_masked_fill
   tags: pointwise

 - func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)

@@ -8085,9 +8177,9 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS: view
+    ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS, MTIA: view
     MkldnnCPU: mkldnn_view
-    NestedTensorCPU, NestedTensorCUDA: view_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: view_nested
   tags: core

 # Warning: If you want to change the name or overload name of this

@@ -8315,7 +8407,7 @@
   structured_inherits: TensorIteratorBase
   variants: function
   dispatch:
-    CPU, CUDA: bitwise_and_out
+    CPU, CUDA, MTIA: bitwise_and_out
     MPS: bitwise_and_out_mps
   tags: pointwise

@@ -8382,7 +8474,7 @@
   structured_inherits: TensorIteratorBase
   variants: function
   dispatch:
-    CPU, CUDA: bitwise_or_out
+    CPU, CUDA, MTIA: bitwise_or_out
     MPS: bitwise_or_out_mps
   tags: pointwise

@@ -8854,7 +8946,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: ne_Scalar_out
+    CPU, CUDA, MTIA: ne_Scalar_out
     MPS: ne_scalar_out_mps
     QuantizedCPU: ne_out_quantized_cpu
   tags: pointwise

@@ -8872,7 +8964,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: ne_Tensor_out
+    CPU, CUDA, MTIA: ne_Tensor_out
     MPS: ne_tensor_out_mps
     QuantizedCPU: ne_out_quantized_cpu
   tags: pointwise

@@ -8917,7 +9009,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: eq_Scalar_out
+    CPU, CUDA, MTIA: eq_Scalar_out
     MPS: eq_scalar_out_mps
     QuantizedCPU: eq_out_quantized_cpu
   tags: pointwise

@@ -8928,7 +9020,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: eq_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: eq_scalar_nested
   tags: [core, pointwise]

 - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

@@ -8936,7 +9028,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: eq_Tensor_out
+    CPU, CUDA, MTIA: eq_Tensor_out
     MPS: eq_tensor_out_mps
     QuantizedCPU: eq_out_quantized_cpu
   tags: pointwise

@@ -8947,7 +9039,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: eq_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: eq_tensor_nested
   tags: [core, pointwise]

 - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)

@@ -8955,7 +9047,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: ge_Scalar_out
+    CPU, CUDA, MTIA: ge_Scalar_out
     MPS: ge_scalar_out_mps
     QuantizedCPU: ge_out_quantized_cpu
   tags: pointwise

@@ -8966,7 +9058,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: ge_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: ge_scalar_nested
   tags: [core, pointwise]

 - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

@@ -8974,7 +9066,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: ge_Tensor_out
+    CPU, CUDA, MTIA: ge_Tensor_out
     MPS: ge_tensor_out_mps
     QuantizedCPU: ge_out_quantized_cpu
   tags: pointwise

@@ -9019,7 +9111,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: le_Scalar_out
+    CPU, CUDA, MTIA: le_Scalar_out
     MPS: le_scalar_out_mps
     QuantizedCPU: le_out_quantized_cpu
   tags: pointwise

@@ -9037,7 +9129,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: le_Tensor_out
+    CPU, CUDA, MTIA: le_Tensor_out
     MPS: le_tensor_out_mps
     QuantizedCPU: le_out_quantized_cpu
   tags: pointwise

@@ -9082,7 +9174,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: gt_Scalar_out
+    CPU, CUDA, MTIA: gt_Scalar_out
     MPS: gt_scalar_out_mps
     QuantizedCPU: gt_out_quantized_cpu
   tags: pointwise

@@ -9093,7 +9185,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: gt_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: gt_scalar_nested
   tags: [core, pointwise]

 - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

@@ -9101,7 +9193,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: gt_Tensor_out
+    CPU, CUDA, MTIA: gt_Tensor_out
     MPS: gt_tensor_out_mps
     QuantizedCPU: gt_out_quantized_cpu
   tags: pointwise

@@ -9146,7 +9238,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: lt_Scalar_out
+    CPU, CUDA, MTIA: lt_Scalar_out
     MPS: lt_scalar_out_mps
     QuantizedCPU: lt_out_quantized_cpu
   tags: pointwise

@@ -9164,7 +9256,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: lt_Tensor_out
+    CPU, CUDA, MTIA: lt_Tensor_out
     MPS: lt_tensor_out_mps
     QuantizedCPU: lt_out_quantized_cpu
   tags: pointwise

@@ -9329,7 +9421,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: addcmul_out
+    CPU, CUDA, MTIA: addcmul_out
    MPS: addcmul_out_mps
   tags: pointwise

@@ -9350,7 +9442,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: addcdiv_out
+    CPU, CUDA, MTIA: addcdiv_out
     MPS: addcdiv_out_mps
   tags: pointwise

@@ -9436,14 +9528,12 @@

 - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: cholesky_out
-    MPS: cholesky_mps_out
+    CPU, CUDA, MPS: cholesky_out

 - func: cholesky(Tensor self, bool upper=False) -> Tensor
   variants: method, function
   dispatch:
-    CPU, CUDA: cholesky
-    MPS: cholesky_mps
+    CPU, CUDA, MPS: cholesky

 - func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:

@@ -9520,13 +9610,13 @@
     MPS: lu_unpack_out_mps

 # TODO: remove dispatch section when porting TH CUDA to ATen
-- func: multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
+- func: multinomial.out(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   tags: nondeterministic_seeded
   dispatch:
     CPU, CUDA: multinomial_out
     MPS: multinomial_out_mps

-- func: multinomial(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
+- func: multinomial(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
   variants: method, function
   dispatch:
     CPU, CUDA: multinomial
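
In the `multinomial` entries above, `num_samples` moves from `int` to `SymInt`; call sites are unchanged. For reference, torch-rb generates `Torch.multinomial` from this schema (the keyword form of `replacement` is assumed to match the gem's arg parser):

```ruby
require "torch"

weights = Torch.tensor([0.1, 0.3, 0.6])

# Draw 4 indices from [0, 3), with replacement, weighted by `weights`.
samples = Torch.multinomial(weights, 4, replacement: true)
```
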
@@ -9641,7 +9731,7 @@
   structured_delegate: sign.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: sign_sparse
+    SparseCPU, SparseCUDA, SparseMPS: sign_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr
   tags: [core, pointwise]

@@ -9650,7 +9740,7 @@
   structured_delegate: sign.out
   variants: method
   dispatch:
-    SparseCPU, SparseCUDA: sign_sparse_
+    SparseCPU, SparseCUDA, SparseMPS: sign_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_
   tags: pointwise

@@ -9661,7 +9751,7 @@
   dispatch:
     CPU, CUDA: sign_out
     MPS: sign_out_mps
-    SparseCPU, SparseCUDA: sign_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: sign_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_out
   tags: pointwise

@@ -9669,7 +9759,7 @@
   variants: function, method
   structured_delegate: signbit.out
   dispatch:
-    SparseCPU, SparseCUDA: signbit_sparse
+    SparseCPU, SparseCUDA, SparseMPS: signbit_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr
   tags: pointwise

@@ -9680,7 +9770,7 @@
     CPU: signbit_out
     CUDA: signbit_out
     MPS: signbit_out_mps
-    SparseCPU, SparseCUDA: signbit_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: signbit_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr_out
   tags: pointwise

@@ -9727,8 +9817,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: lerp_Scalar
-    MPS: lerp_Scalar_mps
+    CPU, CUDA, MPS: lerp_Scalar
   tags: pointwise

 - func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)

@@ -9827,8 +9916,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: fmod_out
-    MPS: fmod_mps_out
+    CPU, CUDA, MPS, MTIA: fmod_out
   tags: pointwise

 - func: fmod.Tensor(Tensor self, Tensor other) -> Tensor

@@ -9865,7 +9953,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: igamma_out
+    CPU, CUDA, MPS: igamma_out
   tags: pointwise

 - func: igamma(Tensor self, Tensor other) -> Tensor

@@ -9882,7 +9970,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: igammac_out
+    CPU, CUDA, MPS: igammac_out
   tags: pointwise

 - func: igammac(Tensor self, Tensor other) -> Tensor

@@ -9934,8 +10022,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: remainder_out
-    MPS: remainder_out_mps
+    CPU, CUDA, MPS, MTIA: remainder_out
   tags: pointwise

 - func: remainder.Tensor(Tensor self, Tensor other) -> Tensor

@@ -10019,7 +10106,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: maximum_out
+    CPU, CUDA, MTIA: maximum_out
     MPS: maximum_out_mps
   tags: pointwise

@@ -10051,7 +10138,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: minimum_out
+    CPU, CUDA, MTIA: minimum_out
     MPS: minimum_out_mps
   tags: pointwise

@@ -10203,7 +10290,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA, Meta, MPS: unfold
+    CPU, CUDA, Meta, MPS, MTIA: unfold
     QuantizedCPU, QuantizedCUDA: unfold

 - func: unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor

@@ -10316,7 +10403,7 @@
     MPS: normal_mps_
     Meta: normal_meta_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: normal_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: normal_nested_
   autogen: normal.out

 # Only used by the functionalization pass.

@@ -10384,7 +10471,7 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: alias
-    NestedTensorCPU, NestedTensorCUDA: alias_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: alias_nested
   tags: core

 - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()

@@ -10392,6 +10479,7 @@
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
     CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
+    MPS: _amp_foreach_non_finite_check_and_unscale_mps_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)

@@ -10399,6 +10487,7 @@
   dispatch:
     CUDA: _amp_update_scale_cuda_
     CPU: _amp_update_scale_cpu_
+    MPS: _amp_update_scale_mps_
   autogen: _amp_update_scale, _amp_update_scale.out

 #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
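The two _amp_* ops gaining MPS kernels are the primitives behind dynamic loss scaling: unscale gradients while checking for non-finite values, then grow or back off the scale. Illustrative Ruby only, written with public tensor ops rather than the private kernels (which torch-rb may not bind at all):

  # Unscale each grad in place; report whether any value was Inf/NaN.
  def unscale_and_check(grads, inv_scale)
    found_inf = false
    grads.each do |g|
      found_inf ||= !g.isfinite.all.item
      g.mul!(inv_scale)
    end
    found_inf
  end

  # The rule _amp_update_scale_ fuses: back off on overflow, grow after
  # growth_interval consecutive clean steps.
  def update_scale(scale, tracker, found_inf, growth: 2.0, backoff: 0.5, interval: 2000)
    return [scale * backoff, 0] if found_inf
    tracker += 1
    tracker >= interval ? [scale * growth, 0] : [scale, tracker]
  end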
@@ -10427,6 +10516,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow_
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
+    MTIA: foreach_tensor_add_scalar_kernel_mtia_
   autogen: _foreach_add.Scalar_out

 - func: _foreach_add.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]

@@ -10435,6 +10525,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow
     CUDA: foreach_tensor_add_list_kernel_cuda
+    MTIA: foreach_tensor_add_list_kernel_mtia

 - func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices

@@ -10442,6 +10533,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow_
     CUDA: foreach_tensor_add_list_kernel_cuda_
+    MTIA: foreach_tensor_add_list_kernel_mtia_
   autogen: _foreach_add.List_out

 - func: _foreach_add.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]

@@ -10472,6 +10564,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow_
     CUDA: foreach_tensor_add_tensor_kernel_cuda_
+    MTIA: foreach_tensor_add_tensor_kernel_mtia_
   autogen: _foreach_add.Tensor_out

 - func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]

@@ -10532,6 +10625,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow_
     CUDA: foreach_tensor_mul_scalar_kernel_cuda_
+    MTIA: foreach_tensor_mul_scalar_kernel_mtia_
   autogen: _foreach_mul.Scalar_out

 - func: _foreach_mul.List(Tensor[] self, Tensor[] other) -> Tensor[]

@@ -10540,6 +10634,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow
     CUDA: foreach_tensor_mul_list_kernel_cuda
+    MTIA: foreach_tensor_mul_list_kernel_mtia

 - func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices

@@ -10547,6 +10642,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow_
     CUDA: foreach_tensor_mul_list_kernel_cuda_
+    MTIA: foreach_tensor_mul_list_kernel_mtia_
   autogen: _foreach_mul.List_out

 - func: _foreach_mul.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]

@@ -10570,6 +10666,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow
     CUDA: foreach_tensor_mul_tensor_kernel_cuda
+    MTIA: foreach_tensor_mul_tensor_kernel_mtia

 - func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices

@@ -10577,6 +10674,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow_
     CUDA: foreach_tensor_mul_tensor_kernel_cuda_
+    MTIA: foreach_tensor_mul_tensor_kernel_mtia_
   autogen: _foreach_mul.Tensor_out

 - func: _foreach_div.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
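All of the _foreach_* entries in this stretch add MTIA kernels; the ops themselves are horizontally fused loops over tensor lists. Their semantics, spelled out one tensor at a time in plain torch-rb (illustrative; the fused kernels exist purely for speed, and torch-rb's codegen may not expose the private _foreach_* bindings):

  params = [Torch.rand(2, 2), Torch.rand(3)]
  grads  = [Torch.rand(2, 2), Torch.rand(3)]

  # what _foreach_add_(params, grads, alpha=-0.01) computes
  params.zip(grads) { |p, g| p.add!(g, alpha: -0.01) }

  # what _foreach_mul(params, grads) computes
  products = params.zip(grads).map { |p, g| p * g }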
@@ -10873,6 +10971,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow
     CUDA: foreach_tensor_addcmul_scalar_cuda
+    MTIA: foreach_tensor_addcmul_scalar_mtia

 - func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices

@@ -10894,6 +10993,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow_
     CUDA: foreach_tensor_addcmul_scalar_cuda_
+    MTIA: foreach_tensor_addcmul_scalar_mtia_
   autogen: _foreach_addcmul.Scalar_out

 - func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()

@@ -10918,6 +11018,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_abs_slow
     CUDA: foreach_tensor_abs_cuda
+    MTIA: foreach_tensor_abs_mtia

 - func: _foreach_abs_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices

@@ -10925,6 +11026,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_abs_slow_
     CUDA: foreach_tensor_abs_cuda_
+    MTIA: foreach_tensor_abs_mtia_
   autogen: _foreach_abs.out

 - func: _foreach_acos(Tensor[] self) -> Tensor[]

@@ -11259,6 +11361,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_norm_slow
     CUDA: foreach_tensor_norm_cuda
+    MTIA: foreach_tensor_norm_mtia
   autogen: _foreach_norm.Scalar_out

 - func: _foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]

@@ -11431,6 +11534,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_sqrt_slow_
     CUDA: foreach_tensor_sqrt_cuda_
+    MTIA: foreach_tensor_sqrt_mtia_
   autogen: _foreach_sqrt.out

 - func: _foreach_tan(Tensor[] self) -> Tensor[]

@@ -11492,6 +11596,7 @@
   dispatch:
     CompositeExplicitAutograd: foreach_tensor_copy_list_kernel_slow_
     CUDA: foreach_tensor_copy_list_kernel_cuda_
+    MTIA: foreach_tensor_copy_list_kernel_mtia_
   autogen: _foreach_copy.out

 - func: _foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out

@@ -11499,6 +11604,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutograd: _foreach_copy
+    MTIA: foreach_tensor_copy_list_kernel_mtia

 - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
   dispatch:

@@ -11801,7 +11907,7 @@
   structured_delegate: elu.out
   device_check: NoCheck # TensorIterator
   python_module: nn
-  tags: pointwise
+  tags: [core, pointwise]

 - func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True

@@ -11865,8 +11971,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardsigmoid_out
-    MPS: hardsigmoid_out_mps
+    CPU, CUDA, MPS: hardsigmoid_out
     QuantizedCPU: hardsigmoid_out_quantized_cpu

 - func: hardsigmoid(Tensor self) -> Tensor

@@ -11887,8 +11992,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA: hardsigmoid_backward_out
-    MPS: hardsigmoid_backward_out_mps
+    CPU, CUDA, MPS: hardsigmoid_backward_out

 - func: hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: hardsigmoid_backward.grad_input

@@ -11932,28 +12036,24 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardswish_out
-    MPS: hardswish_out_mps
+    CPU, CUDA, MPS: hardswish_out

 - func: hardswish(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardswish
-    MPS: hardswish_mps
+    CPU, CUDA, MPS: hardswish

 - func: hardswish_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardswish_
-    MPS: hardswish_mps_
+    CPU, CUDA, MPS: hardswish_

 - func: hardswish_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
-    CPU, CUDA: hardswish_backward
-    MPS: hardswish_backward_mps
+    CPU, CUDA, MPS: hardswish_backward
   autogen: hardswish_backward.out

 - func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)
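The activation entries above (hardsigmoid and hardswish here, leaky_relu and softshrink just below) collapse their separate MPS registrations into shared CPU, CUDA, MPS lines, so they now dispatch natively on Apple GPUs. A quick smoke test, assuming torch-rb's usual MPS plumbing (Torch::Backends::MPS) and its generated activation modules:

  require "torch"

  device = Torch::Backends::MPS.available? ? "mps" : "cpu"
  x = Torch.randn([8, 16], device: device)
  y = Torch::NN::LeakyReLU.new(negative_slope: 0.01).call(x)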
@@ -11962,8 +12062,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: leaky_relu_out
-    MPS: leaky_relu_out_mps
+    CPU, CUDA, MPS: leaky_relu_out
     QuantizedCPU: leaky_relu_out_quantized_cpu

 - func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor

@@ -11979,8 +12078,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA: leaky_relu_backward_out
-    MPS: leaky_relu_backward_out_mps
+    CPU, CUDA, MPS: leaky_relu_backward_out

 - func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
   structured_delegate: leaky_relu_backward.grad_input

@@ -12092,8 +12190,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: softshrink_out
-    MPS: softshrink_out_mps
+    CPU, CUDA, MPS: softshrink_out

 - func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
   structured_delegate: softshrink.out

@@ -12106,8 +12203,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA: softshrink_backward_out
-    MPS: softshrink_backward_out_mps
+    CPU, CUDA, MPS: softshrink_backward_out

 - func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
   structured_delegate: softshrink_backward.grad_input

@@ -12284,6 +12380,7 @@
   dispatch:
     CPU: avg_pool3d_out_cpu
     CUDA: avg_pool3d_out_cuda
+    MPS: avg_pool3d_out_mps
     MkldnnCPU: mkldnn_avg_pool3d_out

 - func: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor

@@ -12300,6 +12397,7 @@
   dispatch:
     CPU: avg_pool3d_backward_out_cpu
     CUDA: avg_pool3d_backward_out_cuda
+    MPS: avg_pool3d_backward_out_mps
     MkldnnCPU: mkldnn_avg_pool3d_backward_out

 - func: avg_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor

@@ -12395,6 +12493,7 @@
   dispatch:
     CPU: max_pool3d_with_indices_out_cpu
     CUDA: max_pool3d_with_indices_out_cuda
+    MPS: max_pool3d_with_indices_out_mps

 # Return: (Tensor output, Tensor indices)
 - func: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)

@@ -12402,6 +12501,7 @@
   dispatch:
     CPU: max_pool3d_with_indices_cpu
     CUDA: max_pool3d_with_indices_cuda
+    MPS: max_pool3d_with_indices_mps
   tags: core

 - func: max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)

@@ -12409,36 +12509,42 @@
   dispatch:
     CPU: max_pool3d_with_indices_backward_out_cpu
     CUDA: max_pool3d_with_indices_backward_out_cuda
+    MPS: max_pool3d_with_indices_backward_out_mps

 - func: max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor
   python_module: nn
   dispatch:
     CPU: max_pool3d_with_indices_backward_cpu
     CUDA: max_pool3d_with_indices_backward_cuda
+    MPS: max_pool3d_with_indices_backward_mps

 - func: max_unpool2d.out(Tensor self, Tensor indices, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: max_unpooling2d_forward_out_cpu
     CUDA: max_unpooling2d_forward_out_cuda
+    MPS: max_unpooling2d_forward_out_mps

 - func: max_unpool2d(Tensor self, Tensor indices, SymInt[2] output_size) -> Tensor
   python_module: nn
   dispatch:
     CPU: max_unpooling2d_forward_cpu
     CUDA: max_unpooling2d_forward_cuda
+    MPS: max_unpooling2d_forward_mps

 - func: max_unpool3d.out(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: max_unpooling3d_forward_out_cpu
     CUDA: max_unpooling3d_forward_out_cuda
+    MPS: max_unpooling3d_forward_out_mps

 - func: max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor
   python_module: nn
   dispatch:
     CPU: max_unpooling3d_forward_cpu
     CUDA: max_unpooling3d_forward_cuda
+    MPS: max_unpooling3d_forward_mps

 - func: reflection_pad1d.out(Tensor self, SymInt[2] padding, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
|
|
12769
12875
|
dispatch:
|
12770
12876
|
CPU: _upsample_bicubic2d_aa_out_cpu
|
12771
12877
|
CUDA: _upsample_bicubic2d_aa_out_cuda
|
12878
|
+
MPS: _upsample_bicubic2d_aa_out_mps
|
12772
12879
|
|
12773
12880
|
- func: _upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
|
12774
12881
|
python_module: nn
|
@@ -12791,6 +12898,7 @@
|
|
12791
12898
|
dispatch:
|
12792
12899
|
CPU: upsample_trilinear3d_out_cpu
|
12793
12900
|
CUDA: upsample_trilinear3d_out_cuda
|
12901
|
+
MPS: upsample_trilinear3d_out_mps
|
12794
12902
|
|
12795
12903
|
- func: upsample_trilinear3d(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
|
12796
12904
|
python_module: nn
|
@@ -12802,6 +12910,7 @@
|
|
12802
12910
|
dispatch:
|
12803
12911
|
CPU: upsample_trilinear3d_backward_out_cpu
|
12804
12912
|
CUDA: upsample_trilinear3d_backward_out_cuda
|
12913
|
+
MPS: upsample_trilinear3d_backward_out_mps
|
12805
12914
|
|
12806
12915
|
- func: upsample_trilinear3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
|
12807
12916
|
python_module: nn
|
@@ -12913,6 +13022,7 @@
|
|
12913
13022
|
dispatch:
|
12914
13023
|
CPU: upsample_nearest3d_out_cpu
|
12915
13024
|
CUDA: upsample_nearest3d_out_cuda
|
13025
|
+
MPS: upsample_nearest3d_out_mps
|
12916
13026
|
|
12917
13027
|
- func: _upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
|
12918
13028
|
python_module: nn
|
@@ -12920,6 +13030,7 @@
|
|
12920
13030
|
dispatch:
|
12921
13031
|
CPU: _upsample_nearest_exact3d_out_cpu
|
12922
13032
|
CUDA: _upsample_nearest_exact3d_out_cuda
|
13033
|
+
MPS: _upsample_nearest_exact3d_out_mps
|
12923
13034
|
|
12924
13035
|
- func: upsample_nearest3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
|
12925
13036
|
python_module: nn
|
@@ -12939,6 +13050,7 @@
|
|
12939
13050
|
dispatch:
|
12940
13051
|
CPU: upsample_nearest3d_backward_out_cpu
|
12941
13052
|
CUDA: upsample_nearest3d_backward_out_cuda
|
13053
|
+
MPS: upsample_nearest3d_backward_out_mps
|
12942
13054
|
|
12943
13055
|
- func: _upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
|
12944
13056
|
python_module: nn
|
@@ -12946,6 +13058,7 @@
|
|
12946
13058
|
dispatch:
|
12947
13059
|
CPU: _upsample_nearest_exact3d_backward_out_cpu
|
12948
13060
|
CUDA: _upsample_nearest_exact3d_backward_out_cuda
|
13061
|
+
MPS: _upsample_nearest_exact3d_backward_out_mps
|
12949
13062
|
|
12950
13063
|
- func: upsample_nearest3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
|
12951
13064
|
python_module: nn
|
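The 3-D upsampling entries (trilinear, nearest, nearest-exact, plus anti-aliased bicubic) gain MPS forward and backward kernels. A sketch, assuming torch-rb's Upsample module wraps these ops as in current releases:

  x = Torch.randn([1, 3, 4, 4, 4])   # N, C, D, H, W
  up = Torch::NN::Upsample.new(scale_factor: 2, mode: "trilinear", align_corners: false)
  y = up.call(x)                     # shape becomes [1, 3, 8, 8, 8]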
@@ -12988,7 +13101,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: tanh_backward_out
+    CPU, CUDA, MTIA: tanh_backward_out
     MPS: tanh_backward_out_mps
   tags: pointwise

@@ -13120,12 +13233,14 @@
   dispatch:
     CPU: col2im_out_cpu
     CUDA: col2im_out_cuda
+    MPS: col2im_out_mps

 - func: col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
   python_module: nn
   dispatch:
     CPU: col2im_cpu
     CUDA: col2im_cuda
+    MPS: col2im_mps
   tags: core

 - func: column_stack(Tensor[] tensors) -> Tensor

@@ -13158,8 +13273,8 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: isinf
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isinf
-    SparseCPU, SparseCUDA: isinf_sparse
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isinf
+    SparseCPU, SparseCUDA, SparseMPS: isinf_sparse
     SparseMeta: isinf_sparse_meta
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
   autogen: isinf.out

@@ -13174,8 +13289,8 @@
   variants: function, method
   structured_delegate: isposinf.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isposinf
-    SparseCPU, SparseCUDA: isposinf_sparse
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isposinf
+    SparseCPU, SparseCUDA, SparseMPS: isposinf_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
   tags: pointwise

@@ -13184,7 +13299,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA, MPS: isposinf_out
-    SparseCPU, SparseCUDA: isposinf_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: isposinf_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
   tags: pointwise

@@ -13192,8 +13307,8 @@
   variants: function, method
   structured_delegate: isneginf.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isneginf
-    SparseCPU, SparseCUDA: isneginf_sparse
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isneginf
+    SparseCPU, SparseCUDA, SparseMPS: isneginf_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
   tags: pointwise

@@ -13202,7 +13317,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA, MPS: isneginf_out
-    SparseCPU, SparseCUDA: isneginf_sparse_out
+    SparseCPU, SparseCUDA, SparseMPS: isneginf_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
   tags: pointwise
|
|
13500
13615
|
structured: True
|
13501
13616
|
structured_inherits: TensorIteratorBase
|
13502
13617
|
dispatch:
|
13503
|
-
CPU, CUDA: special_i0e_out
|
13618
|
+
CPU, CUDA, MPS: special_i0e_out
|
13504
13619
|
tags: pointwise
|
13505
13620
|
|
13506
13621
|
- func: special_i1(Tensor self) -> Tensor
|
@@ -13528,7 +13643,7 @@
|
|
13528
13643
|
structured: True
|
13529
13644
|
structured_inherits: TensorIteratorBase
|
13530
13645
|
dispatch:
|
13531
|
-
CPU, CUDA: special_i1e_out
|
13646
|
+
CPU, CUDA, MPS: special_i1e_out
|
13532
13647
|
tags: pointwise
|
13533
13648
|
|
13534
13649
|
- func: special_logit(Tensor self, float? eps=None) -> Tensor
|
@@ -13897,8 +14012,7 @@
|
|
13897
14012
|
python_module: linalg
|
13898
14013
|
structured: True
|
13899
14014
|
dispatch:
|
13900
|
-
CPU, CUDA: linalg_cholesky_ex_out
|
13901
|
-
MPS: linalg_cholesky_ex_out_mps
|
14015
|
+
CPU, CUDA, MPS: linalg_cholesky_ex_out
|
13902
14016
|
|
13903
14017
|
- func: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
|
13904
14018
|
python_module: linalg
|
@@ -14468,13 +14582,13 @@
|
|
14468
14582
|
dispatch:
|
14469
14583
|
# the NestedTensor keys are necessary because NestedTensor has been removed
|
14470
14584
|
# from the CompositeExplicitAutograd keyset see Note [NestedTensor Not Included in Backend Keys]
|
14471
|
-
CompositeExplicitAutograd, NestedTensorCPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_fullcoverage
|
14585
|
+
CompositeExplicitAutograd, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_fullcoverage
|
14472
14586
|
autogen: _test_autograd_multiple_dispatch.fullcoverage_out
|
14473
14587
|
|
14474
14588
|
# Note: this function is only for testing.
|
14475
14589
|
- func: _test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor
|
14476
14590
|
dispatch:
|
14477
|
-
CompositeImplicitAutograd, NestedTensorCPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_ntonly
|
14591
|
+
CompositeImplicitAutograd, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_ntonly
|
14478
14592
|
|
14479
14593
|
# Note: this function is only for testing.
|
14480
14594
|
- func: _test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a)
|
@@ -14819,13 +14933,13 @@
|
|
14819
14933
|
- func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
|
14820
14934
|
dispatch:
|
14821
14935
|
CompositeExplicitAutograd: _safe_softmax
|
14822
|
-
NestedTensorCPU, NestedTensorCUDA: _safe_softmax
|
14936
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _safe_softmax
|
14823
14937
|
|
14824
14938
|
# Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
|
14825
14939
|
- func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
|
14826
14940
|
variants: function
|
14827
14941
|
dispatch:
|
14828
|
-
CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_encoder_layer_forward
|
14942
|
+
CPU, CUDA, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: transformer_encoder_layer_forward
|
14829
14943
|
autogen: _transformer_encoder_layer_fwd.out
|
14830
14944
|
|
14831
14945
|
- func: _native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None) -> (Tensor, Tensor)
|
@@ -14916,6 +15030,7 @@
|
|
14916
15030
|
- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
|
14917
15031
|
dispatch:
|
14918
15032
|
CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
|
15033
|
+
NestedTensorCUDA: _scaled_dot_product_cudnn_attention_nestedtensor_backward_cuda
|
14919
15034
|
tags: nondeterministic_seeded
|
14920
15035
|
|
14921
15036
|
- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
|
@@ -14948,6 +15063,11 @@
|
|
14948
15063
|
CUDA: _cudnn_attention_forward
|
14949
15064
|
tags: nondeterministic_seeded
|
14950
15065
|
|
15066
|
+
- func: _cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
|
15067
|
+
dispatch:
|
15068
|
+
CUDA: _cudnn_attention_backward
|
15069
|
+
tags: nondeterministic_seeded
|
15070
|
+
|
14951
15071
|
- func: _triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor
|
14952
15072
|
variants: function
|
14953
15073
|
dispatch:
|
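_cudnn_attention_backward is a new entry, and the cuDNN SDPA backward gains a nested-tensor variant; both are internal kernels reached through scaled dot-product attention rather than called directly. Reference semantics in plain torch-rb ops (illustrative; ignores dropout, masking, and the fused kernels' numerics):

  def sdpa(q, k, v, scale: nil)
    scale ||= 1.0 / Math.sqrt(q.shape.last)
    attn = Torch.softmax(Torch.matmul(q, k.transpose(-2, -1)) * scale, -1)
    Torch.matmul(attn, v)
  end

  q = Torch.randn([2, 4, 8, 16])   # batch, heads, seq, head_dim
  out = sdpa(q, Torch.randn([2, 4, 8, 16]), Torch.randn([2, 4, 8, 16]))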
@@ -14990,7 +15110,7 @@

 - func: special_bessel_j0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_bessel_j0_out
+    CPU, CUDA, MPS: special_bessel_j0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15005,7 +15125,7 @@

 - func: special_bessel_j1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_bessel_j1_out
+    CPU, CUDA, MPS: special_bessel_j1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15020,7 +15140,7 @@

 - func: special_bessel_y0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_bessel_y0_out
+    CPU, CUDA, MPS: special_bessel_y0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15035,7 +15155,7 @@

 - func: special_bessel_y1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_bessel_y1_out
+    CPU, CUDA, MPS: special_bessel_y1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15068,7 +15188,7 @@
 - func: special_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_chebyshev_polynomial_t_out
+    CPU, CUDA, MPS: special_chebyshev_polynomial_t_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15117,7 +15237,7 @@
 - func: special_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_chebyshev_polynomial_u_out
+    CPU, CUDA, MPS: special_chebyshev_polynomial_u_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15166,7 +15286,7 @@
 - func: special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_chebyshev_polynomial_v_out
+    CPU, CUDA, MPS: special_chebyshev_polynomial_v_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15215,7 +15335,7 @@
 - func: special_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_chebyshev_polynomial_w_out
+    CPU, CUDA, MPS: special_chebyshev_polynomial_w_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15264,7 +15384,7 @@
 - func: special_hermite_polynomial_h.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_hermite_polynomial_h_out
+    CPU, CUDA, MPS: special_hermite_polynomial_h_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15313,7 +15433,7 @@
 - func: special_hermite_polynomial_he.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_hermite_polynomial_he_out
+    CPU, CUDA, MPS: special_hermite_polynomial_he_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15442,7 +15562,7 @@

 - func: special_modified_bessel_i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_modified_bessel_i0_out
+    CPU, CUDA, MPS: special_modified_bessel_i0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15457,7 +15577,7 @@

 - func: special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_modified_bessel_i1_out
+    CPU, CUDA, MPS: special_modified_bessel_i1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15472,7 +15592,7 @@

 - func: special_modified_bessel_k0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_modified_bessel_k0_out
+    CPU, CUDA, MPS: special_modified_bessel_k0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15487,7 +15607,7 @@

 - func: special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_modified_bessel_k1_out
+    CPU, CUDA, MPS: special_modified_bessel_k1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15502,7 +15622,7 @@

 - func: special_scaled_modified_bessel_k0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_scaled_modified_bessel_k0_out
+    CPU, CUDA, MPS: special_scaled_modified_bessel_k0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15517,7 +15637,7 @@

 - func: special_scaled_modified_bessel_k1.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_scaled_modified_bessel_k1_out
+    CPU, CUDA, MPS: special_scaled_modified_bessel_k1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15550,7 +15670,7 @@
 - func: special_shifted_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_shifted_chebyshev_polynomial_t_out
+    CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_t_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15599,7 +15719,7 @@
 - func: special_shifted_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_shifted_chebyshev_polynomial_u_out
+    CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_u_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15648,7 +15768,7 @@
 - func: special_shifted_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_shifted_chebyshev_polynomial_v_out
+    CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_v_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15697,7 +15817,7 @@
 - func: special_shifted_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_shifted_chebyshev_polynomial_w_out
+    CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_w_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True

@@ -15806,8 +15926,17 @@
   variants: function
   dispatch:
     CPU: _fused_adagrad_kernel_cpu_
+    CUDA: _fused_adagrad_kernel_cuda_
   autogen: _fused_adagrad, _fused_adagrad.out

+- func: _fused_adagrad_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor[] state_steps, *, Tensor lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _fused_adagrad_kernel_cpu_
+    CUDA: _fused_adagrad_kernel_cuda_
+  autogen: _fused_adagrad.tensor_lr, _fused_adagrad.tensor_lr_out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function
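The new _fused_adagrad_.tensor_lr overload accepts the learning rate as a Tensor (useful when the lr lives on-device, e.g. under CUDA graph capture), and fused Adagrad gains a CUDA kernel. The per-parameter update the fused kernels apply, unfused and illustrative:

  # state_sum += grad^2;  param -= clr * grad / (sqrt(state_sum) + eps)
  def adagrad_step!(param, grad, state_sum, step, lr:, lr_decay: 0.0, eps: 1e-10)
    state_sum.add!(grad * grad)
    clr = lr / (1 + (step - 1) * lr_decay)   # lr may be a Float or a 0-d Tensor
    param.sub!(grad * clr / (state_sum.sqrt + eps))
  end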