torch-rb 0.20.0 → 0.22.0

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
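What changed, at a glance: this release updates the copy of PyTorch's `native_functions.yaml` that torch-rb uses to generate its operator bindings. Each `- func:` entry below declares an operator schema, and its `dispatch:` map lists the kernel implementing it for each backend key. Most hunks widen those maps — adding MPS and MTIA kernels for dense ops, SparseMPS/SparseCsrMPS for sparse layouts, and NestedTensorHPU alongside the existing nested-tensor backends — while a few register new overloads such as `mm.dtype`, `bmm.dtype`, `baddbmm.dtype`, `_fused_rms_norm`, and `randint_like.Tensor`.

For torch-rb users, the practical effect of a new MPS dispatch entry is that the operator can run on Apple-silicon GPUs instead of erroring or falling back to the CPU. A minimal sketch, assuming an MPS-enabled LibTorch build and that `Torch::Backends::MPS.available?`, the `device:` tensor option, and `kthvalue` are exposed the way torch-rb generally mirrors the Python API:

    require "torch"

    # kthvalue gained an MPS kernel in this update (see the kthvalue.values
    # hunk below), as did nanmedian, cummax/cummin, isin, and many sparse ops.
    device = Torch::Backends::MPS.available? ? "mps" : "cpu"
    x = Torch.tensor([3.0, 1.0, 2.0], device: device)
    values, indices = Torch.kthvalue(x, 2) # second-smallest value and its index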
@@ -288,14 +288,16 @@
288
288
  dispatch:
289
289
  CPU: native_dropout_cpu
290
290
  CUDA: native_dropout_cuda
291
- NestedTensorCPU, NestedTensorCUDA: native_dropout_nested
291
+ MPS: native_dropout_mps
292
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_nested
292
293
  tags: [nondeterministic_seeded, core]
293
294
  autogen: native_dropout.out
294
295
 
295
296
  - func: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
296
297
  dispatch:
297
- CPU, NestedTensorCPU, NestedTensorCUDA: native_dropout_backward
298
+ CPU, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_backward
298
299
  CUDA: native_dropout_backward_cuda
300
+ MPS: native_dropout_backward_mps
299
301
  autogen: native_dropout_backward.out
300
302
  tags: pointwise
301
303
 
@@ -340,9 +342,9 @@
340
342
  variants: function, method
341
343
  dispatch:
342
344
  CompositeExplicitAutograd: abs
343
- SparseCPU, SparseCUDA: abs_sparse
344
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
345
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
345
+ SparseCPU, SparseCUDA, SparseMPS: abs_sparse
346
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr
347
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs
346
348
  tags: [core, pointwise]
347
349
 
348
350
  - func: abs_(Tensor(a!) self) -> Tensor(a!)
@@ -350,17 +352,16 @@
350
352
  variants: function, method
351
353
  dispatch:
352
354
  CompositeExplicitAutograd: abs_
353
- SparseCPU, SparseCUDA: abs_sparse_
354
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
355
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
355
+ SparseCPU, SparseCUDA, SparseMPS: abs_sparse_
356
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr_
357
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs_
356
358
 
357
359
  - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
358
360
  device_check: NoCheck # TensorIterator
359
361
  dispatch:
360
- CPU, CUDA: abs_out
361
- MPS: abs_out_mps
362
- SparseCPU, SparseCUDA: abs_sparse_out
363
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
362
+ CPU, CUDA, MPS, MTIA: abs_out
363
+ SparseCPU, SparseCUDA, SparseMPS: abs_sparse_out
364
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr_out
364
365
  tags: pointwise
365
366
 
366
367
  # Note [Adding an alias]
@@ -429,18 +430,18 @@
429
430
  variants: function, method
430
431
  structured_delegate: sgn.out
431
432
  dispatch:
432
- SparseCPU, SparseCUDA: sgn_sparse
433
+ SparseCPU, SparseCUDA, SparseMPS: sgn_sparse
433
434
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
434
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
435
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn
435
436
  tags: pointwise
436
437
 
437
438
  - func: sgn_(Tensor(a!) self) -> Tensor(a!)
438
439
  variants: method
439
440
  structured_delegate: sgn.out
440
441
  dispatch:
441
- SparseCPU, SparseCUDA: sgn_sparse_
442
+ SparseCPU, SparseCUDA, SparseMPS: sgn_sparse_
442
443
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
443
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
444
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn_
444
445
  tags: pointwise
445
446
 
446
447
  - func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -449,7 +450,7 @@
449
450
  dispatch:
450
451
  CPU, CUDA: sgn_out
451
452
  MPS: sgn_out_mps
452
- SparseCPU, SparseCUDA: sgn_sparse_out
453
+ SparseCPU, SparseCUDA, SparseMPS: sgn_sparse_out
453
454
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_out
454
455
  tags: pointwise
455
456
 
@@ -477,7 +478,7 @@
477
478
  variants: function, method
478
479
  dispatch:
479
480
  CompositeExplicitAutograd: _conj_physical
480
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr
481
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: conj_physical_sparse_csr
481
482
  autogen: _conj_physical.out
482
483
 
483
484
  - func: conj_physical(Tensor self) -> Tensor
@@ -488,8 +489,8 @@
488
489
  dispatch:
489
490
  CPU, CUDA: conj_physical_out
490
491
  MPS: conj_physical_out_mps
491
- SparseCPU, SparseCUDA: conj_physical_out_sparse
492
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_out
492
+ SparseCPU, SparseCUDA, SparseMPS: conj_physical_out_sparse
493
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: conj_physical_sparse_csr_out
493
494
  tags: pointwise
494
495
 
495
496
  - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
@@ -527,8 +528,7 @@
527
528
  structured: True
528
529
  structured_inherits: TensorIteratorBase
529
530
  dispatch:
530
- CPU, CUDA: acos_out
531
- MPS: acos_out_mps
531
+ CPU, CUDA, MPS: acos_out
532
532
  tags: pointwise
533
533
 
534
534
  # arccos, alias of acos
@@ -556,11 +556,11 @@
556
556
  structured_delegate: add.out
557
557
  variants: function, method
558
558
  dispatch:
559
- SparseCPU, SparseCUDA, SparseMeta: add_sparse
559
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: add_sparse
560
560
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
561
561
  MkldnnCPU: mkldnn_add
562
562
  ZeroTensor: add_zerotensor
563
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
563
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add_Tensor
564
564
  tags: [core, pointwise]
565
565
 
566
566
  - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
@@ -568,10 +568,10 @@
568
568
  variants: method
569
569
  structured_delegate: add.out
570
570
  dispatch:
571
- SparseCPU, SparseCUDA, SparseMeta: add_sparse_
571
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: add_sparse_
572
572
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
573
573
  MkldnnCPU: mkldnn_add_
574
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
574
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add__Tensor
575
575
  tags: pointwise
576
576
 
577
577
  - func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -584,10 +584,12 @@
584
584
  dispatch:
585
585
  SparseCPU, SparseMeta: add_out_sparse_cpu
586
586
  SparseCUDA: add_out_sparse_cuda
587
+ SparseMPS: add_out_sparse_mps
587
588
  SparseCsrCPU, SparseCsrMeta: add_out_sparse_compressed_cpu
588
589
  SparseCsrCUDA: add_out_sparse_compressed_cuda
589
590
  MkldnnCPU: mkldnn_add_out
590
591
  MPS: add_out_mps
592
+ MTIA: add_out_mtia
591
593
  tags: pointwise
592
594
 
593
595
  - func: _add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
@@ -703,7 +705,7 @@
703
705
  structured_delegate: all.out
704
706
  variants: function, method
705
707
  dispatch:
706
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
708
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_all
707
709
 
708
710
 
709
711
  - func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
@@ -720,6 +722,7 @@
720
722
  dispatch:
721
723
  CPU, CUDA: all_out
722
724
  MPS: all_out_mps
725
+ MTIA: all_out_mtia
723
726
 
724
727
  - func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
725
728
  device_check: NoCheck # TensorIterator
@@ -809,6 +812,7 @@
809
812
  CPU, Meta: arange_out
810
813
  CUDA: arange_cuda_out
811
814
  MPS: arange_mps_out
815
+ MTIA: arange_mtia_out
812
816
  cpp_no_default_args: ['step']
813
817
 
814
818
  # This function is a temporary hack to allow tracing of arange like constructs with dynamic
@@ -873,7 +877,7 @@
873
877
  variants: function, method
874
878
  structured_delegate: asinh.out
875
879
  dispatch:
876
- SparseCPU, SparseCUDA: asinh_sparse
880
+ SparseCPU, SparseCUDA, SparseMPS: asinh_sparse
877
881
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr
878
882
  tags: [core, pointwise]
879
883
 
@@ -881,7 +885,7 @@
881
885
  variants: function, method
882
886
  structured_delegate: asinh.out
883
887
  dispatch:
884
- SparseCPU, SparseCUDA: asinh_sparse_
888
+ SparseCPU, SparseCUDA, SparseMPS: asinh_sparse_
885
889
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_
886
890
  tags: pointwise
887
891
 
@@ -891,7 +895,7 @@
891
895
  dispatch:
892
896
  CPU, CUDA: asinh_out
893
897
  MPS: asinh_out_mps
894
- SparseCPU, SparseCUDA: asinh_sparse_out
898
+ SparseCPU, SparseCUDA, SparseMPS: asinh_sparse_out
895
899
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_out
896
900
  tags: pointwise
897
901
 
@@ -908,7 +912,7 @@
908
912
  structured_delegate: atanh.out
909
913
  variants: function, method
910
914
  dispatch:
911
- SparseCPU, SparseCUDA: atanh_sparse
915
+ SparseCPU, SparseCUDA, SparseMPS: atanh_sparse
912
916
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr
913
917
  tags: [core, pointwise]
914
918
 
@@ -916,7 +920,7 @@
916
920
  structured_delegate: atanh.out
917
921
  variants: function, method
918
922
  dispatch:
919
- SparseCPU, SparseCUDA: atanh_sparse_
923
+ SparseCPU, SparseCUDA, SparseMPS: atanh_sparse_
920
924
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_
921
925
  tags: pointwise
922
926
 
@@ -926,7 +930,7 @@
926
930
  dispatch:
927
931
  CPU, CUDA: atanh_out
928
932
  MPS: atanh_out_mps
929
- SparseCPU, SparseCUDA: atanh_sparse_out
933
+ SparseCPU, SparseCUDA, SparseMPS: atanh_sparse_out
930
934
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_out
931
935
  tags: pointwise
932
936
  # arctanh, alias for atanh
@@ -942,9 +946,8 @@
942
946
  - func: as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)
943
947
  variants: function, method
944
948
  dispatch:
945
- ZeroTensor, CPU, CUDA: as_strided_tensorimpl
949
+ ZeroTensor, CPU, CUDA, MTIA, MPS: as_strided_tensorimpl
946
950
  Meta: as_strided_tensorimpl_meta_symint
947
- MPS: as_strided_tensorimpl_mps
948
951
  QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
949
952
  device_check: NoCheck
950
953
  device_guard: False
@@ -964,7 +967,7 @@
964
967
  variants: function, method
965
968
  structured_delegate: asin.out
966
969
  dispatch:
967
- SparseCPU, SparseCUDA: asin_sparse
970
+ SparseCPU, SparseCUDA, SparseMPS: asin_sparse
968
971
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr
969
972
  tags: [core, pointwise]
970
973
 
@@ -973,7 +976,7 @@
973
976
  variants: function, method
974
977
  structured_delegate: asin.out
975
978
  dispatch:
976
- SparseCPU, SparseCUDA: asin_sparse_
979
+ SparseCPU, SparseCUDA, SparseMPS: asin_sparse_
977
980
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_
978
981
  tags: pointwise
979
982
 
@@ -982,9 +985,8 @@
982
985
  structured: True
983
986
  structured_inherits: TensorIteratorBase
984
987
  dispatch:
985
- CPU, CUDA: asin_out
986
- MPS: asin_out_mps
987
- SparseCPU, SparseCUDA: asin_sparse_out
988
+ CPU, CUDA, MPS: asin_out
989
+ SparseCPU, SparseCUDA, SparseMPS: asin_sparse_out
988
990
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
989
991
  tags: pointwise
990
992
 
@@ -1002,7 +1004,7 @@
1002
1004
  structured_delegate: atan.out
1003
1005
  variants: function, method
1004
1006
  dispatch:
1005
- SparseCPU, SparseCUDA: atan_sparse
1007
+ SparseCPU, SparseCUDA, SparseMPS: atan_sparse
1006
1008
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr
1007
1009
  tags: [core, pointwise]
1008
1010
 
@@ -1011,7 +1013,7 @@
1011
1013
  structured_delegate: atan.out
1012
1014
  variants: function, method
1013
1015
  dispatch:
1014
- SparseCPU, SparseCUDA: atan_sparse_
1016
+ SparseCPU, SparseCUDA, SparseMPS: atan_sparse_
1015
1017
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_
1016
1018
  tags: pointwise
1017
1019
 
@@ -1020,9 +1022,8 @@
1020
1022
  structured: True
1021
1023
  structured_inherits: TensorIteratorBase
1022
1024
  dispatch:
1023
- CPU, CUDA: atan_out
1024
- MPS: atan_out_mps
1025
- SparseCPU, SparseCUDA: atan_sparse_out
1025
+ CPU, CUDA, MPS: atan_out
1026
+ SparseCPU, SparseCUDA, SparseMPS: atan_sparse_out
1026
1027
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
1027
1028
  tags: pointwise
1028
1029
 
@@ -1071,8 +1072,19 @@
1071
1072
  CUDA: baddbmm_out_cuda
1072
1073
  MPS: baddbmm_out_mps
1073
1074
  XPU: baddbmm_out_xpu
1075
+ MTIA: baddbmm_out_mtia
1074
1076
  SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
1075
1077
 
1078
+ - func: baddbmm.dtype(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
1079
+ variants: function
1080
+ dispatch:
1081
+ CUDA: _baddbmm_dtype_cuda
1082
+
1083
+ - func: baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
1084
+ variants: function
1085
+ dispatch:
1086
+ CUDA: _baddbmm_out_dtype_cuda
1087
+
1076
1088
  - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
1077
1089
  dispatch:
1078
1090
  CompositeExplicitAutograd: bartlett_window
@@ -1185,7 +1197,7 @@
1185
1197
  CompositeExplicitAutograd: binary_cross_entropy_with_logits
1186
1198
  autogen: binary_cross_entropy_with_logits.out
1187
1199
 
1188
- - func: bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor
1200
+ - func: bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor
1189
1201
  variants: function, method
1190
1202
  dispatch:
1191
1203
  CPU: _bincount_cpu
@@ -1211,8 +1223,7 @@
1211
1223
  structured: True
1212
1224
  structured_inherits: TensorIteratorBase
1213
1225
  dispatch:
1214
- CPU, CUDA: bitwise_not_out
1215
- MPS: bitwise_not_out_mps
1226
+ CPU, CUDA, MPS, MTIA: bitwise_not_out
1216
1227
  tags: pointwise
1217
1228
 
1218
1229
  - func: copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -1262,7 +1273,7 @@
1262
1273
  variants: function, method
1263
1274
  dispatch:
1264
1275
  CompositeExplicitAutograd: logical_not
1265
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not
1276
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_logical_not
1266
1277
  tags: [core, pointwise]
1267
1278
 
1268
1279
  - func: logical_not_(Tensor(a!) self) -> Tensor(a!)
@@ -1270,13 +1281,13 @@
1270
1281
  variants: method
1271
1282
  dispatch:
1272
1283
  CompositeExplicitAutograd: logical_not_
1273
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not_
1284
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_logical_not_
1274
1285
  tags: pointwise
1275
1286
 
1276
1287
  - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
1277
1288
  device_check: NoCheck # TensorIterator
1278
1289
  dispatch:
1279
- CPU, CUDA: logical_not_out
1290
+ CPU, CUDA, MTIA: logical_not_out
1280
1291
  MPS: logical_not_out_mps
1281
1292
  tags: pointwise
1282
1293
 
@@ -1318,7 +1329,7 @@
1318
1329
  - func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
1319
1330
  device_check: NoCheck # TensorIterator
1320
1331
  dispatch:
1321
- CPU, CUDA: logical_and_out
1332
+ CPU, CUDA, MTIA: logical_and_out
1322
1333
  MPS: logical_and_out_mps
1323
1334
  tags: pointwise
1324
1335
 
@@ -1339,7 +1350,7 @@
1339
1350
  - func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
1340
1351
  device_check: NoCheck # TensorIterator
1341
1352
  dispatch:
1342
- CPU, CUDA: logical_or_out
1353
+ CPU, CUDA, MTIA: logical_or_out
1343
1354
  MPS: logical_or_out_mps
1344
1355
  tags: pointwise
1345
1356
 
@@ -1371,10 +1382,21 @@
1371
1382
  CUDA: bmm_out_cuda
1372
1383
  MPS: bmm_out_mps
1373
1384
  XPU: bmm_out_xpu
1385
+ MTIA: bmm_out_mtia
1374
1386
  SparseCPU: bmm_out_sparse_cpu
1375
1387
  SparseCUDA: bmm_out_sparse_cuda
1376
1388
  SparseCsrCUDA: bmm_out_sparse_csr_cuda
1377
1389
 
1390
+ - func: bmm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
1391
+ variants: function
1392
+ dispatch:
1393
+ CUDA: _bmm_dtype_cuda
1394
+
1395
+ - func: bmm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
1396
+ variants: function
1397
+ dispatch:
1398
+ CUDA: _bmm_out_dtype_cuda
1399
+
1378
1400
  - func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
1379
1401
  device_check: NoCheck
1380
1402
  device_guard: False
@@ -1394,7 +1416,7 @@
1394
1416
  dispatch:
1395
1417
  SparseCPU, SparseCUDA: cat_sparse
1396
1418
  QuantizedCPU: cat_quantized_cpu
1397
- NestedTensorCPU, NestedTensorCUDA: cat_nested
1419
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: cat_nested
1398
1420
  tags: core
1399
1421
 
1400
1422
  - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -1440,7 +1462,7 @@
1440
1462
  structured_delegate: ceil.out
1441
1463
  variants: function, method
1442
1464
  dispatch:
1443
- SparseCPU, SparseCUDA: ceil_sparse
1465
+ SparseCPU, SparseCUDA, SparseMPS: ceil_sparse
1444
1466
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr
1445
1467
  tags: [core, pointwise]
1446
1468
 
@@ -1449,7 +1471,7 @@
1449
1471
  structured_delegate: ceil.out
1450
1472
  variants: function, method
1451
1473
  dispatch:
1452
- SparseCPU, SparseCUDA: ceil_sparse_
1474
+ SparseCPU, SparseCUDA, SparseMPS: ceil_sparse_
1453
1475
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_
1454
1476
  tags: pointwise
1455
1477
 
@@ -1459,7 +1481,7 @@
1459
1481
  structured_inherits: TensorIteratorBase
1460
1482
  dispatch:
1461
1483
  CPU, CUDA, MPS: ceil_out
1462
- SparseCPU, SparseCUDA: ceil_sparse_out
1484
+ SparseCPU, SparseCUDA, SparseMPS: ceil_sparse_out
1463
1485
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
1464
1486
  tags: pointwise
1465
1487
 
@@ -1482,7 +1504,7 @@
1482
1504
  device_guard: False
1483
1505
  dispatch:
1484
1506
  CompositeImplicitAutograd: chunk
1485
- NestedTensorCPU, NestedTensorCUDA: chunk_nested_tensor
1507
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: chunk_nested_tensor
1486
1508
 
1487
1509
  - func: tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[]
1488
1510
  variants: function, method
@@ -1529,7 +1551,7 @@
1529
1551
  structured: True
1530
1552
  structured_inherits: TensorIteratorBase
1531
1553
  dispatch:
1532
- CPU, CUDA: clamp_out
1554
+ CPU, CUDA, MTIA: clamp_out
1533
1555
  MPS: clamp_out_mps
1534
1556
  tags: pointwise
1535
1557
 
@@ -1569,7 +1591,7 @@
1569
1591
  structured: True
1570
1592
  structured_inherits: TensorIteratorBase
1571
1593
  dispatch:
1572
- CPU, CUDA: clamp_max_out
1594
+ CPU, CUDA, MTIA: clamp_max_out
1573
1595
  MPS: clamp_max_out_mps
1574
1596
  tags: pointwise
1575
1597
 
@@ -1609,7 +1631,7 @@
1609
1631
  structured: True
1610
1632
  structured_inherits: TensorIteratorBase
1611
1633
  dispatch:
1612
- CPU, CUDA: clamp_min_out
1634
+ CPU, CUDA, MTIA: clamp_min_out
1613
1635
  MPS: clamp_min_out_mps
1614
1636
  tags: pointwise
1615
1637
 
@@ -1658,8 +1680,7 @@
1658
1680
 
1659
1681
  - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
1660
1682
  dispatch:
1661
- CPU, CUDA: complex_out
1662
- MPS: complex_out_mps
1683
+ CPU, CUDA, MPS: complex_out
1663
1684
 
1664
1685
  - func: polar(Tensor abs, Tensor angle) -> Tensor
1665
1686
  variants: function
@@ -1668,8 +1689,7 @@
1668
1689
 
1669
1690
  - func: polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)
1670
1691
  dispatch:
1671
- CPU, CUDA: polar_out
1672
- MPS: polar_out_mps
1692
+ CPU, CUDA, MPS: polar_out
1673
1693
 
1674
1694
  - func: constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor
1675
1695
  variants: function
@@ -1781,7 +1801,7 @@
1781
1801
  SparseCPU, SparseCUDA: copy_sparse_wrapper_
1782
1802
  CompositeExplicitAutograd: copy_
1783
1803
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
1784
- NestedTensorCPU, NestedTensorCUDA: copy_nested_
1804
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: copy_nested_
1785
1805
  autogen: copy.out
1786
1806
 
1787
1807
  - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
@@ -1801,7 +1821,7 @@
1801
1821
  variants: function, method
1802
1822
  structured_delegate: cos.out
1803
1823
  dispatch:
1804
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_cos
1824
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_cos
1805
1825
  tags: [core, pointwise]
1806
1826
 
1807
1827
  - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -1815,8 +1835,7 @@
1815
1835
  structured: True
1816
1836
  structured_inherits: TensorIteratorBase
1817
1837
  dispatch:
1818
- CPU, CUDA: cos_out
1819
- MPS: cos_out_mps
1838
+ CPU, CUDA, MPS, MTIA: cos_out
1820
1839
  tags: pointwise
1821
1840
 
1822
1841
  - func: cosh(Tensor self) -> Tensor
@@ -1836,8 +1855,7 @@
1836
1855
  structured: True
1837
1856
  structured_inherits: TensorIteratorBase
1838
1857
  dispatch:
1839
- CPU, CUDA: cosh_out
1840
- MPS: cosh_out_mps
1858
+ CPU, CUDA, MPS: cosh_out
1841
1859
  tags: pointwise
1842
1860
 
1843
1861
  - func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
@@ -1876,7 +1894,10 @@
1876
1894
  - func: cudnn_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor, Tensor)
1877
1895
  dispatch:
1878
1896
  CUDA: cudnn_batch_norm
1879
- autogen: cudnn_batch_norm.out
1897
+
1898
+ - func: cudnn_batch_norm.out(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))
1899
+ dispatch:
1900
+ CUDA: cudnn_batch_norm_out
1880
1901
 
1881
1902
  # NB: You can only use this if you used cudnn_batch_norm training=True
1882
1903
  - func: cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)
@@ -1951,6 +1972,7 @@
1951
1972
  dispatch:
1952
1973
  CPU: cummax_helper_cpu
1953
1974
  CUDA: cummax_helper_cuda
1975
+ MPS: cummax_helper_mps
1954
1976
 
1955
1977
  - func: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)
1956
1978
  device_check: NoCheck # TensorIterator
@@ -1975,6 +1997,7 @@
1975
1997
  dispatch:
1976
1998
  CPU: cummin_helper_cpu
1977
1999
  CUDA: cummin_helper_cuda
2000
+ MPS: cummin_helper_mps
1978
2001
 
1979
2002
  - func: cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor
1980
2003
  variants: function
@@ -2139,7 +2162,7 @@
2139
2162
  dispatch:
2140
2163
  SparseCPU, SparseCUDA: div_sparse
2141
2164
  ZeroTensor: div_zerotensor
2142
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_div_Tensor
2165
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Tensor
2143
2166
  tags: [core, pointwise]
2144
2167
 
2145
2168
  - func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -2155,8 +2178,7 @@
2155
2178
  structured: True
2156
2179
  structured_inherits: TensorIteratorBase
2157
2180
  dispatch:
2158
- CPU, CUDA: div_out
2159
- MPS: div_out_mps
2181
+ CPU, CUDA, MPS, MTIA: div_out
2160
2182
  SparseCPU, SparseCUDA: div_out_sparse_zerodim
2161
2183
  tags: pointwise
2162
2184
 
@@ -2181,8 +2203,7 @@
2181
2203
  structured: True
2182
2204
  structured_inherits: TensorIteratorBase
2183
2205
  dispatch:
2184
- CPU, CUDA: div_out_mode
2185
- MPS: div_out_mode_mps
2206
+ CPU, CUDA, MPS: div_out_mode
2186
2207
  SparseCPU, SparseCUDA: div_out_sparse_zerodim
2187
2208
  tags: pointwise
2188
2209
 
@@ -2192,7 +2213,7 @@
2192
2213
  variants: function, method
2193
2214
  dispatch:
2194
2215
  CompositeExplicitAutograd: div
2195
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_div_Scalar
2216
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Scalar
2196
2217
  tags: [core, pointwise]
2197
2218
 
2198
2219
  - func: div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -2292,7 +2313,7 @@
2292
2313
  - func: embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
2293
2314
  dispatch:
2294
2315
  CompositeExplicitAutograd: embedding_symint
2295
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_embedding
2316
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_embedding
2296
2317
  autogen: embedding.out
2297
2318
  tags: core
2298
2319
 
@@ -2388,7 +2409,7 @@
2388
2409
  MPS: empty_mps
2389
2410
  Meta: empty_meta_symint
2390
2411
  MkldnnCPU: empty_mkldnn
2391
- SparseCPU, SparseCUDA: empty_sparse
2412
+ SparseCPU, SparseCUDA, SparseMPS: empty_sparse
2392
2413
  SparseMeta: empty_sparse_symint
2393
2414
  SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
2394
2415
  SparseCsrMeta: empty_sparse_compressed_symint
@@ -2498,7 +2519,7 @@
2498
2519
  QuantizedCPU, QuantizedCUDA: empty_like_quantized
2499
2520
  SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
2500
2521
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
2501
- NestedTensorCPU, NestedTensorCUDA: empty_like_nested
2522
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: empty_like_nested
2502
2523
  autogen: empty_like.out
2503
2524
 
2504
2525
  - func: empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -2516,7 +2537,7 @@
2516
2537
  structured_delegate: erf.out
2517
2538
  variants: function, method
2518
2539
  dispatch:
2519
- SparseCPU, SparseCUDA: erf_sparse
2540
+ SparseCPU, SparseCUDA, SparseMPS: erf_sparse
2520
2541
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr
2521
2542
  tags: [core, pointwise]
2522
2543
 
@@ -2525,7 +2546,7 @@
2525
2546
  structured_delegate: erf.out
2526
2547
  variants: function, method
2527
2548
  dispatch:
2528
- SparseCPU, SparseCUDA: erf_sparse_
2549
+ SparseCPU, SparseCUDA, SparseMPS: erf_sparse_
2529
2550
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_
2530
2551
  tags: pointwise
2531
2552
 
@@ -2534,9 +2555,8 @@
2534
2555
  structured: True
2535
2556
  structured_inherits: TensorIteratorBase
2536
2557
  dispatch:
2537
- CPU, CUDA: erf_out
2538
- MPS: erf_out_mps
2539
- SparseCPU, SparseCUDA: erf_sparse_out
2558
+ CPU, CUDA, MPS, MTIA: erf_out
2559
+ SparseCPU, SparseCUDA, SparseMPS: erf_sparse_out
2540
2560
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
2541
2561
  tags: pointwise
2542
2562
 
@@ -2557,7 +2577,7 @@
2557
2577
  structured: True
2558
2578
  structured_inherits: TensorIteratorBase
2559
2579
  dispatch:
2560
- CPU, CUDA: erfc_out
2580
+ CPU, CUDA, MPS: erfc_out
2561
2581
  tags: pointwise
2562
2582
 
2563
2583
  - func: exp(Tensor self) -> Tensor
@@ -2577,7 +2597,7 @@
2577
2597
  structured: True
2578
2598
  structured_inherits: TensorIteratorBase
2579
2599
  dispatch:
2580
- CPU, CUDA, MPS: exp_out
2600
+ CPU, CUDA, MPS, MTIA: exp_out
2581
2601
  tags: pointwise
2582
2602
 
2583
2603
  - func: exp2(Tensor self) -> Tensor
@@ -2594,8 +2614,7 @@
2594
2614
  structured: True
2595
2615
  structured_inherits: TensorIteratorBase
2596
2616
  dispatch:
2597
- CPU, CUDA: exp2_out
2598
- MPS: exp2_out_mps
2617
+ CPU, CUDA, MPS: exp2_out
2599
2618
  tags: pointwise
2600
2619
 
2601
2620
  - func: expm1(Tensor self) -> Tensor
@@ -2603,7 +2622,7 @@
2603
2622
  structured_delegate: expm1.out
2604
2623
  variants: function, method
2605
2624
  dispatch:
2606
- SparseCPU, SparseCUDA: expm1_sparse
2625
+ SparseCPU, SparseCUDA, SparseMPS: expm1_sparse
2607
2626
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr
2608
2627
  tags: [core, pointwise]
2609
2628
 
@@ -2612,7 +2631,7 @@
2612
2631
  structured_delegate: expm1.out
2613
2632
  variants: function, method
2614
2633
  dispatch:
2615
- SparseCPU, SparseCUDA: expm1_sparse_
2634
+ SparseCPU, SparseCUDA, SparseMPS: expm1_sparse_
2616
2635
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_
2617
2636
  tags: pointwise
2618
2637
 
@@ -2621,9 +2640,8 @@
2621
2640
  structured: True
2622
2641
  structured_inherits: TensorIteratorBase
2623
2642
  dispatch:
2624
- CPU, CUDA: expm1_out
2625
- MPS: expm1_out_mps
2626
- SparseCPU, SparseCUDA: expm1_sparse_out
2643
+ CPU, CUDA, MPS: expm1_out
2644
+ SparseCPU, SparseCUDA, SparseMPS: expm1_sparse_out
2627
2645
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
2628
2646
  tags: pointwise
2629
2647
 
@@ -2703,7 +2721,7 @@
2703
2721
  QuantizedCPU, QuantizedCUDA: fill_quantized_
2704
2722
  Meta: fill_meta_
2705
2723
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
2706
- NestedTensorCPU, NestedTensorCUDA: fill_nested_
2724
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
2707
2725
  autogen: fill.Scalar_out
2708
2726
 
2709
2727
  - func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
@@ -2714,7 +2732,7 @@
2714
2732
  MPS: fill_tensor_mps_
2715
2733
  QuantizedCPU, QuantizedCUDA: fill_quantized_
2716
2734
  Meta: fill_meta_
2717
- NestedTensorCPU, NestedTensorCUDA: fill_nested_
2735
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
2718
2736
  autogen: fill.Tensor_out
2719
2737
 
2720
2738
  - func: floor(Tensor self) -> Tensor
@@ -2722,7 +2740,7 @@
2722
2740
  structured_delegate: floor.out
2723
2741
  variants: function, method
2724
2742
  dispatch:
2725
- SparseCPU, SparseCUDA: floor_sparse
2743
+ SparseCPU, SparseCUDA, SparseMPS: floor_sparse
2726
2744
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr
2727
2745
  tags: [core, pointwise]
2728
2746
 
@@ -2731,7 +2749,7 @@
2731
2749
  structured_delegate: floor.out
2732
2750
  variants: function, method
2733
2751
  dispatch:
2734
- SparseCPU, SparseCUDA: floor_sparse_
2752
+ SparseCPU, SparseCUDA, SparseMPS: floor_sparse_
2735
2753
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_
2736
2754
  tags: pointwise
2737
2755
 
@@ -2741,7 +2759,7 @@
2741
2759
  structured_inherits: TensorIteratorBase
2742
2760
  dispatch:
2743
2761
  CPU, CUDA, MPS: floor_out
2744
- SparseCPU, SparseCUDA: floor_sparse_out
2762
+ SparseCPU, SparseCUDA, SparseMPS: floor_sparse_out
2745
2763
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
2746
2764
  tags: pointwise
2747
2765
 
@@ -2749,23 +2767,20 @@
2749
2767
  device_check: NoCheck # TensorIterator
2750
2768
  variants: function, method
2751
2769
  dispatch:
2752
- CPU, CUDA: floor_divide
2753
- MPS: floor_divide_mps
2770
+ CPU, CUDA, MPS, MTIA: floor_divide
2754
2771
  SparseCPU, SparseCUDA: floor_divide_sparse
2755
2772
 
2756
2773
  - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
2757
2774
  device_check: NoCheck # TensorIterator
2758
2775
  variants: method
2759
2776
  dispatch:
2760
- CPU, CUDA: floor_divide_
2761
- MPS: floor_divide_mps_
2777
+ CPU, CUDA, MPS: floor_divide_
2762
2778
  SparseCPU, SparseCUDA: floor_divide_sparse_
2763
2779
 
2764
2780
  - func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
2765
2781
  device_check: NoCheck # TensorIterator
2766
2782
  dispatch:
2767
- CPU, CUDA: floor_divide_out
2768
- MPS: floor_divide_out_mps
2783
+ CPU, CUDA, MPS: floor_divide_out
2769
2784
  SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim
2770
2785
 
2771
2786
  - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
@@ -2786,7 +2801,7 @@
2786
2801
  structured_delegate: frac.out
2787
2802
  variants: function, method
2788
2803
  dispatch:
2789
- SparseCPU, SparseCUDA: frac_sparse
2804
+ SparseCPU, SparseCUDA, SparseMPS: frac_sparse
2790
2805
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr
2791
2806
  tags: pointwise
2792
2807
 
@@ -2795,7 +2810,7 @@
2795
2810
  structured_delegate: frac.out
2796
2811
  variants: function, method
2797
2812
  dispatch:
2798
- SparseCPU, SparseCUDA: frac_sparse_
2813
+ SparseCPU, SparseCUDA, SparseMPS: frac_sparse_
2799
2814
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_
2800
2815
  tags: pointwise
2801
2816
 
@@ -2806,7 +2821,7 @@
2806
2821
  dispatch:
2807
2822
  CPU, CUDA: frac_out
2808
2823
  MPS: frac_out_mps
2809
- SparseCPU, SparseCUDA: frac_sparse_out
2824
+ SparseCPU, SparseCUDA, SparseMPS: frac_sparse_out
2810
2825
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_out
2811
2826
  tags: pointwise
2812
2827
 
@@ -2919,6 +2934,7 @@
2919
2934
  dispatch:
2920
2935
  CPU: grid_sampler_3d_cpu
2921
2936
  CUDA: grid_sampler_3d_cuda
2937
+ MPS: grid_sampler_3d_mps
2922
2938
  autogen: grid_sampler_3d.out
2923
2939
 
2924
2940
  # `grid_sampler_3d_backward` takes in `output_mask` to optimize performance for
@@ -3100,6 +3116,7 @@
3100
3116
  - dim -> int dim
3101
3117
  dispatch:
3102
3118
  CPU, CUDA: index_copy_out
3119
+ MPS: index_copy_out_mps
3103
3120
 
3104
3121
  - func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
3105
3122
  variants: method
@@ -3170,7 +3187,7 @@
3170
3187
  variants: function
3171
3188
  structured: True
3172
3189
  dispatch:
3173
- CPU, CUDA: isin_Tensor_Scalar_out
3190
+ CPU, CUDA, MPS: isin_Tensor_Scalar_out
3174
3191
 
3175
3192
  - func: isin.Tensor_Scalar(Tensor elements, Scalar test_element, *, bool assume_unique=False, bool invert=False) -> Tensor
3176
3193
  variants: function
@@ -3181,6 +3198,7 @@
3181
3198
  structured: True
3182
3199
  dispatch:
3183
3200
  CPU, CUDA: isin_Scalar_Tensor_out
3201
+ MPS: isin_Scalar_Tensor_out_mps
3184
3202
 
3185
3203
  - func: isin.Scalar_Tensor(Scalar element, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor
3186
3204
  variants: function
@@ -3191,9 +3209,9 @@
3191
3209
  device_check: NoCheck
3192
3210
  device_guard: False
3193
3211
  dispatch:
3194
- CPU, CUDA, MPS: isnan
3195
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_isnan
3196
- SparseCPU, SparseCUDA: isnan_sparse
3212
+ CPU, CUDA, MPS, MTIA: isnan
3213
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isnan
3214
+ SparseCPU, SparseCUDA, SparseMPS: isnan_sparse
3197
3215
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
3198
3216
  autogen: isnan.out
3199
3217
  tags: [core, pointwise]
@@ -3243,7 +3261,7 @@
3243
3261
  device_check: NoCheck
3244
3262
  device_guard: False
3245
3263
  dispatch:
3246
- NestedTensorCPU, NestedTensorCUDA: nested_is_same_size
3264
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_is_same_size
3247
3265
  CompositeExplicitAutograd: is_same_size
3248
3266
 
3249
3267
  - func: is_signed(Tensor self) -> bool
@@ -3265,20 +3283,21 @@
3265
3283
 
3266
3284
  - func: kron.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
3267
3285
 
3268
- - func: kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
3286
+ - func: kthvalue(Tensor self, SymInt k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
3269
3287
  variants: function, method
3270
3288
  dispatch:
3271
3289
  CompositeExplicitAutograd: kthvalue
3272
3290
 
3273
- - func: kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
3291
+ - func: kthvalue.values(Tensor self, SymInt k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
3274
3292
  dispatch:
3275
3293
  CPU: kthvalue_out_cpu
3276
3294
  CUDA: kthvalue_out_cuda
3295
+ MPS: kthvalue_out_mps
3277
3296
 
3278
- - func: kthvalue.dimname(Tensor self, int k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
3297
+ - func: kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
3279
3298
  variants: function, method
3280
3299
 
3281
- - func: kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
3300
+ - func: kthvalue.dimname_out(Tensor self, SymInt k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
3282
3301
 
3283
3302
  - func: layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor
3284
3303
  dispatch:
@@ -3290,7 +3309,7 @@
3290
3309
  CUDA: layer_norm_cuda
3291
3310
  MPS: layer_norm_mps
3292
3311
  CompositeExplicitAutograd: math_native_layer_norm
3293
- NestedTensorCPU, NestedTensorCUDA: nested_layer_norm
3312
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_layer_norm
3294
3313
  autogen: native_layer_norm.out
3295
3314
  tags: core
3296
3315
 
@@ -3299,7 +3318,7 @@
3299
3318
  CPU: layer_norm_backward_cpu
3300
3319
  CUDA: layer_norm_backward_cuda
3301
3320
  MPS: layer_norm_backward_mps
3302
- NestedTensorCPU, NestedTensorCUDA: layer_norm_backward_nested
3321
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: layer_norm_backward_nested
3303
3322
  autogen: native_layer_norm_backward.out
3304
3323
  tags: core
3305
3324
 
@@ -3307,37 +3326,47 @@
3307
3326
  dispatch:
3308
3327
  CompositeImplicitAutograd: rms_norm_symint
3309
3328
 
3329
+ - func: _fused_rms_norm(Tensor input, int[] normalized_shape, Tensor? weight, float? eps) -> (Tensor, Tensor)
3330
+ dispatch:
3331
+ CUDA: _fused_rms_norm_cuda
3332
+ MPS: _fused_rms_norm_mps
3333
+ CompositeImplicitAutograd: rms_norm_composite
3334
+
3335
+ - func: _fused_rms_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor rstd, Tensor? weight, bool[2] output_mask) -> (Tensor, Tensor)
3336
+ dispatch:
3337
+ CUDA: _fused_rms_norm_backward_cuda
3338
+
3310
3339
  - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
3311
3340
  variants: function, method
3312
3341
  dispatch:
3313
3342
  CompositeExplicitAutograd: nan_to_num
3314
- SparseCPU, SparseCUDA: nan_to_num_sparse
3343
+ SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse
3315
3344
  tags: pointwise
3316
3345
 
3317
3346
  - func: nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)
3318
3347
  variants: function, method
3319
3348
  dispatch:
3320
3349
  CompositeExplicitAutograd: nan_to_num_
3321
- SparseCPU, SparseCUDA: nan_to_num_sparse_
3350
+ SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse_
3322
3351
  tags: pointwise
3323
3352
 
3324
3353
  - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
3325
3354
  dispatch:
3326
- CPU, CUDA: nan_to_num_out
3355
+ CPU, CUDA, MTIA: nan_to_num_out
3327
3356
  MPS: nan_to_num_out_mps
3328
- SparseCPU, SparseCUDA: nan_to_num_sparse_out
3357
+ SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse_out
3329
3358
  tags: pointwise
3330
3359
 
3331
3360
  - func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
3332
3361
  python_module: nn
3333
3362
  dispatch:
3334
3363
  CompositeImplicitAutograd: linear
3335
- NestedTensorCPU, NestedTensorCUDA: nested_linear
3364
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_linear
3336
3365
  MPS: _mps_linear
3337
3366
 
3338
3367
  - func: linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
3339
3368
  dispatch:
3340
- NestedTensorCPU, NestedTensorCUDA: nested_linear_backward
3369
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_linear_backward
3341
3370
  MPS: mps_linear_backward
3342
3371
  autogen: linear_backward.out
3343
3372
 
@@ -3371,7 +3400,7 @@
3371
3400
  dispatch:
3372
3401
  CUDA: _cslt_compress
3373
3402
 
3374
- - func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, bool split_k_one_kernel=True) -> Tensor
3403
+ - func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, int split_k_mode=-1) -> Tensor
3375
3404
  dispatch:
3376
3405
  CUDA: _cslt_sparse_mm
3377
3406
  tags: needs_fixed_stride_order
@@ -3421,10 +3450,14 @@
3421
3450
 
3422
3451
  - func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
3423
3452
 
3424
- - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
3453
+ - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor? bias) -> Tensor
3454
+
3455
+ - func: fbgemm_linear_fp16_weight_fp32_activation.out(Tensor input, Tensor packed_weight, Tensor? bias, Tensor(a!) output) -> Tensor
3425
3456
 
3426
3457
  - func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
3427
3458
 
3459
+ - func: fbgemm_linear_fp16_weight.out(Tensor input, Tensor packed_weight, Tensor bias, Tensor(a!) output) -> Tensor
3460
+
3428
3461
  - func: fbgemm_pack_quantized_matrix(Tensor input) -> Tensor
3429
3462
 
3430
3463
  - func: fbgemm_pack_quantized_matrix.KN(Tensor input, int K, int N) -> Tensor
@@ -3496,8 +3529,7 @@
3496
3529
  structured: True
3497
3530
  structured_inherits: TensorIteratorBase
3498
3531
  dispatch:
3499
- CPU, CUDA: log_out
3500
- MPS: log_out_mps
3532
+ CPU, CUDA, MPS, MTIA: log_out
3501
3533
  tags: pointwise
3502
3534
 
3503
3535
  - func: log10(Tensor self) -> Tensor
@@ -3517,8 +3549,7 @@
3517
3549
  structured: True
3518
3550
  structured_inherits: TensorIteratorBase
3519
3551
  dispatch:
3520
- CPU, CUDA: log10_out
3521
- MPS: log10_out_mps
3552
+ CPU, CUDA, MPS: log10_out
3522
3553
  tags: pointwise
3523
3554
 
3524
3555
  - func: log1p(Tensor self) -> Tensor
@@ -3526,7 +3557,7 @@
3526
3557
  structured_delegate: log1p.out
3527
3558
  variants: function, method
3528
3559
  dispatch:
3529
- SparseCPU, SparseCUDA: log1p_sparse
3560
+ SparseCPU, SparseCUDA, SparseMPS: log1p_sparse
3530
3561
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr
3531
3562
  tags: [core, pointwise]
3532
3563
 
@@ -3535,7 +3566,7 @@
3535
3566
  structured_delegate: log1p.out
3536
3567
  variants: function, method
3537
3568
  dispatch:
3538
- SparseCPU, SparseCUDA: log1p_sparse_
3569
+ SparseCPU, SparseCUDA, SparseMPS: log1p_sparse_
3539
3570
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_
3540
3571
  tags: pointwise
3541
3572
 
@@ -3544,9 +3575,8 @@
3544
3575
  structured: True
3545
3576
  structured_inherits: TensorIteratorBase
3546
3577
  dispatch:
3547
- CPU, CUDA: log1p_out
3548
- MPS: log1p_out_mps
3549
- SparseCPU, SparseCUDA: log1p_sparse_out
3578
+ CPU, CUDA, MPS: log1p_out
3579
+ SparseCPU, SparseCUDA, SparseMPS: log1p_sparse_out
3550
3580
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
3551
3581
  tags: pointwise
3552
3582
 
@@ -3567,8 +3597,7 @@
3567
3597
  structured: True
3568
3598
  structured_inherits: TensorIteratorBase
3569
3599
  dispatch:
3570
- CPU, CUDA: log2_out
3571
- MPS: log2_out_mps
3600
+ CPU, CUDA, MPS, MTIA: log2_out
3572
3601
  tags: pointwise
3573
3602
 
3574
3603
  - func: logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -3715,6 +3744,7 @@
3715
3744
  dispatch:
3716
3745
  CPU: log_softmax_cpu_out
3717
3746
  CUDA: log_softmax_cuda_out
3747
+ MTIA: log_softmax_mtia_out
3718
3748
  MPS: log_softmax_mps_out
3719
3749
 
3720
3750
  - func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
@@ -3725,17 +3755,20 @@
3725
3755
  dispatch:
3726
3756
  CPU: log_softmax_backward_cpu_out
3727
3757
  CUDA: log_softmax_backward_cuda_out
3758
+ MTIA: log_softmax_backward_mtia_out
3728
3759
  MPS: log_softmax_backward_mps_out
3729
3760
 
3730
3761
  - func: _logcumsumexp(Tensor self, int dim) -> Tensor
3731
3762
  dispatch:
3732
3763
  CPU: _logcumsumexp_cpu
3733
3764
  CUDA: _logcumsumexp_cuda
3765
+ MPS: _logcumsumexp_mps
3734
3766
 
3735
3767
  - func: _logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
3736
3768
  dispatch:
3737
3769
  CPU: _logcumsumexp_out_cpu
3738
3770
  CUDA: _logcumsumexp_out_cuda
3771
+ MPS: _logcumsumexp_out_mps
3739
3772
 
3740
3773
  - func: logcumsumexp(Tensor self, int dim) -> Tensor
3741
3774
  variants: function, method
@@ -3776,17 +3809,17 @@
3776
3809
  variants: function, method
3777
3810
  dispatch:
3778
3811
  CompositeImplicitAutograd: matmul
3779
- NestedTensorCPU, NestedTensorCUDA: matmul_nested
3812
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_nested
3780
3813
 
3781
3814
  - func: matmul_backward(Tensor grad, Tensor self, Tensor other, bool[2] mask) -> (Tensor, Tensor)
3782
3815
  dispatch:
3783
- NestedTensorCPU, NestedTensorCUDA: matmul_backward_nested
3816
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_backward_nested
3784
3817
  autogen: matmul_backward.out
3785
3818
 
3786
3819
  - func: matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
3787
3820
  dispatch:
3788
3821
  CompositeImplicitAutograd: matmul_out
3789
- NestedTensorCPU, NestedTensorCUDA: matmul_out_nested
3822
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_out_nested
3790
3823
 
3791
3824
  # Alias to linalg.matrix_power
3792
3825
  - func: matrix_power(Tensor self, int n) -> Tensor
@@ -3848,7 +3881,7 @@
3848
3881
  precomputed:
3849
3882
  - dim -> int dim
3850
3883
  dispatch:
3851
- CPU, CUDA: max_out
3884
+ CPU, CUDA, MTIA: max_out
3852
3885
  MPS: max_out_mps
3853
3886
 
3854
3887
  - func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
@@ -4004,6 +4037,7 @@
4004
4037
  dispatch:
4005
4038
  CPU: nanmedian_cpu
4006
4039
  CUDA: nanmedian_cuda
4040
+ MPS: nanmedian_mps
4007
4041
  autogen: nanmedian.out
4008
4042
 
4009
4043
  - func: nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
@@ -4015,6 +4049,7 @@
4015
4049
  dispatch:
4016
4050
  CPU: nanmedian_out_cpu
4017
4051
  CUDA: nanmedian_out_cuda
4052
+ MPS: nanmedian_out_mps
4018
4053
 
4019
4054
  - func: nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
4020
4055
  variants: function, method
@@ -4035,7 +4070,7 @@
4035
4070
  precomputed:
4036
4071
  - dim -> int dim
4037
4072
  dispatch:
4038
- CPU, CUDA: min_out
4073
+ CPU, CUDA, MTIA: min_out
4039
4074
  MPS: min_out_mps
4040
4075
 
4041
4076
  - func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
@@ -4143,20 +4178,31 @@
4143
4178
  dispatch:
4144
4179
  CPU: mm_out_cpu
4145
4180
  CUDA: mm_out_cuda
4181
+ MTIA: mm_out_mtia
4146
4182
  MPS: mm_out_mps
4147
4183
  XPU: mm_out_xpu
4148
4184
  SparseCPU, SparseCUDA: _sparse_mm_out
4149
4185
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out
4150
4186
 
4187
+ - func: mm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
4188
+ dispatch:
4189
+ CUDA: _mm_dtype_cuda
4190
+
4191
+ - func: mm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
4192
+ dispatch:
4193
+ CUDA: _mm_dtype_out_cuda
4194
+
4151
4195
  - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
4152
4196
  dispatch:
4153
4197
  CPU: _int_mm_cpu
4154
4198
  CUDA: _int_mm_cuda
4199
+ XPU: _int_mm_xpu
4155
4200
 
4156
4201
  - func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
4157
4202
  dispatch:
4158
4203
  CPU: _int_mm_out_cpu
4159
4204
  CUDA: _int_mm_out_cuda
4205
+ XPU: _int_mm_out_xpu
4160
4206
 
4161
4207
  - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
4162
4208
  dispatch:
@@ -4168,6 +4214,10 @@
4168
4214
  MPS: _weight_int4pack_mm_mps
4169
4215
  CUDA: _weight_int4pack_mm_cuda
4170
4216
 
4217
+ - func: _weight_int4pack_mm_with_scales_and_zeros(Tensor self, Tensor mat2, int qGroupSize, Tensor qScale, Tensor qZeros) -> Tensor
4218
+ dispatch:
4219
+ XPU: _weight_int4pack_mm_xpu
4220
+
4171
4221
  # Split int4 pack weight between cpu and other devices due to
4172
4222
  # https://github.com/pytorch/ao/issues/1117#issuecomment-2451252756.
4173
4223
  - func: _convert_weight_to_int4pack_for_cpu(Tensor self, int innerKTiles) -> Tensor
@@ -4189,6 +4239,7 @@
4189
4239
  - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
4190
4240
  dispatch:
4191
4241
  CPU: _weight_int8pack_mm_cpu
4242
+ CUDA: _weight_int8pack_mm_cuda
4192
4243
  MPS: _weight_int8pack_mm_mps
4193
4244
 
4194
4245
  - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
@@ -4226,7 +4277,7 @@
4226
4277
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
4227
4278
  MkldnnCPU: mkldnn_mul
4228
4279
  ZeroTensor: mul_zerotensor
4229
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
4280
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul_Tensor
4230
4281
  tags: [core, pointwise]
4231
4282
 
4232
4283
  - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -4237,7 +4288,7 @@
4237
4288
  SparseCPU, SparseCUDA: mul_sparse_
4238
4289
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
4239
4290
  MkldnnCPU: mkldnn_mul_
4240
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
4291
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Tensor
4241
4292
  tags: pointwise
4242
4293
 
4243
4294
  - func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -4245,8 +4296,7 @@
4245
4296
  structured: True
4246
4297
  structured_inherits: TensorIteratorBase
4247
4298
  dispatch:
4248
- CPU, CUDA: mul_out
4249
- MPS: mul_out_mps
4299
+ CPU, CUDA, MPS, MTIA: mul_out
4250
4300
  SparseCPU: mul_out_sparse_cpu
4251
4301
  SparseCUDA: mul_out_sparse_cuda
4252
4302
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
@@ -4260,7 +4310,7 @@
4260
4310
  dispatch:
4261
4311
  CompositeExplicitAutograd: mul
4262
4312
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
4263
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
4313
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul_Scalar
4264
4314
  tags: [core, pointwise]
4265
4315
 
4266
4316
  - func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -4269,7 +4319,7 @@
4269
4319
  dispatch:
4270
4320
  CompositeExplicitAutograd: mul_
4271
4321
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
4272
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
4322
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Scalar
4273
4323
  autogen: mul.Scalar_out
4274
4324
  tags: pointwise
4275
4325
  # multiply, alias for mul
@@ -4335,7 +4385,7 @@
4335
4385
  device_guard: False
4336
4386
  dispatch:
4337
4387
  CompositeImplicitAutograd: narrow_symint
4338
- NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint
4388
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: narrow_nested_symint
4339
4389
 
4340
4390
  - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
4341
4391
  variants: function, method
@@ -4474,7 +4524,7 @@
4474
4524
  # NB: Although this composite mutates on the inside, it is
4475
4525
  # non-differentiable so NonFunctional doesn't apply
4476
4526
  CompositeExplicitAutograd: ones_like
4477
- NestedTensorCPU, NestedTensorCUDA: ones_like
4527
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: ones_like
4478
4528
  autogen: ones_like.out
4479
4529
 
4480
4530
  - func: pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor
@@ -4618,7 +4668,7 @@
4618
4668
  variants: function, method
4619
4669
  dispatch:
4620
4670
  CompositeExplicitAutograd: rad2deg
4621
- SparseCPU, SparseCUDA: rad2deg_sparse
4671
+ SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse
4622
4672
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr
4623
4673
  tags: pointwise
4624
4674
 
@@ -4626,14 +4676,14 @@
4626
4676
  variants: function, method
4627
4677
  dispatch:
4628
4678
  CompositeExplicitAutograd: rad2deg_
4629
- SparseCPU, SparseCUDA: rad2deg_sparse_
4679
+ SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse_
4630
4680
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_
4631
4681
  tags: pointwise
4632
4682
 
4633
4683
  - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4634
4684
  dispatch:
4635
4685
  CompositeExplicitAutograd: rad2deg_out
4636
- SparseCPU, SparseCUDA: rad2deg_sparse_out
4686
+ SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse_out
4637
4687
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out
4638
4688
  tags: pointwise
4639
4689
 
@@ -4641,7 +4691,7 @@
4641
4691
  variants: function, method
4642
4692
  dispatch:
4643
4693
  CompositeExplicitAutograd: deg2rad
4644
- SparseCPU, SparseCUDA: deg2rad_sparse
4694
+ SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse
4645
4695
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr
4646
4696
  tags: pointwise
4647
4697
 
@@ -4649,14 +4699,14 @@
4649
4699
  variants: function, method
4650
4700
  dispatch:
4651
4701
  CompositeExplicitAutograd: deg2rad_
4652
- SparseCPU, SparseCUDA: deg2rad_sparse_
4702
+ SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse_
4653
4703
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_
4654
4704
  tags: pointwise
4655
4705
 
4656
4706
  - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4657
4707
  dispatch:
4658
4708
  CompositeExplicitAutograd: deg2rad_out
4659
- SparseCPU, SparseCUDA: deg2rad_sparse_out
4709
+ SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse_out
4660
4710
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_out
4661
4711
  tags: pointwise
4662
4712
 
@@ -4756,6 +4806,14 @@
4756
4806
  CompositeExplicitAutograd: randint_like
4757
4807
  autogen: randint_like.out
4758
4808
 
4809
+ - func: randint_like.Tensor(Tensor self, Tensor high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
4810
+ tags: nondeterministic_seeded
4811
+ dispatch:
4812
+ # NB: Although this composite mutates on the inside, it is
4813
+ # non-differentiable so NonFunctional doesn't apply
4814
+ CompositeExplicitAutograd: randint_like
4815
+ autogen: randint_like.Tensor_out
4816
+
4759
4817
  - func: randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
4760
4818
  tags: nondeterministic_seeded
4761
4819
  dispatch:
@@ -4865,7 +4923,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: reciprocal_out
+ CPU, CUDA, MTIA: reciprocal_out
  MPS: reciprocal_out_mps
  tags: pointwise

@@ -4874,9 +4932,9 @@
  structured_delegate: neg.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: neg_sparse
+ SparseCPU, SparseCUDA, SparseMPS: neg_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg
  tags: [core, pointwise]

  - func: neg_(Tensor(a!) self) -> Tensor(a!)
@@ -4884,9 +4942,9 @@
  structured_delegate: neg.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: neg_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: neg_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg_
  tags: pointwise

  - func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -4894,9 +4952,8 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: neg_out
- MPS: neg_out_mps
- SparseCPU, SparseCUDA: neg_out_sparse
+ CPU, CUDA, MPS, MTIA: neg_out
+ SparseCPU, SparseCUDA, SparseMPS: neg_out_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
  tags: pointwise
  # Alias for neg
@@ -4957,7 +5014,7 @@
  device_check: NoCheck
  device_guard: False
  dispatch:
- CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS: _reshape_alias
+ CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS, MTIA: _reshape_alias
  # We don't need to support mkldnn since this is handled explicitly by the reshape operator.

  - func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
@@ -4980,7 +5037,7 @@
  structured_delegate: round.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: round_sparse
+ SparseCPU, SparseCUDA, SparseMPS: round_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr
  tags: [core, pointwise]

@@ -4989,7 +5046,7 @@
  structured_delegate: round.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: round_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: round_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_
  tags: pointwise

@@ -4999,7 +5056,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA, MPS: round_out
- SparseCPU, SparseCUDA: round_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: round_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
  tags: pointwise

@@ -5037,11 +5094,12 @@
  dispatch:
  CPU, CUDA: relu
  MPS: relu_mps
+ MTIA: relu_mtia
  MkldnnCPU: mkldnn_relu
  QuantizedCPU: relu_quantized_cpu
  QuantizedCUDA: relu_quantized_cuda
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
- SparseCPU, SparseCUDA: relu_sparse
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu
+ SparseCPU, SparseCUDA, SparseMPS: relu_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
  tags: [core, pointwise]

@@ -5051,11 +5109,12 @@
  dispatch:
  CPU, CUDA: relu_
  MPS: relu_mps_
+ MTIA: relu_mtia_
  MkldnnCPU: mkldnn_relu_
  QuantizedCPU: relu_quantized_cpu_
  QuantizedCUDA: relu_quantized_cuda_
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
- SparseCPU, SparseCUDA: relu_sparse_
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu_
+ SparseCPU, SparseCUDA, SparseMPS: relu_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
  autogen: relu.out
  tags: pointwise
@@ -5100,7 +5159,7 @@
  python_module: nn
  dispatch:
  QuantizedCPU: gelu_quantized_cpu_
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_gelu_

  - func: gelu(Tensor self, *, str approximate='none') -> Tensor
  structured_delegate: gelu.out
@@ -5110,7 +5169,7 @@
  MkldnnCPU: mkldnn_gelu
  QuantizedCPU: gelu_quantized_cpu
  QuantizedCUDA: gelu_quantized_cuda
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_gelu
  tags: [core, pointwise]

  - func: gelu_backward.grad_input(Tensor grad_output, Tensor self, *, str approximate='none', Tensor(a!) grad_input) -> Tensor(a!)
@@ -5127,7 +5186,7 @@
  python_module: nn
  dispatch:
  MkldnnCPU: mkldnn_gelu_backward
- NestedTensorCPU, NestedTensorCUDA: gelu_backwards_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: gelu_backwards_nested
  tags: pointwise

  - func: infinitely_differentiable_gelu_backward(Tensor grad, Tensor self) -> Tensor
@@ -5141,7 +5200,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: hardshrink_out
+ CPU, CUDA, MPS: hardshrink_out

  - func: hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor
  structured_delegate: hardshrink.out
@@ -5153,7 +5212,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: hardshrink_backward_out
+ CPU, CUDA, MPS: hardshrink_backward_out

  - func: hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor
  structured_delegate: hardshrink_backward.grad_input
@@ -5176,8 +5235,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: rsqrt_out
- MPS: rsqrt_out_mps
+ CPU, CUDA, MPS, MTIA: rsqrt_out
  tags: pointwise

  - func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)
@@ -5192,7 +5250,7 @@
  dispatch:
  CompositeExplicitAutograd: select_symint
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
- NestedTensorCPU, NestedTensorCUDA: select_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: select_nested
  tags: core

  - func: select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor
@@ -5208,7 +5266,7 @@
  device_check: NoCheck
  device_guard: False
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: _nested_select_backward_symint
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_select_backward_symint

  - func: selu(Tensor self) -> Tensor
  device_check: NoCheck # TensorIterator
@@ -5233,14 +5291,14 @@
  structured_delegate: silu.out
  python_module: nn
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_silu
  tags: pointwise

  - func: silu_(Tensor(a!) self) -> Tensor(a!)
  structured_delegate: silu.out
  python_module: nn
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_silu_
  tags: pointwise

  - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5248,7 +5306,7 @@
  structured_inherits: TensorIteratorBase
  python_module: nn
  dispatch:
- CPU, CUDA: silu_out
+ CPU, CUDA, MTIA: silu_out
  MPS: silu_out_mps
  tags: pointwise

@@ -5266,7 +5324,7 @@
  python_module: nn
  dispatch:
  CompositeImplicitAutograd: math_silu_backward
- NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: silu_backward_nested
  tags: pointwise

  - func: mish(Tensor self) -> Tensor
@@ -5315,14 +5373,13 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: sigmoid_out
- MPS: sigmoid_out_mps
+ CPU, CUDA, MPS: sigmoid_out
  tags: pointwise

  - func: logit(Tensor self, float? eps=None) -> Tensor
  variants: function, method
  dispatch:
- CPU, CUDA: logit
+ CPU, CUDA, MTIA: logit
  MPS: logit_mps
  tags: pointwise

@@ -5344,8 +5401,8 @@
  variants: function, method
  dispatch:
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
- SparseCPU, SparseCUDA: sin_sparse
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_sin
+ SparseCPU, SparseCUDA, SparseMPS: sin_sparse
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sin
  tags: [core, pointwise]

  - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5354,7 +5411,7 @@
  variants: function, method
  dispatch:
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_
- SparseCPU, SparseCUDA: sin_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: sin_sparse_
  tags: pointwise

  - func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5362,10 +5419,9 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: sin_out
- MPS: sin_out_mps
+ CPU, CUDA, MPS, MTIA: sin_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
- SparseCPU, SparseCUDA: sin_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: sin_sparse_out
  tags: pointwise

  - func: sinc(Tensor self) -> Tensor
@@ -5390,7 +5446,7 @@
  structured_delegate: sinh.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: sinh_sparse
+ SparseCPU, SparseCUDA, SparseMPS: sinh_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr
  tags: [core, pointwise]

@@ -5399,7 +5455,7 @@
  structured_delegate: sinh.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: sinh_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: sinh_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_
  tags: pointwise

@@ -5408,9 +5464,8 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: sinh_out
- MPS: sinh_out_mps
- SparseCPU, SparseCUDA: sinh_sparse_out
+ CPU, CUDA, MPS: sinh_out
+ SparseCPU, SparseCUDA, SparseMPS: sinh_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out

  # Returns a copy of this `Variable` that is detached from its autograd graph.
@@ -5429,7 +5484,7 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: detach
- NestedTensorCPU, NestedTensorCUDA: detach
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: detach

  # Like `detach()`, but modifies this `Variable` in-place. This method may
  # only be called on non-view `Variable`s. You can use `is_view()` to check
@@ -5458,6 +5513,13 @@
  tags: core
  manual_cpp_binding: True

+ - func: sym_is_contiguous(Tensor self, MemoryFormat memory_format=contiguous_format) -> SymBool
+ variants: function
+ device_check: NoCheck
+ device_guard: False
+ tags: core
+ manual_cpp_binding: True
+
  - func: sym_numel(Tensor self) -> SymInt
  variants: function
  device_check: NoCheck
@@ -5559,7 +5621,7 @@
  structured_delegate: _softmax.out
  dispatch:
  MkldnnCPU: mkldnn_softmax
- NestedTensorCPU, NestedTensorCUDA: softmax_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: softmax_nested
  tags: core

  - func: _softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
@@ -5572,7 +5634,7 @@
  - func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
  structured_delegate: _softmax_backward_data.out
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: nested_softmax_backward
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_softmax_backward

  - func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
  structured: True
@@ -5616,7 +5678,7 @@
  device_guard: False
  dispatch:
  CompositeExplicitAutograd: split_with_sizes
- NestedTensorCPU, NestedTensorCUDA: split_with_sizes_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: split_with_sizes_nested
  tags: core

  - func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
@@ -5644,7 +5706,7 @@
  dispatch:
  CompositeExplicitAutograd: squeeze
  QuantizedCPU, QuantizedCUDA: squeeze_quantized
- NestedTensorCPU, NestedTensorCUDA: squeeze_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_nested

  - func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
  variants: function, method
@@ -5653,7 +5715,7 @@
  dispatch:
  CompositeExplicitAutograd: squeeze
  QuantizedCPU, QuantizedCUDA: squeeze_quantized
- NestedTensorCPU, NestedTensorCUDA: squeeze_dim_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_dim_nested
  tags: core

  - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
@@ -5669,7 +5731,7 @@
  dispatch:
  CompositeExplicitAutograd: squeeze
  QuantizedCPU, QuantizedCUDA: squeeze_quantized
- NestedTensorCPU, NestedTensorCUDA: squeeze_dim_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_dim_nested
  tags: core

  - func: squeeze_(Tensor(a!) self) -> Tensor(a!)
@@ -5831,6 +5893,15 @@
  CPU, CUDA: nansum_out
  MPS: nansum_out_mps

+ - func: hash_tensor(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0) -> Tensor
+ variants: function, method
+ structured_delegate: hash_tensor.out
+
+ - func: hash_tensor.out(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0, Tensor(a!) out) -> Tensor(a!)
+ structured: True
+ dispatch:
+ CPU, CUDA: hash_tensor_out
+
  - func: sum_to_size(Tensor self, SymInt[] size) -> Tensor
  variants: method
  device_check: NoCheck
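hash_tensor is new in this release: a reduction that folds tensor contents down to hash values, with optional dim/keepdim semantics like sum, on CPU and CUDA only per the dispatch table above. Exposure on the Ruby side is an assumption — the sketch below presumes the generated bindings pick it up as Torch.hash_tensor:

    require "torch"

    t = Torch.arange(12).reshape(3, 4)

    # Hypothetical binding: hash the whole tensor, then one hash per row.
    whole  = Torch.hash_tensor(t)
    by_row = Torch.hash_tensor(t, [1])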
@@ -5843,8 +5914,8 @@
  structured_delegate: sqrt.out
  variants: function, method
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_sqrt
- SparseCPU, SparseCUDA: sqrt_sparse
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sqrt
+ SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
  tags: [core, pointwise]

@@ -5853,7 +5924,7 @@
  structured_delegate: sqrt.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: sqrt_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_
  tags: pointwise

@@ -5862,8 +5933,8 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA, MPS: sqrt_out
- SparseCPU, SparseCUDA: sqrt_sparse_out
+ CPU, CUDA, MPS, MTIA: sqrt_out
+ SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
  tags: pointwise

@@ -6001,7 +6072,7 @@
  structured_delegate: tan.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: tan_sparse
+ SparseCPU, SparseCUDA, SparseMPS: tan_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr
  tags: [core, pointwise]

@@ -6010,7 +6081,7 @@
  structured_delegate: tan.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: tan_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: tan_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_
  tags: pointwise

@@ -6019,9 +6090,8 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: tan_out
- MPS: tan_out_mps
- SparseCPU, SparseCUDA: tan_sparse_out
+ CPU, CUDA, MPS: tan_out
+ SparseCPU, SparseCUDA, SparseMPS: tan_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
  tags: pointwise

@@ -6032,9 +6102,9 @@
  dispatch:
  QuantizedCPU: tanh_quantized_cpu
  MkldnnCPU: mkldnn_tanh
- SparseCPU, SparseCUDA: tanh_sparse
+ SparseCPU, SparseCUDA, SparseMPS: tanh_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh
  tags: [core, pointwise]

  - func: tanh_(Tensor(a!) self) -> Tensor(a!)
@@ -6043,9 +6113,9 @@
  variants: function, method
  dispatch:
  MkldnnCPU: mkldnn_tanh_
- SparseCPU, SparseCUDA: tanh_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: tanh_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh_
  tags: pointwise

  - func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -6053,8 +6123,8 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA, MPS: tanh_out
- SparseCPU, SparseCUDA: tanh_sparse_out
+ CPU, CUDA, MPS, MTIA: tanh_out
+ SparseCPU, SparseCUDA, SparseMPS: tanh_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
  tags: pointwise

@@ -6102,7 +6172,7 @@
  MkldnnCPU: mkldnn_relu_backward
  SparseCPU, SparseCUDA: threshold_backward_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
- NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: threshold_backwards_nested
  tags: pointwise

  - func: tile(Tensor self, SymInt[] dims) -> Tensor
@@ -6116,7 +6186,7 @@
  device_guard: False
  dispatch:
  CompositeExplicitAutograd: transpose
- NestedTensorCPU, NestedTensorCUDA: transpose_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: transpose_nested

  - func: transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a)
  variants: function, method
@@ -6213,13 +6283,13 @@
  - func: _nested_tensor_size(Tensor self) -> Tensor
  variants: method
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: _nested_tensor_size
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_tensor_size
  autogen: _nested_tensor_size.out

  - func: _nested_tensor_strides(Tensor self) -> Tensor
  variants: method
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: _nested_tensor_strides
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_tensor_strides
  autogen: _nested_tensor_strides.out

  - func: _nested_tensor_storage_offsets(Tensor self) -> Tensor
@@ -6232,7 +6302,7 @@
  # _nested_from_padded_and_nested_example is available for testing.
  - func: _nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
  autogen: _nested_from_padded_and_nested_example.out

  # The input arguments' types to this functions are temporary. When nested tensors switch to using SymInts for their metadata representation
@@ -6326,8 +6396,8 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: trunc_sparse
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr
+ SparseCPU, SparseCUDA, SparseMPS: trunc_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr
  tags: [core, pointwise]

  - func: trunc_(Tensor(a!) self) -> Tensor(a!)
@@ -6335,8 +6405,8 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: trunc_sparse_
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_
+ SparseCPU, SparseCUDA, SparseMPS: trunc_sparse_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr_
  tags: pointwise

  - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -6345,8 +6415,8 @@
  device_check: NoCheck # TensorIterator
  dispatch:
  CPU, CUDA, MPS: trunc_out
- SparseCPU, SparseCUDA: trunc_sparse_out
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
+ SparseCPU, SparseCUDA, SparseMPS: trunc_sparse_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr_out
  tags: pointwise
  # Alias for trunc

@@ -6423,7 +6493,7 @@
  CompositeExplicitAutograd: unsqueeze
  SparseCPU, SparseCUDA: unsqueeze_sparse
  QuantizedCPU, QuantizedCUDA: unsqueeze_quantized
- NestedTensorCPU, NestedTensorCUDA: unsqueeze_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: unsqueeze_nested
  tags: core

  - func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
@@ -6517,15 +6587,15 @@
  device_check: NoCheck # TensorIterator
  variants: function, method
  dispatch:
- CPU, CUDA, MPS: where
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
+ CPU, CUDA, MPS, MTIA: where
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_where
  tags: [core, pointwise]

  - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA, MPS: where_self_out
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_where_out
+ CPU, CUDA, MPS, MTIA: where_self_out
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_where_out

  - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
  variants: function
@@ -6856,11 +6926,11 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: clone
- SparseCPU, SparseCUDA: clone_sparse
+ SparseCPU, SparseCUDA, SparseMPS: clone_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
  MkldnnCPU: mkldnn_clone
  QuantizedCPU, QuantizedCUDA: quantized_clone
- NestedTensorCPU, NestedTensorCUDA: clone_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: clone_nested
  autogen: clone.out
  tags: [core, pointwise]

@@ -6891,10 +6961,10 @@
  CPU, CUDA: zero_
  MPS: zero_mps_
  Meta: zero_meta_
- SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: zero_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
  MkldnnCPU: mkldnn_zero_
- NestedTensorCPU, NestedTensorCUDA: zero_nested_
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: zero_nested_
  autogen: zero, zero.out

  - func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -6904,6 +6974,7 @@
  dispatch:
  CPU, CUDA: sub_out
  MPS: sub_out_mps
+ MTIA: sub_out_mtia
  SparseCPU, SparseCUDA: sub_out_sparse
  tags: pointwise

@@ -6914,7 +6985,7 @@
  dispatch:
  SparseCPU, SparseCUDA: sub_sparse
  ZeroTensor: sub_zerotensor
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_sub_Tensor
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sub_Tensor
  tags: [core, pointwise]

  - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
@@ -6961,7 +7032,7 @@
  device_check: NoCheck # TensorIterator
  variants: function
  dispatch:
- CPU, CUDA: rsub
+ CPU, CUDA, MPS, MTIA: rsub
  autogen: rsub.Tensor_out

  - func: heaviside.out(Tensor self, Tensor values, *, Tensor(a!) out) -> Tensor(a!)
@@ -7029,6 +7100,7 @@
  CUDA: addmm_out_cuda
  MPS: addmm_out_mps
  XPU: addmm_out_xpu
+ MTIA: addmm_out_mtia
  SparseCPU: addmm_out_sparse_dense_cpu
  SparseCUDA: addmm_out_sparse_dense_cuda
  SparseCsrCPU: addmm_out_sparse_compressed_cpu
@@ -7043,6 +7115,14 @@
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
  tags: core

+ - func: addmm.dtype(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+ dispatch:
+ CUDA: _addmm_dtype_cuda
+
+ - func: addmm.dtype_out(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CUDA: _addmm_dtype_out_cuda
+
  - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
  structured_delegate: addmm.out
  variants: method
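The new addmm.dtype overload (CUDA-only above) computes beta * self + alpha * mat1 @ mat2 while accumulating into an explicitly requested output dtype — the typical case being half-precision inputs with a float32 result. A hedged sketch, assuming the generated torch-rb binding accepts the extra positional out_dtype argument:

    require "torch"

    bias = Torch.zeros(4, 4, dtype: :float16, device: "cuda")
    a    = Torch.rand(4, 8, dtype: :float16, device: "cuda")
    b    = Torch.rand(8, 4, dtype: :float16, device: "cuda")

    # out_dtype is positional in the schema: fp16 inputs, fp32 output.
    out = Torch.addmm(bias, a, b, :float32)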
@@ -7066,18 +7146,29 @@
  - func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
  variants: function
  dispatch:
+ CPU: _scaled_mm_cpu
  CUDA: _scaled_mm_cuda
+ tags: needs_exact_strides

  - func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
  variants: function
  dispatch:
+ CPU: _scaled_mm_out_cpu
  CUDA: _scaled_mm_out_cuda
+ tags: needs_exact_strides


  - func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
  variants: function
  dispatch:
  CUDA: _scaled_grouped_mm_cuda
+ tags: needs_exact_strides
+
+ - func: _grouped_mm(Tensor self, Tensor mat2, Tensor? offs=None, Tensor? bias=None, ScalarType? out_dtype=None) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _grouped_mm
+ CUDA: _grouped_mm_cuda

  # NOTE [ Sparse: autograd and API ]
  #
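In this hunk _scaled_mm gains a CPU kernel and the needs_exact_strides tag, and _grouped_mm is introduced: one call runs one GEMM per group, with group boundaries given by an offsets tensor — the building block for mixture-of-experts layers. The sketch below is hypothetical on the Ruby side (underscore-prefixed internals may not be bound) and assumes the 2-D-input/3-D-weight form:

    require "torch"

    x    = Torch.rand(6, 8)                    # rows 0..2 -> group 0, rows 3..5 -> group 1
    w    = Torch.rand(2, 8, 4)                 # one 8x4 weight matrix per group
    offs = Torch.tensor([3, 6], dtype: :int32) # exclusive end offset of each group

    y = Torch._grouped_mm(x, w, offs)          # => 6x4 result; hypothetical binding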
@@ -7233,36 +7324,36 @@
  dispatch:
  CompositeImplicitAutograd: _sparse_coo_tensor_unsafe_symint

- - func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None) -> ()
+ - func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None, bool? check_pinning=None) -> ()

- - func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()
- - func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
- - func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
- - func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
- - func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
+ - func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout, bool? check_pinning=None) -> ()
+ - func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+ - func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+ - func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+ - func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()

  - func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_sparse
+ SparseCPU, SparseCUDA, SparseMeta, SparseMPS, Meta: new_with_dims_sparse
  autogen: _sparse_coo_tensor_with_dims.out

  - func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_and_tensor_sparse_symint
+ SparseCPU, SparseCUDA, SparseMeta, SparseMPS, Meta: new_with_dims_and_tensor_sparse_symint
  autogen: _sparse_coo_tensor_with_dims_and_tensors.out

  - func: sparse_resize_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
  use_const_ref_for_mutable_tensors: True
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: sparse_resize_
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_resize_
  autogen: sparse_resize, sparse_resize.out

  - func: sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
  use_const_ref_for_mutable_tensors: True
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: sparse_resize_and_clear_
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_resize_and_clear_
  autogen: sparse_resize_and_clear, sparse_resize_and_clear.out

  - func: sparse_mask(Tensor self, Tensor mask) -> Tensor
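Two things land in this hunk: every _validate_sparse_*_tensor_args helper grows an optional check_pinning flag (pass false to skip the pinned-memory check), and the sparse COO constructors are routed to SparseMPS, so a sparse tensor can be allocated on an MPS device directly. Hedged sketch; these underscore helpers being reachable from Ruby is an assumption:

    require "torch"

    indices = Torch.tensor([[0, 1], [1, 0]])
    values  = Torch.tensor([1.0, 2.0])

    # nil leaves is_coalesced unspecified; false skips the pin-memory check.
    Torch._validate_sparse_coo_tensor_args(indices, values, [2, 2], nil, false)

    # With the SparseMPS constructor entries above, allocating on MPS works:
    on_gpu = Torch.sparse_coo_tensor(indices, values, [2, 2], device: "mps")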
@@ -7288,8 +7379,8 @@
  - func: _to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor
  variants: method
  dispatch:
- SparseCPU, SparseCUDA: sparse_to_dense
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_dense
+ SparseCPU, SparseCUDA, SparseMPS: sparse_to_dense
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: sparse_compressed_to_dense
  MkldnnCPU: mkldnn_to_dense
  autogen: _to_dense.out

@@ -7298,8 +7389,8 @@
  - func: sparse_dim(Tensor self) -> int
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_dim_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: sparse_dim_sparse_csr
  CompositeExplicitAutograd: sparse_dim_default
  device_check: NoCheck
  device_guard: False
@@ -7315,8 +7406,8 @@
  - func: dense_dim(Tensor self) -> int
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: dense_dim_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: dense_dim_sparse_csr
  CompositeExplicitAutograd: dense_dim_default
  device_check: NoCheck
  device_guard: False
@@ -7332,8 +7423,8 @@
  - func: _nnz(Tensor self) -> int
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _nnz_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: _nnz_sparse_csr
  device_check: NoCheck
  device_guard: False

@@ -7349,12 +7440,13 @@
  dispatch:
  SparseCPU: _coalesce_sparse_cpu
  SparseCUDA: _coalesce_sparse_cuda
+ SparseMPS: _coalesce_sparse_mps
  autogen: _coalesce.out

  - func: is_coalesced(Tensor self) -> bool
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: is_coalesced_sparse
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: is_coalesced_sparse
  CompositeExplicitAutograd: is_coalesced_default
  device_check: NoCheck
  device_guard: False
@@ -7362,14 +7454,14 @@
  - func: _indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: _indices_sparse
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _indices_sparse
  device_check: NoCheck
  device_guard: False

  - func: _values(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: _values_sparse
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _values_sparse
  device_check: NoCheck
  device_guard: False

@@ -7379,7 +7471,7 @@
  - func: _coalesced_(Tensor(a!) self, bool coalesced) -> Tensor(a!)
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: _coalesced_sparse_
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _coalesced_sparse_
  device_check: NoCheck
  device_guard: False
  autogen: _coalesced, _coalesced.out
@@ -7387,7 +7479,7 @@
  - func: indices(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: indices_sparse
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: indices_sparse
  CompositeExplicitAutograd: indices_default
  device_check: NoCheck
  device_guard: False
@@ -7395,9 +7487,9 @@
  - func: values(Tensor(a) self) -> Tensor(a)
  variants: method
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: values_sparse
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: values_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
- NestedTensorCPU, NestedTensorCUDA: values_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: values_nested
  CompositeExplicitAutograd: values_default
  device_check: NoCheck
  device_guard: False
@@ -7448,7 +7540,7 @@
  device_check: NoCheck # Allows copy into different device
  variants: function
  dispatch:
- SparseCPU, SparseCUDA, SparseMeta: copy_sparse_
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: copy_sparse_
  autogen: copy_sparse_to_sparse, copy_sparse_to_sparse.out

  # By adding the AutogradNestedTensor this makes this function CompositeImplicit-like for nested tensors
@@ -7456,7 +7548,7 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: unbind
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_unbind

  - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
  variants: function, method
@@ -7468,9 +7560,9 @@
  - func: _to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
  variants: method
  dispatch:
- CPU, CUDA: dense_to_sparse
- SparseCPU, SparseCUDA: sparse_coo_to_sparse
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
+ CPU, CUDA, MPS: dense_to_sparse
+ SparseCPU, SparseCUDA, SparseMPS: sparse_coo_to_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta, SparseCsrMPS: sparse_compressed_to_sparse
  autogen: _to_sparse.sparse_dim_out

  - func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
@@ -7480,8 +7572,8 @@
  - func: _to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
  variants: method
  dispatch:
- CPU, CUDA: dense_to_sparse
- SparseCPU, SparseCUDA: sparse_coo_to_sparse
+ CPU, CUDA, MPS: dense_to_sparse
+ SparseCPU, SparseCUDA, SparseMPS: sparse_coo_to_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
  autogen: _to_sparse.out

@@ -7744,7 +7836,7 @@
  device_guard: False
  dispatch:
  CompositeExplicitAutograd: _to_copy
- NestedTensorCPU, NestedTensorCUDA: _to_copy_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _to_copy_nested
  autogen: _to_copy.out
  tags: core

@@ -8030,7 +8122,7 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: masked_fill
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_masked_fill
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_masked_fill
  tags: pointwise

  - func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
@@ -8085,9 +8177,9 @@
  device_check: NoCheck
  device_guard: False
  dispatch:
- ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS: view
+ ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS, MTIA: view
  MkldnnCPU: mkldnn_view
- NestedTensorCPU, NestedTensorCUDA: view_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: view_nested
  tags: core

  # Warning: If you want to change the name or overload name of this
@@ -8315,7 +8407,7 @@
  structured_inherits: TensorIteratorBase
  variants: function
  dispatch:
- CPU, CUDA: bitwise_and_out
+ CPU, CUDA, MTIA: bitwise_and_out
  MPS: bitwise_and_out_mps
  tags: pointwise

@@ -8382,7 +8474,7 @@
  structured_inherits: TensorIteratorBase
  variants: function
  dispatch:
- CPU, CUDA: bitwise_or_out
+ CPU, CUDA, MTIA: bitwise_or_out
  MPS: bitwise_or_out_mps
  tags: pointwise

@@ -8854,7 +8946,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: ne_Scalar_out
+ CPU, CUDA, MTIA: ne_Scalar_out
  MPS: ne_scalar_out_mps
  QuantizedCPU: ne_out_quantized_cpu
  tags: pointwise
@@ -8872,7 +8964,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: ne_Tensor_out
+ CPU, CUDA, MTIA: ne_Tensor_out
  MPS: ne_tensor_out_mps
  QuantizedCPU: ne_out_quantized_cpu
  tags: pointwise
@@ -8917,7 +9009,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: eq_Scalar_out
+ CPU, CUDA, MTIA: eq_Scalar_out
  MPS: eq_scalar_out_mps
  QuantizedCPU: eq_out_quantized_cpu
  tags: pointwise
@@ -8928,7 +9020,7 @@
  variants: method, function
  dispatch:
  QuantizedCPU: eq_quantized_cpu
- NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: eq_scalar_nested
  tags: [core, pointwise]

  - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8936,7 +9028,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: eq_Tensor_out
+ CPU, CUDA, MTIA: eq_Tensor_out
  MPS: eq_tensor_out_mps
  QuantizedCPU: eq_out_quantized_cpu
  tags: pointwise
@@ -8947,7 +9039,7 @@
  variants: method, function
  dispatch:
  QuantizedCPU: eq_quantized_cpu
- NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: eq_tensor_nested
  tags: [core, pointwise]

  - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8955,7 +9047,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: ge_Scalar_out
+ CPU, CUDA, MTIA: ge_Scalar_out
  MPS: ge_scalar_out_mps
  QuantizedCPU: ge_out_quantized_cpu
  tags: pointwise
@@ -8966,7 +9058,7 @@
  variants: method, function
  dispatch:
  QuantizedCPU: ge_quantized_cpu
- NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: ge_scalar_nested
  tags: [core, pointwise]

  - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8974,7 +9066,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: ge_Tensor_out
+ CPU, CUDA, MTIA: ge_Tensor_out
  MPS: ge_tensor_out_mps
  QuantizedCPU: ge_out_quantized_cpu
  tags: pointwise
@@ -9019,7 +9111,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: le_Scalar_out
+ CPU, CUDA, MTIA: le_Scalar_out
  MPS: le_scalar_out_mps
  QuantizedCPU: le_out_quantized_cpu
  tags: pointwise
@@ -9037,7 +9129,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: le_Tensor_out
+ CPU, CUDA, MTIA: le_Tensor_out
  MPS: le_tensor_out_mps
  QuantizedCPU: le_out_quantized_cpu
  tags: pointwise
@@ -9082,7 +9174,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: gt_Scalar_out
+ CPU, CUDA, MTIA: gt_Scalar_out
  MPS: gt_scalar_out_mps
  QuantizedCPU: gt_out_quantized_cpu
  tags: pointwise
@@ -9093,7 +9185,7 @@
  variants: method, function
  dispatch:
  QuantizedCPU: gt_quantized_cpu
- NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: gt_scalar_nested
  tags: [core, pointwise]

  - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -9101,7 +9193,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: gt_Tensor_out
+ CPU, CUDA, MTIA: gt_Tensor_out
  MPS: gt_tensor_out_mps
  QuantizedCPU: gt_out_quantized_cpu
  tags: pointwise
@@ -9146,7 +9238,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: lt_Scalar_out
+ CPU, CUDA, MTIA: lt_Scalar_out
  MPS: lt_scalar_out_mps
  QuantizedCPU: lt_out_quantized_cpu
  tags: pointwise
@@ -9164,7 +9256,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: lt_Tensor_out
+ CPU, CUDA, MTIA: lt_Tensor_out
  MPS: lt_tensor_out_mps
  QuantizedCPU: lt_out_quantized_cpu
  tags: pointwise
@@ -9329,7 +9421,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: addcmul_out
+ CPU, CUDA, MTIA: addcmul_out
  MPS: addcmul_out_mps
  tags: pointwise

@@ -9350,7 +9442,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: addcdiv_out
+ CPU, CUDA, MTIA: addcdiv_out
  MPS: addcdiv_out_mps
  tags: pointwise

@@ -9436,14 +9528,12 @@

  - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: cholesky_out
- MPS: cholesky_mps_out
+ CPU, CUDA, MPS: cholesky_out

  - func: cholesky(Tensor self, bool upper=False) -> Tensor
  variants: method, function
  dispatch:
- CPU, CUDA: cholesky
- MPS: cholesky_mps
+ CPU, CUDA, MPS: cholesky

  - func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
@@ -9520,13 +9610,13 @@
  MPS: lu_unpack_out_mps

  # TODO: remove dispatch section when porting TH CUDA to ATen
- - func: multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
+ - func: multinomial.out(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
  tags: nondeterministic_seeded
  dispatch:
  CPU, CUDA: multinomial_out
  MPS: multinomial_out_mps

- - func: multinomial(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
+ - func: multinomial(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
  variants: method, function
  dispatch:
  CPU, CUDA: multinomial
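For callers nothing changes here — num_samples still takes an ordinary integer in eager code; the int -> SymInt widening only allows the sample count to be a symbolic size under tracing/compilation. Assuming the generated torch-rb binding follows the schema's keyword names:

    require "torch"

    weights = Torch.tensor([0.1, 0.3, 0.6])
    draws = Torch.multinomial(weights, 4, replacement: true)  # e.g. tensor([2, 1, 2, 2])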
@@ -9641,7 +9731,7 @@
  structured_delegate: sign.out
  variants: function, method
  dispatch:
- SparseCPU, SparseCUDA: sign_sparse
+ SparseCPU, SparseCUDA, SparseMPS: sign_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr
  tags: [core, pointwise]

@@ -9650,7 +9740,7 @@
  structured_delegate: sign.out
  variants: method
  dispatch:
- SparseCPU, SparseCUDA: sign_sparse_
+ SparseCPU, SparseCUDA, SparseMPS: sign_sparse_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_
  tags: pointwise

@@ -9661,7 +9751,7 @@
  dispatch:
  CPU, CUDA: sign_out
  MPS: sign_out_mps
- SparseCPU, SparseCUDA: sign_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: sign_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_out
  tags: pointwise

@@ -9669,7 +9759,7 @@
  variants: function, method
  structured_delegate: signbit.out
  dispatch:
- SparseCPU, SparseCUDA: signbit_sparse
+ SparseCPU, SparseCUDA, SparseMPS: signbit_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr
  tags: pointwise

@@ -9680,7 +9770,7 @@
  CPU: signbit_out
  CUDA: signbit_out
  MPS: signbit_out_mps
- SparseCPU, SparseCUDA: signbit_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: signbit_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr_out
  tags: pointwise

@@ -9727,8 +9817,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: lerp_Scalar
- MPS: lerp_Scalar_mps
+ CPU, CUDA, MPS: lerp_Scalar
  tags: pointwise

  - func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)
@@ -9827,8 +9916,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: fmod_out
- MPS: fmod_mps_out
+ CPU, CUDA, MPS, MTIA: fmod_out
  tags: pointwise

  - func: fmod.Tensor(Tensor self, Tensor other) -> Tensor
@@ -9865,7 +9953,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: igamma_out
+ CPU, CUDA, MPS: igamma_out
  tags: pointwise

  - func: igamma(Tensor self, Tensor other) -> Tensor
@@ -9882,7 +9970,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: igammac_out
+ CPU, CUDA, MPS: igammac_out
  tags: pointwise

  - func: igammac(Tensor self, Tensor other) -> Tensor
@@ -9934,8 +10022,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: remainder_out
- MPS: remainder_out_mps
+ CPU, CUDA, MPS, MTIA: remainder_out
  tags: pointwise

  - func: remainder.Tensor(Tensor self, Tensor other) -> Tensor
@@ -10019,7 +10106,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: maximum_out
+ CPU, CUDA, MTIA: maximum_out
  MPS: maximum_out_mps
  tags: pointwise

@@ -10051,7 +10138,7 @@
  structured_inherits: TensorIteratorBase
  device_check: NoCheck # TensorIterator
  dispatch:
- CPU, CUDA: minimum_out
+ CPU, CUDA, MTIA: minimum_out
  MPS: minimum_out_mps
  tags: pointwise

@@ -10203,7 +10290,7 @@
  device_check: NoCheck
  device_guard: False
  dispatch:
- CPU, CUDA, Meta, MPS: unfold
+ CPU, CUDA, Meta, MPS, MTIA: unfold
  QuantizedCPU, QuantizedCUDA: unfold

  - func: unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor
@@ -10316,7 +10403,7 @@
  MPS: normal_mps_
  Meta: normal_meta_
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
- NestedTensorCPU, NestedTensorCUDA: normal_nested_
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: normal_nested_
  autogen: normal.out

  # Only used by the functionalization pass.
@@ -10384,7 +10471,7 @@
  variants: method, function
  dispatch:
  CompositeExplicitAutograd: alias
- NestedTensorCPU, NestedTensorCUDA: alias_nested
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: alias_nested
  tags: core

  - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
@@ -10392,6 +10479,7 @@
  dispatch:
  CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
  CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
+ MPS: _amp_foreach_non_finite_check_and_unscale_mps_
  autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

  - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
@@ -10399,6 +10487,7 @@
  dispatch:
  CUDA: _amp_update_scale_cuda_
  CPU: _amp_update_scale_cpu_
+ MPS: _amp_update_scale_mps_
  autogen: _amp_update_scale, _amp_update_scale.out

  #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
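These two MPS rows register the kernels behind automatic mixed precision — the batched found-inf check and the dynamic loss-scale update — so GradScaler-style training loops can run on Apple-silicon GPUs. torch-rb itself may not ship a scaler wrapper; the loop below uses a hypothetical Torch::Amp::GradScaler purely to show where the new kernels would be exercised:

    require "torch"

    device = "mps"
    model  = Torch::NN::Linear.new(4, 1).to(device)
    opt    = Torch::Optim::SGD.new(model.parameters, lr: 0.1)
    x = Torch.rand(8, 4, device: device)
    y = Torch.rand(8, 1, device: device)

    scaler = Torch::Amp::GradScaler.new   # hypothetical wrapper class
    loss = Torch::NN::F.mse_loss(model.call(x), y)
    scaler.scale(loss).backward           # backward pass on the scaled loss
    scaler.step(opt)                      # unscale, found-inf check, step
    scaler.update                         # grow or shrink the loss scale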
@@ -10427,6 +10516,7 @@
10427
10516
  dispatch:
10428
10517
  CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow_
10429
10518
  CUDA: foreach_tensor_add_scalar_kernel_cuda_
10519
+ MTIA: foreach_tensor_add_scalar_kernel_mtia_
10430
10520
  autogen: _foreach_add.Scalar_out
10431
10521
 
10432
10522
  - func: _foreach_add.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]
@@ -10435,6 +10525,7 @@
10435
10525
  dispatch:
10436
10526
  CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow
10437
10527
  CUDA: foreach_tensor_add_list_kernel_cuda
10528
+ MTIA: foreach_tensor_add_list_kernel_mtia
10438
10529
 
10439
10530
  - func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
10440
10531
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10442,6 +10533,7 @@
10442
10533
  dispatch:
10443
10534
  CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow_
10444
10535
  CUDA: foreach_tensor_add_list_kernel_cuda_
10536
+ MTIA: foreach_tensor_add_list_kernel_mtia_
10445
10537
  autogen: _foreach_add.List_out
10446
10538
 
10447
10539
  - func: _foreach_add.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
@@ -10472,6 +10564,7 @@
10472
10564
  dispatch:
10473
10565
  CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow_
10474
10566
  CUDA: foreach_tensor_add_tensor_kernel_cuda_
10567
+ MTIA: foreach_tensor_add_tensor_kernel_mtia_
10475
10568
  autogen: _foreach_add.Tensor_out
10476
10569
 
  - func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
@@ -10532,6 +10625,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow_
  CUDA: foreach_tensor_mul_scalar_kernel_cuda_
+ MTIA: foreach_tensor_mul_scalar_kernel_mtia_
  autogen: _foreach_mul.Scalar_out

  - func: _foreach_mul.List(Tensor[] self, Tensor[] other) -> Tensor[]
@@ -10540,6 +10634,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow
  CUDA: foreach_tensor_mul_list_kernel_cuda
+ MTIA: foreach_tensor_mul_list_kernel_mtia

  - func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10547,6 +10642,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow_
  CUDA: foreach_tensor_mul_list_kernel_cuda_
+ MTIA: foreach_tensor_mul_list_kernel_mtia_
  autogen: _foreach_mul.List_out

  - func: _foreach_mul.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
@@ -10570,6 +10666,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow
  CUDA: foreach_tensor_mul_tensor_kernel_cuda
+ MTIA: foreach_tensor_mul_tensor_kernel_mtia

  - func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10577,6 +10674,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow_
  CUDA: foreach_tensor_mul_tensor_kernel_cuda_
+ MTIA: foreach_tensor_mul_tensor_kernel_mtia_
  autogen: _foreach_mul.Tensor_out

  - func: _foreach_div.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
@@ -10873,6 +10971,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow
  CUDA: foreach_tensor_addcmul_scalar_cuda
+ MTIA: foreach_tensor_addcmul_scalar_mtia

  - func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10894,6 +10993,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow_
  CUDA: foreach_tensor_addcmul_scalar_cuda_
+ MTIA: foreach_tensor_addcmul_scalar_mtia_
  autogen: _foreach_addcmul.Scalar_out

  - func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
@@ -10918,6 +11018,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_abs_slow
  CUDA: foreach_tensor_abs_cuda
+ MTIA: foreach_tensor_abs_mtia

  - func: _foreach_abs_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10925,6 +11026,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_abs_slow_
  CUDA: foreach_tensor_abs_cuda_
+ MTIA: foreach_tensor_abs_mtia_
  autogen: _foreach_abs.out

  - func: _foreach_acos(Tensor[] self) -> Tensor[]
@@ -11259,6 +11361,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_norm_slow
  CUDA: foreach_tensor_norm_cuda
+ MTIA: foreach_tensor_norm_mtia
  autogen: _foreach_norm.Scalar_out

  - func: _foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]
@@ -11431,6 +11534,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_sqrt_slow_
  CUDA: foreach_tensor_sqrt_cuda_
+ MTIA: foreach_tensor_sqrt_mtia_
  autogen: _foreach_sqrt.out

  - func: _foreach_tan(Tensor[] self) -> Tensor[]
@@ -11492,6 +11596,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_copy_list_kernel_slow_
  CUDA: foreach_tensor_copy_list_kernel_cuda_
+ MTIA: foreach_tensor_copy_list_kernel_mtia_
  autogen: _foreach_copy.out

  - func: _foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out
@@ -11499,6 +11604,7 @@
  variants: function
  dispatch:
  CompositeExplicitAutograd: _foreach_copy
+ MTIA: foreach_tensor_copy_list_kernel_mtia
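
Note: the MTIA additions continue through _foreach_mul, _foreach_addcmul, _foreach_abs, _foreach_norm, _foreach_sqrt, and _foreach_copy above. A hedged sketch of the copy and norm ops, matching the schemas shown; shapes are illustrative:

    import torch

    src = [torch.randn(4), torch.randn(5)]
    dst = [torch.empty(4), torch.empty(5)]

    torch._foreach_copy_(dst, src)    # fused elementwise copy across the whole list
    norms = torch._foreach_norm(dst)  # one L2 norm per tensor (ord defaults to 2)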
 
  - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
  dispatch:
@@ -11801,7 +11907,7 @@
  structured_delegate: elu.out
  device_check: NoCheck # TensorIterator
  python_module: nn
- tags: pointwise
+ tags: [core, pointwise]

  - func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)
  structured: True
@@ -11865,8 +11971,7 @@
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
- CPU, CUDA: hardsigmoid_out
- MPS: hardsigmoid_out_mps
+ CPU, CUDA, MPS: hardsigmoid_out
  QuantizedCPU: hardsigmoid_out_quantized_cpu

  - func: hardsigmoid(Tensor self) -> Tensor
@@ -11887,8 +11992,7 @@
  structured_inherits: TensorIteratorBase
  python_module: nn
  dispatch:
- CPU, CUDA: hardsigmoid_backward_out
- MPS: hardsigmoid_backward_out_mps
+ CPU, CUDA, MPS: hardsigmoid_backward_out

  - func: hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor
  structured_delegate: hardsigmoid_backward.grad_input
@@ -11932,28 +12036,24 @@
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
- CPU, CUDA: hardswish_out
- MPS: hardswish_out_mps
+ CPU, CUDA, MPS: hardswish_out

  - func: hardswish(Tensor self) -> Tensor
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
- CPU, CUDA: hardswish
- MPS: hardswish_mps
+ CPU, CUDA, MPS: hardswish

  - func: hardswish_(Tensor(a!) self) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
- CPU, CUDA: hardswish_
- MPS: hardswish_mps_
+ CPU, CUDA, MPS: hardswish_

  - func: hardswish_backward(Tensor grad_output, Tensor self) -> Tensor
  python_module: nn
  dispatch:
- CPU, CUDA: hardswish_backward
- MPS: hardswish_backward_mps
+ CPU, CUDA, MPS: hardswish_backward
  autogen: hardswish_backward.out
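
Note: the hardsigmoid and hardswish entries above fold the separate MPS rows into shared "CPU, CUDA, MPS" rows, a registration cleanup rather than a behavior change, and elu additionally joins the core tag set. A quick Python illustration of the affected activations; the device fallback is illustrative:

    import torch
    import torch.nn.functional as F

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    x = torch.linspace(-4, 4, 9, device=device)
    y = F.hardswish(x)    # x * relu6(x + 3) / 6
    z = F.hardsigmoid(x)  # clamp(x / 6 + 1 / 2, 0, 1)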
 
  - func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)
@@ -11962,8 +12062,7 @@
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
- CPU, CUDA: leaky_relu_out
- MPS: leaky_relu_out_mps
+ CPU, CUDA, MPS: leaky_relu_out
  QuantizedCPU: leaky_relu_out_quantized_cpu

  - func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor
@@ -11979,8 +12078,7 @@
  structured_inherits: TensorIteratorBase
  python_module: nn
  dispatch:
- CPU, CUDA: leaky_relu_backward_out
- MPS: leaky_relu_backward_out_mps
+ CPU, CUDA, MPS: leaky_relu_backward_out

  - func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
  structured_delegate: leaky_relu_backward.grad_input
@@ -12092,8 +12190,7 @@
  device_check: NoCheck # TensorIterator
  python_module: nn
  dispatch:
- CPU, CUDA: softshrink_out
- MPS: softshrink_out_mps
+ CPU, CUDA, MPS: softshrink_out

  - func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
  structured_delegate: softshrink.out
@@ -12106,8 +12203,7 @@
  structured_inherits: TensorIteratorBase
  python_module: nn
  dispatch:
- CPU, CUDA: softshrink_backward_out
- MPS: softshrink_backward_out_mps
+ CPU, CUDA, MPS: softshrink_backward_out

  - func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
  structured_delegate: softshrink_backward.grad_input
@@ -12284,6 +12380,7 @@
  dispatch:
  CPU: avg_pool3d_out_cpu
  CUDA: avg_pool3d_out_cuda
+ MPS: avg_pool3d_out_mps
  MkldnnCPU: mkldnn_avg_pool3d_out

  - func: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
@@ -12300,6 +12397,7 @@
  dispatch:
  CPU: avg_pool3d_backward_out_cpu
  CUDA: avg_pool3d_backward_out_cuda
+ MPS: avg_pool3d_backward_out_mps
  MkldnnCPU: mkldnn_avg_pool3d_backward_out

  - func: avg_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor
@@ -12395,6 +12493,7 @@
  dispatch:
  CPU: max_pool3d_with_indices_out_cpu
  CUDA: max_pool3d_with_indices_out_cuda
+ MPS: max_pool3d_with_indices_out_mps

  # Return: (Tensor output, Tensor indices)
  - func: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
@@ -12402,6 +12501,7 @@
  dispatch:
  CPU: max_pool3d_with_indices_cpu
  CUDA: max_pool3d_with_indices_cuda
+ MPS: max_pool3d_with_indices_mps
  tags: core

  - func: max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
@@ -12409,36 +12509,42 @@
  dispatch:
  CPU: max_pool3d_with_indices_backward_out_cpu
  CUDA: max_pool3d_with_indices_backward_out_cuda
+ MPS: max_pool3d_with_indices_backward_out_mps

  - func: max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor
  python_module: nn
  dispatch:
  CPU: max_pool3d_with_indices_backward_cpu
  CUDA: max_pool3d_with_indices_backward_cuda
+ MPS: max_pool3d_with_indices_backward_mps

  - func: max_unpool2d.out(Tensor self, Tensor indices, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
  dispatch:
  CPU: max_unpooling2d_forward_out_cpu
  CUDA: max_unpooling2d_forward_out_cuda
+ MPS: max_unpooling2d_forward_out_mps

  - func: max_unpool2d(Tensor self, Tensor indices, SymInt[2] output_size) -> Tensor
  python_module: nn
  dispatch:
  CPU: max_unpooling2d_forward_cpu
  CUDA: max_unpooling2d_forward_cuda
+ MPS: max_unpooling2d_forward_mps

  - func: max_unpool3d.out(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
  dispatch:
  CPU: max_unpooling3d_forward_out_cpu
  CUDA: max_unpooling3d_forward_out_cuda
+ MPS: max_unpooling3d_forward_out_mps

  - func: max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor
  python_module: nn
  dispatch:
  CPU: max_unpooling3d_forward_cpu
  CUDA: max_unpooling3d_forward_cuda
+ MPS: max_unpooling3d_forward_mps
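
Note: avg_pool3d, max_pool3d_with_indices, and the max_unpool2d/3d ops gain MPS kernels above (forward and backward), so pooling indices can round-trip on the GPU. A sketch; shapes and the device fallback are illustrative:

    import torch
    import torch.nn.functional as F

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    x = torch.randn(1, 1, 4, 4, 4, device=device)
    out, idx = F.max_pool3d(x, kernel_size=2, return_indices=True)
    restored = F.max_unpool3d(out, idx, kernel_size=2)  # zeros everywhere except the argmax positions
    assert restored.shape == x.shape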
 
  - func: reflection_pad1d.out(Tensor self, SymInt[2] padding, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
@@ -12769,6 +12875,7 @@
  dispatch:
  CPU: _upsample_bicubic2d_aa_out_cpu
  CUDA: _upsample_bicubic2d_aa_out_cuda
+ MPS: _upsample_bicubic2d_aa_out_mps

  - func: _upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
@@ -12791,6 +12898,7 @@
  dispatch:
  CPU: upsample_trilinear3d_out_cpu
  CUDA: upsample_trilinear3d_out_cuda
+ MPS: upsample_trilinear3d_out_mps

  - func: upsample_trilinear3d(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
@@ -12802,6 +12910,7 @@
  dispatch:
  CPU: upsample_trilinear3d_backward_out_cpu
  CUDA: upsample_trilinear3d_backward_out_cuda
+ MPS: upsample_trilinear3d_backward_out_mps

  - func: upsample_trilinear3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
@@ -12913,6 +13022,7 @@
  dispatch:
  CPU: upsample_nearest3d_out_cpu
  CUDA: upsample_nearest3d_out_cuda
+ MPS: upsample_nearest3d_out_mps

  - func: _upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
@@ -12920,6 +13030,7 @@
  dispatch:
  CPU: _upsample_nearest_exact3d_out_cpu
  CUDA: _upsample_nearest_exact3d_out_cuda
+ MPS: _upsample_nearest_exact3d_out_mps

  - func: upsample_nearest3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
@@ -12939,6 +13050,7 @@
  dispatch:
  CPU: upsample_nearest3d_backward_out_cpu
  CUDA: upsample_nearest3d_backward_out_cuda
+ MPS: upsample_nearest3d_backward_out_mps

  - func: _upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
  python_module: nn
@@ -12946,6 +13058,7 @@
  dispatch:
  CPU: _upsample_nearest_exact3d_backward_out_cpu
  CUDA: _upsample_nearest_exact3d_backward_out_cuda
+ MPS: _upsample_nearest_exact3d_backward_out_mps

  - func: upsample_nearest3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
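
Note: the anti-aliased bicubic, trilinear 3d, and nearest 3d upsampling entries above gain MPS kernels for both forward and backward. In user code these are reached through torch.nn.functional.interpolate; a sketch with illustrative shapes:

    import torch
    import torch.nn.functional as F

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    vol = torch.randn(1, 1, 4, 8, 8, device=device, requires_grad=True)
    up = F.interpolate(vol, scale_factor=2, mode="trilinear", align_corners=False)
    up.sum().backward()  # exercises upsample_trilinear3d_backward on the same device
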
@@ -12988,7 +13101,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: tanh_backward_out
+ CPU, CUDA, MTIA: tanh_backward_out
  MPS: tanh_backward_out_mps
  tags: pointwise

@@ -13120,12 +13233,14 @@
  dispatch:
  CPU: col2im_out_cpu
  CUDA: col2im_out_cuda
+ MPS: col2im_out_mps

  - func: col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
  python_module: nn
  dispatch:
  CPU: col2im_cpu
  CUDA: col2im_cuda
+ MPS: col2im_mps
  tags: core
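
Note: tanh_backward gains an MTIA key and col2im gains MPS kernels above. col2im is the kernel behind torch.nn.functional.fold; a sketch that round-trips unfold/fold, with illustrative shapes:

    import torch
    import torch.nn.functional as F

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    img = torch.randn(1, 1, 4, 4, device=device)
    patches = F.unfold(img, kernel_size=2)                    # im2col
    out = F.fold(patches, output_size=(4, 4), kernel_size=2)  # col2im under the hood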
 
  - func: column_stack(Tensor[] tensors) -> Tensor
@@ -13158,8 +13273,8 @@
  device_guard: False
  dispatch:
  CompositeExplicitAutograd: isinf
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_isinf
- SparseCPU, SparseCUDA: isinf_sparse
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isinf
+ SparseCPU, SparseCUDA, SparseMPS: isinf_sparse
  SparseMeta: isinf_sparse_meta
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
  autogen: isinf.out
@@ -13174,8 +13289,8 @@
  variants: function, method
  structured_delegate: isposinf.out
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_isposinf
- SparseCPU, SparseCUDA: isposinf_sparse
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isposinf
+ SparseCPU, SparseCUDA, SparseMPS: isposinf_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
  tags: pointwise

@@ -13184,7 +13299,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA, MPS: isposinf_out
- SparseCPU, SparseCUDA: isposinf_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: isposinf_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
  tags: pointwise

@@ -13192,8 +13307,8 @@
  variants: function, method
  structured_delegate: isneginf.out
  dispatch:
- NestedTensorCPU, NestedTensorCUDA: NestedTensor_isneginf
- SparseCPU, SparseCUDA: isneginf_sparse
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isneginf
+ SparseCPU, SparseCUDA, SparseMPS: isneginf_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
  tags: pointwise

@@ -13202,7 +13317,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA, MPS: isneginf_out
- SparseCPU, SparseCUDA: isneginf_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: isneginf_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
  tags: pointwise
 
@@ -13500,7 +13615,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: special_i0e_out
+ CPU, CUDA, MPS: special_i0e_out
  tags: pointwise

  - func: special_i1(Tensor self) -> Tensor
@@ -13528,7 +13643,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: special_i1e_out
+ CPU, CUDA, MPS: special_i1e_out
  tags: pointwise
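
Note: special_i0e and special_i1e (exponentially scaled modified Bessel functions of the first kind) gain MPS kernels above. A short illustration of the public API:

    import torch

    x = torch.linspace(0.0, 5.0, 6)
    print(torch.special.i0e(x))  # exp(-|x|) * I0(x)
    print(torch.special.i1e(x))  # exp(-|x|) * I1(x)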
 
  - func: special_logit(Tensor self, float? eps=None) -> Tensor
@@ -13897,8 +14012,7 @@
  python_module: linalg
  structured: True
  dispatch:
- CPU, CUDA: linalg_cholesky_ex_out
- MPS: linalg_cholesky_ex_out_mps
+ CPU, CUDA, MPS: linalg_cholesky_ex_out

  - func: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
  python_module: linalg
@@ -14468,13 +14582,13 @@
  dispatch:
  # the NestedTensor keys are necessary because NestedTensor has been removed
  # from the CompositeExplicitAutograd keyset see Note [NestedTensor Not Included in Backend Keys]
- CompositeExplicitAutograd, NestedTensorCPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_fullcoverage
+ CompositeExplicitAutograd, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_fullcoverage
  autogen: _test_autograd_multiple_dispatch.fullcoverage_out

  # Note: this function is only for testing.
  - func: _test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor
  dispatch:
- CompositeImplicitAutograd, NestedTensorCPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_ntonly
+ CompositeImplicitAutograd, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_ntonly

  # Note: this function is only for testing.
  - func: _test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a)
@@ -14819,13 +14933,13 @@
  - func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
  dispatch:
  CompositeExplicitAutograd: _safe_softmax
- NestedTensorCPU, NestedTensorCUDA: _safe_softmax
+ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _safe_softmax
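
Note: _safe_softmax, the softmax variant used by scaled dot-product attention that returns zeros instead of NaN for fully masked rows, adds a NestedTensorHPU key above. A hedged sketch of softmax over a nested (ragged) batch; nested-tensor operator coverage varies by layout:

    import torch
    import torch.nn.functional as F

    nt = torch.nested.nested_tensor([torch.randn(2, 4), torch.randn(3, 4)])
    out = F.softmax(nt, dim=-1)  # per-row softmax across ragged sequence lengths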
 
  # Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
  - func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
  variants: function
  dispatch:
- CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_encoder_layer_forward
+ CPU, CUDA, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: transformer_encoder_layer_forward
  autogen: _transformer_encoder_layer_fwd.out

  - func: _native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None) -> (Tensor, Tensor)
@@ -14916,6 +15030,7 @@
  - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
  dispatch:
  CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
+ NestedTensorCUDA: _scaled_dot_product_cudnn_attention_nestedtensor_backward_cuda
  tags: nondeterministic_seeded

  - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
@@ -14948,6 +15063,11 @@
  CUDA: _cudnn_attention_forward
  tags: nondeterministic_seeded

+ - func: _cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+ dispatch:
+ CUDA: _cudnn_attention_backward
+ tags: nondeterministic_seeded
+
  - func: _triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor
  variants: function
  dispatch:
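
Note: the hunk above adds a private _cudnn_attention_backward op and a nested-tensor backward for the cuDNN SDPA path. User code reaches these through scaled_dot_product_attention when the cuDNN backend is selected; a hedged CUDA sketch, with illustrative shapes:

    import torch
    import torch.nn.functional as F
    from torch.nn.attention import SDPBackend, sdpa_kernel

    q = torch.randn(2, 4, 8, 16, device="cuda", dtype=torch.float16, requires_grad=True)
    k, v = torch.randn_like(q), torch.randn_like(q)
    with sdpa_kernel(SDPBackend.CUDNN_ATTENTION):
        out = F.scaled_dot_product_attention(q, k, v)
    out.sum().backward()  # routed to the cuDNN attention backward when eligible
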
@@ -14990,7 +15110,7 @@

  - func: special_bessel_j0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_bessel_j0_out
+ CPU, CUDA, MPS: special_bessel_j0_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15005,7 +15125,7 @@

  - func: special_bessel_j1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_bessel_j1_out
+ CPU, CUDA, MPS: special_bessel_j1_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15020,7 +15140,7 @@

  - func: special_bessel_y0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_bessel_y0_out
+ CPU, CUDA, MPS: special_bessel_y0_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15035,7 +15155,7 @@

  - func: special_bessel_y1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_bessel_y1_out
+ CPU, CUDA, MPS: special_bessel_y1_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15068,7 +15188,7 @@
  - func: special_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_chebyshev_polynomial_t_out
+ CPU, CUDA, MPS: special_chebyshev_polynomial_t_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15117,7 +15237,7 @@
  - func: special_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_chebyshev_polynomial_u_out
+ CPU, CUDA, MPS: special_chebyshev_polynomial_u_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15166,7 +15286,7 @@
  - func: special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_chebyshev_polynomial_v_out
+ CPU, CUDA, MPS: special_chebyshev_polynomial_v_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15215,7 +15335,7 @@
  - func: special_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_chebyshev_polynomial_w_out
+ CPU, CUDA, MPS: special_chebyshev_polynomial_w_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15264,7 +15384,7 @@
  - func: special_hermite_polynomial_h.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_hermite_polynomial_h_out
+ CPU, CUDA, MPS: special_hermite_polynomial_h_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15313,7 +15433,7 @@
  - func: special_hermite_polynomial_he.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_hermite_polynomial_he_out
+ CPU, CUDA, MPS: special_hermite_polynomial_he_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15442,7 +15562,7 @@

  - func: special_modified_bessel_i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_modified_bessel_i0_out
+ CPU, CUDA, MPS: special_modified_bessel_i0_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15457,7 +15577,7 @@

  - func: special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_modified_bessel_i1_out
+ CPU, CUDA, MPS: special_modified_bessel_i1_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15472,7 +15592,7 @@

  - func: special_modified_bessel_k0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_modified_bessel_k0_out
+ CPU, CUDA, MPS: special_modified_bessel_k0_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15487,7 +15607,7 @@

  - func: special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_modified_bessel_k1_out
+ CPU, CUDA, MPS: special_modified_bessel_k1_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15502,7 +15622,7 @@

  - func: special_scaled_modified_bessel_k0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_scaled_modified_bessel_k0_out
+ CPU, CUDA, MPS: special_scaled_modified_bessel_k0_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15517,7 +15637,7 @@

  - func: special_scaled_modified_bessel_k1.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: special_scaled_modified_bessel_k1_out
+ CPU, CUDA, MPS: special_scaled_modified_bessel_k1_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15550,7 +15670,7 @@
  - func: special_shifted_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_shifted_chebyshev_polynomial_t_out
+ CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_t_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15599,7 +15719,7 @@
  - func: special_shifted_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_shifted_chebyshev_polynomial_u_out
+ CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_u_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15648,7 +15768,7 @@
  - func: special_shifted_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_shifted_chebyshev_polynomial_v_out
+ CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_v_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15697,7 +15817,7 @@
  - func: special_shifted_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_shifted_chebyshev_polynomial_w_out
+ CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_w_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15806,8 +15926,17 @@
  variants: function
  dispatch:
  CPU: _fused_adagrad_kernel_cpu_
+ CUDA: _fused_adagrad_kernel_cuda_
  autogen: _fused_adagrad, _fused_adagrad.out

+ - func: _fused_adagrad_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor[] state_steps, *, Tensor lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CPU: _fused_adagrad_kernel_cpu_
+ CUDA: _fused_adagrad_kernel_cuda_
+ autogen: _fused_adagrad.tensor_lr, _fused_adagrad.tensor_lr_out
+
  # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
  - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
  variants: function
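
Note: the fused Adagrad entries above add a CUDA kernel for the base op and a new tensor_lr overload, which lets the learning rate live on device as a tensor (useful for capturable optimizers and CUDA graphs). A hedged sketch of a raw call, with argument roles taken from the tensor_lr schema; the state-tensor conventions follow the fused-optimizer internals and the values are illustrative:

    import torch

    params = [torch.rand(3)]
    grads = [torch.rand(3)]
    state_sums = [torch.zeros(3)]
    state_steps = [torch.zeros(())]

    torch._fused_adagrad_(
        params, grads, state_sums, state_steps,
        lr=torch.tensor(0.1),  # a Tensor lr selects the new tensor_lr overload
        lr_decay=0.0, weight_decay=0.0, eps=1e-10, maximize=False,
    )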