torch-rb 0.21.0 → 0.22.0

This diff reflects the content of publicly available package versions as released to one of the supported registries, and is provided for informational purposes only.
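Most of the changes below touch the vendored copy of PyTorch's `native_functions.yaml` operator registry: existing sparse kernels gain `SparseMPS`/`SparseCsrMPS` dispatch keys (Apple's Metal backend), a number of dense kernels gain `MTIA` or `XPU` entries, and a few declarations are new (`cudnn_batch_norm.out`, `_fused_rms_norm_backward`, `sym_is_contiguous`, `hash_tensor`). As a rough illustration of what the `SparseMPS` entries enable — a minimal sketch, not taken from the release notes, assuming a torch-rb build against an MPS-enabled libtorch — pointwise ops such as `abs` can now dispatch for sparse COO tensors on the `mps` device:

```ruby
require "torch"

# 2x2 sparse COO tensor with two non-zero values.
indices = Torch.tensor([[0, 1], [1, 0]])
values  = Torch.tensor([-3.0, 4.0])
sparse  = Torch.sparse_coo_tensor(indices, values, [2, 2])

# Move to Apple's GPU when available; guard shown as an assumption
# about the torch-rb API surface.
sparse = sparse.to("mps") if Torch::Backends::MPS.available?

# abs on a sparse tensor dispatches to abs_sparse, which this release
# registers for SparseMPS alongside SparseCPU and SparseCUDA.
p sparse.abs.to_dense
```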
@@ -288,6 +288,7 @@
288
288
  dispatch:
289
289
  CPU: native_dropout_cpu
290
290
  CUDA: native_dropout_cuda
291
+ MPS: native_dropout_mps
291
292
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_nested
292
293
  tags: [nondeterministic_seeded, core]
293
294
  autogen: native_dropout.out
@@ -296,6 +297,7 @@
296
297
  dispatch:
297
298
  CPU, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_backward
298
299
  CUDA: native_dropout_backward_cuda
300
+ MPS: native_dropout_backward_mps
299
301
  autogen: native_dropout_backward.out
300
302
  tags: pointwise
301
303
 
@@ -340,8 +342,8 @@
340
342
  variants: function, method
341
343
  dispatch:
342
344
  CompositeExplicitAutograd: abs
343
- SparseCPU, SparseCUDA: abs_sparse
344
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
345
+ SparseCPU, SparseCUDA, SparseMPS: abs_sparse
346
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr
345
347
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs
346
348
  tags: [core, pointwise]
347
349
 
@@ -350,16 +352,16 @@
350
352
  variants: function, method
351
353
  dispatch:
352
354
  CompositeExplicitAutograd: abs_
353
- SparseCPU, SparseCUDA: abs_sparse_
354
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
355
+ SparseCPU, SparseCUDA, SparseMPS: abs_sparse_
356
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr_
355
357
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs_
356
358
 
357
359
  - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
358
360
  device_check: NoCheck # TensorIterator
359
361
  dispatch:
360
- CPU, CUDA, MPS: abs_out
361
- SparseCPU, SparseCUDA: abs_sparse_out
362
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
362
+ CPU, CUDA, MPS, MTIA: abs_out
363
+ SparseCPU, SparseCUDA, SparseMPS: abs_sparse_out
364
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: abs_sparse_csr_out
363
365
  tags: pointwise
364
366
 
365
367
  # Note [Adding an alias]
@@ -428,7 +430,7 @@
428
430
  variants: function, method
429
431
  structured_delegate: sgn.out
430
432
  dispatch:
431
- SparseCPU, SparseCUDA: sgn_sparse
433
+ SparseCPU, SparseCUDA, SparseMPS: sgn_sparse
432
434
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
433
435
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn
434
436
  tags: pointwise
@@ -437,7 +439,7 @@
437
439
  variants: method
438
440
  structured_delegate: sgn.out
439
441
  dispatch:
440
- SparseCPU, SparseCUDA: sgn_sparse_
442
+ SparseCPU, SparseCUDA, SparseMPS: sgn_sparse_
441
443
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
442
444
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn_
443
445
  tags: pointwise
@@ -448,7 +450,7 @@
448
450
  dispatch:
449
451
  CPU, CUDA: sgn_out
450
452
  MPS: sgn_out_mps
451
- SparseCPU, SparseCUDA: sgn_sparse_out
453
+ SparseCPU, SparseCUDA, SparseMPS: sgn_sparse_out
452
454
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_out
453
455
  tags: pointwise
454
456
 
@@ -476,7 +478,7 @@
476
478
  variants: function, method
477
479
  dispatch:
478
480
  CompositeExplicitAutograd: _conj_physical
479
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr
481
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: conj_physical_sparse_csr
480
482
  autogen: _conj_physical.out
481
483
 
482
484
  - func: conj_physical(Tensor self) -> Tensor
@@ -487,8 +489,8 @@
487
489
  dispatch:
488
490
  CPU, CUDA: conj_physical_out
489
491
  MPS: conj_physical_out_mps
490
- SparseCPU, SparseCUDA: conj_physical_out_sparse
491
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_out
492
+ SparseCPU, SparseCUDA, SparseMPS: conj_physical_out_sparse
493
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: conj_physical_sparse_csr_out
492
494
  tags: pointwise
493
495
 
494
496
  - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
@@ -554,7 +556,7 @@
554
556
  structured_delegate: add.out
555
557
  variants: function, method
556
558
  dispatch:
557
- SparseCPU, SparseCUDA, SparseMeta: add_sparse
559
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: add_sparse
558
560
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
559
561
  MkldnnCPU: mkldnn_add
560
562
  ZeroTensor: add_zerotensor
@@ -566,7 +568,7 @@
566
568
  variants: method
567
569
  structured_delegate: add.out
568
570
  dispatch:
569
- SparseCPU, SparseCUDA, SparseMeta: add_sparse_
571
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: add_sparse_
570
572
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
571
573
  MkldnnCPU: mkldnn_add_
572
574
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add__Tensor
@@ -582,10 +584,12 @@
582
584
  dispatch:
583
585
  SparseCPU, SparseMeta: add_out_sparse_cpu
584
586
  SparseCUDA: add_out_sparse_cuda
587
+ SparseMPS: add_out_sparse_mps
585
588
  SparseCsrCPU, SparseCsrMeta: add_out_sparse_compressed_cpu
586
589
  SparseCsrCUDA: add_out_sparse_compressed_cuda
587
590
  MkldnnCPU: mkldnn_add_out
588
591
  MPS: add_out_mps
592
+ MTIA: add_out_mtia
589
593
  tags: pointwise
590
594
 
591
595
  - func: _add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
@@ -718,6 +722,7 @@
718
722
  dispatch:
719
723
  CPU, CUDA: all_out
720
724
  MPS: all_out_mps
725
+ MTIA: all_out_mtia
721
726
 
722
727
  - func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
723
728
  device_check: NoCheck # TensorIterator
@@ -807,6 +812,7 @@
807
812
  CPU, Meta: arange_out
808
813
  CUDA: arange_cuda_out
809
814
  MPS: arange_mps_out
815
+ MTIA: arange_mtia_out
810
816
  cpp_no_default_args: ['step']
811
817
 
812
818
  # This function is a temporary hack to allow tracing of arange like constructs with dynamic
@@ -871,7 +877,7 @@
871
877
  variants: function, method
872
878
  structured_delegate: asinh.out
873
879
  dispatch:
874
- SparseCPU, SparseCUDA: asinh_sparse
880
+ SparseCPU, SparseCUDA, SparseMPS: asinh_sparse
875
881
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr
876
882
  tags: [core, pointwise]
877
883
 
@@ -879,7 +885,7 @@
879
885
  variants: function, method
880
886
  structured_delegate: asinh.out
881
887
  dispatch:
882
- SparseCPU, SparseCUDA: asinh_sparse_
888
+ SparseCPU, SparseCUDA, SparseMPS: asinh_sparse_
883
889
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_
884
890
  tags: pointwise
885
891
 
@@ -889,7 +895,7 @@
889
895
  dispatch:
890
896
  CPU, CUDA: asinh_out
891
897
  MPS: asinh_out_mps
892
- SparseCPU, SparseCUDA: asinh_sparse_out
898
+ SparseCPU, SparseCUDA, SparseMPS: asinh_sparse_out
893
899
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_out
894
900
  tags: pointwise
895
901
 
@@ -906,7 +912,7 @@
906
912
  structured_delegate: atanh.out
907
913
  variants: function, method
908
914
  dispatch:
909
- SparseCPU, SparseCUDA: atanh_sparse
915
+ SparseCPU, SparseCUDA, SparseMPS: atanh_sparse
910
916
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr
911
917
  tags: [core, pointwise]
912
918
 
@@ -914,7 +920,7 @@
914
920
  structured_delegate: atanh.out
915
921
  variants: function, method
916
922
  dispatch:
917
- SparseCPU, SparseCUDA: atanh_sparse_
923
+ SparseCPU, SparseCUDA, SparseMPS: atanh_sparse_
918
924
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_
919
925
  tags: pointwise
920
926
 
@@ -924,7 +930,7 @@
924
930
  dispatch:
925
931
  CPU, CUDA: atanh_out
926
932
  MPS: atanh_out_mps
927
- SparseCPU, SparseCUDA: atanh_sparse_out
933
+ SparseCPU, SparseCUDA, SparseMPS: atanh_sparse_out
928
934
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_out
929
935
  tags: pointwise
930
936
  # arctanh, alias for atanh
@@ -940,9 +946,8 @@
940
946
  - func: as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)
941
947
  variants: function, method
942
948
  dispatch:
943
- ZeroTensor, CPU, CUDA, MTIA: as_strided_tensorimpl
949
+ ZeroTensor, CPU, CUDA, MTIA, MPS: as_strided_tensorimpl
944
950
  Meta: as_strided_tensorimpl_meta_symint
945
- MPS: as_strided_tensorimpl_mps
946
951
  QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
947
952
  device_check: NoCheck
948
953
  device_guard: False
@@ -962,7 +967,7 @@
962
967
  variants: function, method
963
968
  structured_delegate: asin.out
964
969
  dispatch:
965
- SparseCPU, SparseCUDA: asin_sparse
970
+ SparseCPU, SparseCUDA, SparseMPS: asin_sparse
966
971
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr
967
972
  tags: [core, pointwise]
968
973
 
@@ -971,7 +976,7 @@
971
976
  variants: function, method
972
977
  structured_delegate: asin.out
973
978
  dispatch:
974
- SparseCPU, SparseCUDA: asin_sparse_
979
+ SparseCPU, SparseCUDA, SparseMPS: asin_sparse_
975
980
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_
976
981
  tags: pointwise
977
982
 
@@ -981,7 +986,7 @@
981
986
  structured_inherits: TensorIteratorBase
982
987
  dispatch:
983
988
  CPU, CUDA, MPS: asin_out
984
- SparseCPU, SparseCUDA: asin_sparse_out
989
+ SparseCPU, SparseCUDA, SparseMPS: asin_sparse_out
985
990
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
986
991
  tags: pointwise
987
992
 
@@ -999,7 +1004,7 @@
999
1004
  structured_delegate: atan.out
1000
1005
  variants: function, method
1001
1006
  dispatch:
1002
- SparseCPU, SparseCUDA: atan_sparse
1007
+ SparseCPU, SparseCUDA, SparseMPS: atan_sparse
1003
1008
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr
1004
1009
  tags: [core, pointwise]
1005
1010
 
@@ -1008,7 +1013,7 @@
1008
1013
  structured_delegate: atan.out
1009
1014
  variants: function, method
1010
1015
  dispatch:
1011
- SparseCPU, SparseCUDA: atan_sparse_
1016
+ SparseCPU, SparseCUDA, SparseMPS: atan_sparse_
1012
1017
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_
1013
1018
  tags: pointwise
1014
1019
 
@@ -1018,7 +1023,7 @@
1018
1023
  structured_inherits: TensorIteratorBase
1019
1024
  dispatch:
1020
1025
  CPU, CUDA, MPS: atan_out
1021
- SparseCPU, SparseCUDA: atan_sparse_out
1026
+ SparseCPU, SparseCUDA, SparseMPS: atan_sparse_out
1022
1027
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
1023
1028
  tags: pointwise
1024
1029
 
@@ -1067,6 +1072,7 @@
1067
1072
  CUDA: baddbmm_out_cuda
1068
1073
  MPS: baddbmm_out_mps
1069
1074
  XPU: baddbmm_out_xpu
1075
+ MTIA: baddbmm_out_mtia
1070
1076
  SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
1071
1077
 
1072
1078
  - func: baddbmm.dtype(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
@@ -1281,7 +1287,7 @@
1281
1287
  - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
1282
1288
  device_check: NoCheck # TensorIterator
1283
1289
  dispatch:
1284
- CPU, CUDA: logical_not_out
1290
+ CPU, CUDA, MTIA: logical_not_out
1285
1291
  MPS: logical_not_out_mps
1286
1292
  tags: pointwise
1287
1293
 
@@ -1376,6 +1382,7 @@
1376
1382
  CUDA: bmm_out_cuda
1377
1383
  MPS: bmm_out_mps
1378
1384
  XPU: bmm_out_xpu
1385
+ MTIA: bmm_out_mtia
1379
1386
  SparseCPU: bmm_out_sparse_cpu
1380
1387
  SparseCUDA: bmm_out_sparse_cuda
1381
1388
  SparseCsrCUDA: bmm_out_sparse_csr_cuda
@@ -1455,7 +1462,7 @@
1455
1462
  structured_delegate: ceil.out
1456
1463
  variants: function, method
1457
1464
  dispatch:
1458
- SparseCPU, SparseCUDA: ceil_sparse
1465
+ SparseCPU, SparseCUDA, SparseMPS: ceil_sparse
1459
1466
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr
1460
1467
  tags: [core, pointwise]
1461
1468
 
@@ -1464,7 +1471,7 @@
1464
1471
  structured_delegate: ceil.out
1465
1472
  variants: function, method
1466
1473
  dispatch:
1467
- SparseCPU, SparseCUDA: ceil_sparse_
1474
+ SparseCPU, SparseCUDA, SparseMPS: ceil_sparse_
1468
1475
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_
1469
1476
  tags: pointwise
1470
1477
 
@@ -1474,7 +1481,7 @@
1474
1481
  structured_inherits: TensorIteratorBase
1475
1482
  dispatch:
1476
1483
  CPU, CUDA, MPS: ceil_out
1477
- SparseCPU, SparseCUDA: ceil_sparse_out
1484
+ SparseCPU, SparseCUDA, SparseMPS: ceil_sparse_out
1478
1485
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
1479
1486
  tags: pointwise
1480
1487
 
@@ -1887,7 +1894,10 @@
1887
1894
  - func: cudnn_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor, Tensor)
1888
1895
  dispatch:
1889
1896
  CUDA: cudnn_batch_norm
1890
- autogen: cudnn_batch_norm.out
1897
+
1898
+ - func: cudnn_batch_norm.out(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2, Tensor(d!) out3) -> (Tensor(a!), Tensor(b!), Tensor(c!), Tensor(d!))
1899
+ dispatch:
1900
+ CUDA: cudnn_batch_norm_out
1891
1901
 
1892
1902
  # NB: You can only use this if you used cudnn_batch_norm training=True
1893
1903
  - func: cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)
@@ -2168,7 +2178,7 @@
2168
2178
  structured: True
2169
2179
  structured_inherits: TensorIteratorBase
2170
2180
  dispatch:
2171
- CPU, CUDA, MPS: div_out
2181
+ CPU, CUDA, MPS, MTIA: div_out
2172
2182
  SparseCPU, SparseCUDA: div_out_sparse_zerodim
2173
2183
  tags: pointwise
2174
2184
 
@@ -2399,7 +2409,7 @@
2399
2409
  MPS: empty_mps
2400
2410
  Meta: empty_meta_symint
2401
2411
  MkldnnCPU: empty_mkldnn
2402
- SparseCPU, SparseCUDA: empty_sparse
2412
+ SparseCPU, SparseCUDA, SparseMPS: empty_sparse
2403
2413
  SparseMeta: empty_sparse_symint
2404
2414
  SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
2405
2415
  SparseCsrMeta: empty_sparse_compressed_symint
@@ -2527,7 +2537,7 @@
2527
2537
  structured_delegate: erf.out
2528
2538
  variants: function, method
2529
2539
  dispatch:
2530
- SparseCPU, SparseCUDA: erf_sparse
2540
+ SparseCPU, SparseCUDA, SparseMPS: erf_sparse
2531
2541
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr
2532
2542
  tags: [core, pointwise]
2533
2543
 
@@ -2536,7 +2546,7 @@
2536
2546
  structured_delegate: erf.out
2537
2547
  variants: function, method
2538
2548
  dispatch:
2539
- SparseCPU, SparseCUDA: erf_sparse_
2549
+ SparseCPU, SparseCUDA, SparseMPS: erf_sparse_
2540
2550
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_
2541
2551
  tags: pointwise
2542
2552
 
@@ -2546,7 +2556,7 @@
2546
2556
  structured_inherits: TensorIteratorBase
2547
2557
  dispatch:
2548
2558
  CPU, CUDA, MPS, MTIA: erf_out
2549
- SparseCPU, SparseCUDA: erf_sparse_out
2559
+ SparseCPU, SparseCUDA, SparseMPS: erf_sparse_out
2550
2560
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
2551
2561
  tags: pointwise
2552
2562
 
@@ -2612,7 +2622,7 @@
2612
2622
  structured_delegate: expm1.out
2613
2623
  variants: function, method
2614
2624
  dispatch:
2615
- SparseCPU, SparseCUDA: expm1_sparse
2625
+ SparseCPU, SparseCUDA, SparseMPS: expm1_sparse
2616
2626
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr
2617
2627
  tags: [core, pointwise]
2618
2628
 
@@ -2621,7 +2631,7 @@
2621
2631
  structured_delegate: expm1.out
2622
2632
  variants: function, method
2623
2633
  dispatch:
2624
- SparseCPU, SparseCUDA: expm1_sparse_
2634
+ SparseCPU, SparseCUDA, SparseMPS: expm1_sparse_
2625
2635
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_
2626
2636
  tags: pointwise
2627
2637
 
@@ -2631,7 +2641,7 @@
2631
2641
  structured_inherits: TensorIteratorBase
2632
2642
  dispatch:
2633
2643
  CPU, CUDA, MPS: expm1_out
2634
- SparseCPU, SparseCUDA: expm1_sparse_out
2644
+ SparseCPU, SparseCUDA, SparseMPS: expm1_sparse_out
2635
2645
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
2636
2646
  tags: pointwise
2637
2647
 
@@ -2730,7 +2740,7 @@
2730
2740
  structured_delegate: floor.out
2731
2741
  variants: function, method
2732
2742
  dispatch:
2733
- SparseCPU, SparseCUDA: floor_sparse
2743
+ SparseCPU, SparseCUDA, SparseMPS: floor_sparse
2734
2744
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr
2735
2745
  tags: [core, pointwise]
2736
2746
 
@@ -2739,7 +2749,7 @@
2739
2749
  structured_delegate: floor.out
2740
2750
  variants: function, method
2741
2751
  dispatch:
2742
- SparseCPU, SparseCUDA: floor_sparse_
2752
+ SparseCPU, SparseCUDA, SparseMPS: floor_sparse_
2743
2753
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_
2744
2754
  tags: pointwise
2745
2755
 
@@ -2749,7 +2759,7 @@
2749
2759
  structured_inherits: TensorIteratorBase
2750
2760
  dispatch:
2751
2761
  CPU, CUDA, MPS: floor_out
2752
- SparseCPU, SparseCUDA: floor_sparse_out
2762
+ SparseCPU, SparseCUDA, SparseMPS: floor_sparse_out
2753
2763
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
2754
2764
  tags: pointwise
2755
2765
 
@@ -2757,7 +2767,7 @@
2757
2767
  device_check: NoCheck # TensorIterator
2758
2768
  variants: function, method
2759
2769
  dispatch:
2760
- CPU, CUDA, MPS: floor_divide
2770
+ CPU, CUDA, MPS, MTIA: floor_divide
2761
2771
  SparseCPU, SparseCUDA: floor_divide_sparse
2762
2772
 
2763
2773
  - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -2791,7 +2801,7 @@
2791
2801
  structured_delegate: frac.out
2792
2802
  variants: function, method
2793
2803
  dispatch:
2794
- SparseCPU, SparseCUDA: frac_sparse
2804
+ SparseCPU, SparseCUDA, SparseMPS: frac_sparse
2795
2805
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr
2796
2806
  tags: pointwise
2797
2807
 
@@ -2800,7 +2810,7 @@
2800
2810
  structured_delegate: frac.out
2801
2811
  variants: function, method
2802
2812
  dispatch:
2803
- SparseCPU, SparseCUDA: frac_sparse_
2813
+ SparseCPU, SparseCUDA, SparseMPS: frac_sparse_
2804
2814
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_
2805
2815
  tags: pointwise
2806
2816
 
@@ -2811,7 +2821,7 @@
2811
2821
  dispatch:
2812
2822
  CPU, CUDA: frac_out
2813
2823
  MPS: frac_out_mps
2814
- SparseCPU, SparseCUDA: frac_sparse_out
2824
+ SparseCPU, SparseCUDA, SparseMPS: frac_sparse_out
2815
2825
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_out
2816
2826
  tags: pointwise
2817
2827
 
@@ -2924,6 +2934,7 @@
2924
2934
  dispatch:
2925
2935
  CPU: grid_sampler_3d_cpu
2926
2936
  CUDA: grid_sampler_3d_cuda
2937
+ MPS: grid_sampler_3d_mps
2927
2938
  autogen: grid_sampler_3d.out
2928
2939
 
2929
2940
  # `grid_sampler_3d_backward` takes in `output_mask` to optimize performance for
@@ -3200,7 +3211,7 @@
3200
3211
  dispatch:
3201
3212
  CPU, CUDA, MPS, MTIA: isnan
3202
3213
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isnan
3203
- SparseCPU, SparseCUDA: isnan_sparse
3214
+ SparseCPU, SparseCUDA, SparseMPS: isnan_sparse
3204
3215
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
3205
3216
  autogen: isnan.out
3206
3217
  tags: [core, pointwise]
@@ -3281,6 +3292,7 @@
3281
3292
  dispatch:
3282
3293
  CPU: kthvalue_out_cpu
3283
3294
  CUDA: kthvalue_out_cuda
3295
+ MPS: kthvalue_out_mps
3284
3296
 
3285
3297
  - func: kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
3286
3298
  variants: function, method
@@ -3314,29 +3326,35 @@
3314
3326
  dispatch:
3315
3327
  CompositeImplicitAutograd: rms_norm_symint
3316
3328
 
3317
- - func: _fused_rms_norm(Tensor input, int normalized_shape_ndim, Tensor weight, float eps) -> Tensor
3329
+ - func: _fused_rms_norm(Tensor input, int[] normalized_shape, Tensor? weight, float? eps) -> (Tensor, Tensor)
3318
3330
  dispatch:
3331
+ CUDA: _fused_rms_norm_cuda
3319
3332
  MPS: _fused_rms_norm_mps
3333
+ CompositeImplicitAutograd: rms_norm_composite
3334
+
3335
+ - func: _fused_rms_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor rstd, Tensor? weight, bool[2] output_mask) -> (Tensor, Tensor)
3336
+ dispatch:
3337
+ CUDA: _fused_rms_norm_backward_cuda
3320
3338
 
3321
3339
  - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
3322
3340
  variants: function, method
3323
3341
  dispatch:
3324
3342
  CompositeExplicitAutograd: nan_to_num
3325
- SparseCPU, SparseCUDA: nan_to_num_sparse
3343
+ SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse
3326
3344
  tags: pointwise
3327
3345
 
3328
3346
  - func: nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)
3329
3347
  variants: function, method
3330
3348
  dispatch:
3331
3349
  CompositeExplicitAutograd: nan_to_num_
3332
- SparseCPU, SparseCUDA: nan_to_num_sparse_
3350
+ SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse_
3333
3351
  tags: pointwise
3334
3352
 
3335
3353
  - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
3336
3354
  dispatch:
3337
3355
  CPU, CUDA, MTIA: nan_to_num_out
3338
3356
  MPS: nan_to_num_out_mps
3339
- SparseCPU, SparseCUDA: nan_to_num_sparse_out
3357
+ SparseCPU, SparseCUDA, SparseMPS: nan_to_num_sparse_out
3340
3358
  tags: pointwise
3341
3359
 
3342
3360
  - func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
@@ -3432,10 +3450,14 @@
3432
3450
 
3433
3451
  - func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
3434
3452
 
3435
- - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
3453
+ - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor? bias) -> Tensor
3454
+
3455
+ - func: fbgemm_linear_fp16_weight_fp32_activation.out(Tensor input, Tensor packed_weight, Tensor? bias, Tensor(a!) output) -> Tensor
3436
3456
 
3437
3457
  - func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
3438
3458
 
3459
+ - func: fbgemm_linear_fp16_weight.out(Tensor input, Tensor packed_weight, Tensor bias, Tensor(a!) output) -> Tensor
3460
+
3439
3461
  - func: fbgemm_pack_quantized_matrix(Tensor input) -> Tensor
3440
3462
 
3441
3463
  - func: fbgemm_pack_quantized_matrix.KN(Tensor input, int K, int N) -> Tensor
@@ -3535,7 +3557,7 @@
3535
3557
  structured_delegate: log1p.out
3536
3558
  variants: function, method
3537
3559
  dispatch:
3538
- SparseCPU, SparseCUDA: log1p_sparse
3560
+ SparseCPU, SparseCUDA, SparseMPS: log1p_sparse
3539
3561
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr
3540
3562
  tags: [core, pointwise]
3541
3563
 
@@ -3544,7 +3566,7 @@
3544
3566
  structured_delegate: log1p.out
3545
3567
  variants: function, method
3546
3568
  dispatch:
3547
- SparseCPU, SparseCUDA: log1p_sparse_
3569
+ SparseCPU, SparseCUDA, SparseMPS: log1p_sparse_
3548
3570
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_
3549
3571
  tags: pointwise
3550
3572
 
@@ -3554,7 +3576,7 @@
3554
3576
  structured_inherits: TensorIteratorBase
3555
3577
  dispatch:
3556
3578
  CPU, CUDA, MPS: log1p_out
3557
- SparseCPU, SparseCUDA: log1p_sparse_out
3579
+ SparseCPU, SparseCUDA, SparseMPS: log1p_sparse_out
3558
3580
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
3559
3581
  tags: pointwise
3560
3582
 
@@ -3740,11 +3762,13 @@
3740
3762
  dispatch:
3741
3763
  CPU: _logcumsumexp_cpu
3742
3764
  CUDA: _logcumsumexp_cuda
3765
+ MPS: _logcumsumexp_mps
3743
3766
 
3744
3767
  - func: _logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
3745
3768
  dispatch:
3746
3769
  CPU: _logcumsumexp_out_cpu
3747
3770
  CUDA: _logcumsumexp_out_cuda
3771
+ MPS: _logcumsumexp_out_mps
3748
3772
 
3749
3773
  - func: logcumsumexp(Tensor self, int dim) -> Tensor
3750
3774
  variants: function, method
@@ -4172,11 +4196,13 @@
4172
4196
  dispatch:
4173
4197
  CPU: _int_mm_cpu
4174
4198
  CUDA: _int_mm_cuda
4199
+ XPU: _int_mm_xpu
4175
4200
 
4176
4201
  - func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
4177
4202
  dispatch:
4178
4203
  CPU: _int_mm_out_cpu
4179
4204
  CUDA: _int_mm_out_cuda
4205
+ XPU: _int_mm_out_xpu
4180
4206
 
4181
4207
  - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
4182
4208
  dispatch:
@@ -4213,6 +4239,7 @@
4213
4239
  - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
4214
4240
  dispatch:
4215
4241
  CPU: _weight_int8pack_mm_cpu
4242
+ CUDA: _weight_int8pack_mm_cuda
4216
4243
  MPS: _weight_int8pack_mm_mps
4217
4244
 
4218
4245
  - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
@@ -4269,7 +4296,7 @@
4269
4296
  structured: True
4270
4297
  structured_inherits: TensorIteratorBase
4271
4298
  dispatch:
4272
- CPU, CUDA, MPS: mul_out
4299
+ CPU, CUDA, MPS, MTIA: mul_out
4273
4300
  SparseCPU: mul_out_sparse_cpu
4274
4301
  SparseCUDA: mul_out_sparse_cuda
4275
4302
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
@@ -4641,7 +4668,7 @@
4641
4668
  variants: function, method
4642
4669
  dispatch:
4643
4670
  CompositeExplicitAutograd: rad2deg
4644
- SparseCPU, SparseCUDA: rad2deg_sparse
4671
+ SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse
4645
4672
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr
4646
4673
  tags: pointwise
4647
4674
 
@@ -4649,14 +4676,14 @@
4649
4676
  variants: function, method
4650
4677
  dispatch:
4651
4678
  CompositeExplicitAutograd: rad2deg_
4652
- SparseCPU, SparseCUDA: rad2deg_sparse_
4679
+ SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse_
4653
4680
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_
4654
4681
  tags: pointwise
4655
4682
 
4656
4683
  - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4657
4684
  dispatch:
4658
4685
  CompositeExplicitAutograd: rad2deg_out
4659
- SparseCPU, SparseCUDA: rad2deg_sparse_out
4686
+ SparseCPU, SparseCUDA, SparseMPS: rad2deg_sparse_out
4660
4687
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out
4661
4688
  tags: pointwise
4662
4689
 
@@ -4664,7 +4691,7 @@
4664
4691
  variants: function, method
4665
4692
  dispatch:
4666
4693
  CompositeExplicitAutograd: deg2rad
4667
- SparseCPU, SparseCUDA: deg2rad_sparse
4694
+ SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse
4668
4695
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr
4669
4696
  tags: pointwise
4670
4697
 
@@ -4672,14 +4699,14 @@
4672
4699
  variants: function, method
4673
4700
  dispatch:
4674
4701
  CompositeExplicitAutograd: deg2rad_
4675
- SparseCPU, SparseCUDA: deg2rad_sparse_
4702
+ SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse_
4676
4703
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_
4677
4704
  tags: pointwise
4678
4705
 
4679
4706
  - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
4680
4707
  dispatch:
4681
4708
  CompositeExplicitAutograd: deg2rad_out
4682
- SparseCPU, SparseCUDA: deg2rad_sparse_out
4709
+ SparseCPU, SparseCUDA, SparseMPS: deg2rad_sparse_out
4683
4710
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_out
4684
4711
  tags: pointwise
4685
4712
 
@@ -4905,7 +4932,7 @@
4905
4932
  structured_delegate: neg.out
4906
4933
  variants: function, method
4907
4934
  dispatch:
4908
- SparseCPU, SparseCUDA: neg_sparse
4935
+ SparseCPU, SparseCUDA, SparseMPS: neg_sparse
4909
4936
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
4910
4937
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg
4911
4938
  tags: [core, pointwise]
@@ -4915,7 +4942,7 @@
4915
4942
  structured_delegate: neg.out
4916
4943
  variants: function, method
4917
4944
  dispatch:
4918
- SparseCPU, SparseCUDA: neg_sparse_
4945
+ SparseCPU, SparseCUDA, SparseMPS: neg_sparse_
4919
4946
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
4920
4947
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg_
4921
4948
  tags: pointwise
@@ -4926,7 +4953,7 @@
4926
4953
  structured_inherits: TensorIteratorBase
4927
4954
  dispatch:
4928
4955
  CPU, CUDA, MPS, MTIA: neg_out
4929
- SparseCPU, SparseCUDA: neg_out_sparse
4956
+ SparseCPU, SparseCUDA, SparseMPS: neg_out_sparse
4930
4957
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
4931
4958
  tags: pointwise
4932
4959
  # Alias for neg
@@ -5010,7 +5037,7 @@
5010
5037
  structured_delegate: round.out
5011
5038
  variants: function, method
5012
5039
  dispatch:
5013
- SparseCPU, SparseCUDA: round_sparse
5040
+ SparseCPU, SparseCUDA, SparseMPS: round_sparse
5014
5041
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr
5015
5042
  tags: [core, pointwise]
5016
5043
 
@@ -5019,7 +5046,7 @@
5019
5046
  structured_delegate: round.out
5020
5047
  variants: function, method
5021
5048
  dispatch:
5022
- SparseCPU, SparseCUDA: round_sparse_
5049
+ SparseCPU, SparseCUDA, SparseMPS: round_sparse_
5023
5050
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_
5024
5051
  tags: pointwise
5025
5052
 
@@ -5029,7 +5056,7 @@
5029
5056
  structured_inherits: TensorIteratorBase
5030
5057
  dispatch:
5031
5058
  CPU, CUDA, MPS: round_out
5032
- SparseCPU, SparseCUDA: round_sparse_out
5059
+ SparseCPU, SparseCUDA, SparseMPS: round_sparse_out
5033
5060
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
5034
5061
  tags: pointwise
5035
5062
 
@@ -5065,13 +5092,14 @@
5065
5092
  device_check: NoCheck # TensorIterator
5066
5093
  variants: function, method
5067
5094
  dispatch:
5068
- CPU, CUDA, MTIA: relu
5095
+ CPU, CUDA: relu
5069
5096
  MPS: relu_mps
5097
+ MTIA: relu_mtia
5070
5098
  MkldnnCPU: mkldnn_relu
5071
5099
  QuantizedCPU: relu_quantized_cpu
5072
5100
  QuantizedCUDA: relu_quantized_cuda
5073
5101
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu
5074
- SparseCPU, SparseCUDA: relu_sparse
5102
+ SparseCPU, SparseCUDA, SparseMPS: relu_sparse
5075
5103
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
5076
5104
  tags: [core, pointwise]
5077
5105
 
@@ -5079,13 +5107,14 @@
5079
5107
  device_check: NoCheck # TensorIterator
5080
5108
  variants: function, method
5081
5109
  dispatch:
5082
- CPU, CUDA, MTIA: relu_
5110
+ CPU, CUDA: relu_
5083
5111
  MPS: relu_mps_
5112
+ MTIA: relu_mtia_
5084
5113
  MkldnnCPU: mkldnn_relu_
5085
5114
  QuantizedCPU: relu_quantized_cpu_
5086
5115
  QuantizedCUDA: relu_quantized_cuda_
5087
5116
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu_
5088
- SparseCPU, SparseCUDA: relu_sparse_
5117
+ SparseCPU, SparseCUDA, SparseMPS: relu_sparse_
5089
5118
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
5090
5119
  autogen: relu.out
5091
5120
  tags: pointwise
@@ -5372,7 +5401,7 @@
5372
5401
  variants: function, method
5373
5402
  dispatch:
5374
5403
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
5375
- SparseCPU, SparseCUDA: sin_sparse
5404
+ SparseCPU, SparseCUDA, SparseMPS: sin_sparse
5376
5405
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sin
5377
5406
  tags: [core, pointwise]
5378
5407
 
@@ -5382,7 +5411,7 @@
5382
5411
  variants: function, method
5383
5412
  dispatch:
5384
5413
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_
5385
- SparseCPU, SparseCUDA: sin_sparse_
5414
+ SparseCPU, SparseCUDA, SparseMPS: sin_sparse_
5386
5415
  tags: pointwise
5387
5416
 
5388
5417
  - func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5392,7 +5421,7 @@
5392
5421
  dispatch:
5393
5422
  CPU, CUDA, MPS, MTIA: sin_out
5394
5423
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
5395
- SparseCPU, SparseCUDA: sin_sparse_out
5424
+ SparseCPU, SparseCUDA, SparseMPS: sin_sparse_out
5396
5425
  tags: pointwise
5397
5426
 
5398
5427
  - func: sinc(Tensor self) -> Tensor
@@ -5417,7 +5446,7 @@
5417
5446
  structured_delegate: sinh.out
5418
5447
  variants: function, method
5419
5448
  dispatch:
5420
- SparseCPU, SparseCUDA: sinh_sparse
5449
+ SparseCPU, SparseCUDA, SparseMPS: sinh_sparse
5421
5450
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr
5422
5451
  tags: [core, pointwise]
5423
5452
 
@@ -5426,7 +5455,7 @@
5426
5455
  structured_delegate: sinh.out
5427
5456
  variants: function, method
5428
5457
  dispatch:
5429
- SparseCPU, SparseCUDA: sinh_sparse_
5458
+ SparseCPU, SparseCUDA, SparseMPS: sinh_sparse_
5430
5459
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_
5431
5460
  tags: pointwise
5432
5461
 
@@ -5436,7 +5465,7 @@
5436
5465
  structured_inherits: TensorIteratorBase
5437
5466
  dispatch:
5438
5467
  CPU, CUDA, MPS: sinh_out
5439
- SparseCPU, SparseCUDA: sinh_sparse_out
5468
+ SparseCPU, SparseCUDA, SparseMPS: sinh_sparse_out
5440
5469
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out
5441
5470
 
5442
5471
  # Returns a copy of this `Variable` that is detached from its autograd graph.
@@ -5484,6 +5513,13 @@
5484
5513
  tags: core
5485
5514
  manual_cpp_binding: True
5486
5515
 
5516
+ - func: sym_is_contiguous(Tensor self, MemoryFormat memory_format=contiguous_format) -> SymBool
5517
+ variants: function
5518
+ device_check: NoCheck
5519
+ device_guard: False
5520
+ tags: core
5521
+ manual_cpp_binding: True
5522
+
5487
5523
  - func: sym_numel(Tensor self) -> SymInt
5488
5524
  variants: function
5489
5525
  device_check: NoCheck
@@ -5857,6 +5893,15 @@
5857
5893
  CPU, CUDA: nansum_out
5858
5894
  MPS: nansum_out_mps
5859
5895
 
5896
+ - func: hash_tensor(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0) -> Tensor
5897
+ variants: function, method
5898
+ structured_delegate: hash_tensor.out
5899
+
5900
+ - func: hash_tensor.out(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0, Tensor(a!) out) -> Tensor(a!)
5901
+ structured: True
5902
+ dispatch:
5903
+ CPU, CUDA: hash_tensor_out
5904
+
5860
5905
  - func: sum_to_size(Tensor self, SymInt[] size) -> Tensor
5861
5906
  variants: method
5862
5907
  device_check: NoCheck
@@ -5870,7 +5915,7 @@
5870
5915
  variants: function, method
5871
5916
  dispatch:
5872
5917
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sqrt
5873
- SparseCPU, SparseCUDA: sqrt_sparse
5918
+ SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse
5874
5919
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
5875
5920
  tags: [core, pointwise]
5876
5921
 
@@ -5879,7 +5924,7 @@
5879
5924
  structured_delegate: sqrt.out
5880
5925
  variants: function, method
5881
5926
  dispatch:
5882
- SparseCPU, SparseCUDA: sqrt_sparse_
5927
+ SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse_
5883
5928
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_
5884
5929
  tags: pointwise
5885
5930
 
@@ -5889,7 +5934,7 @@
5889
5934
  structured_inherits: TensorIteratorBase
5890
5935
  dispatch:
5891
5936
  CPU, CUDA, MPS, MTIA: sqrt_out
5892
- SparseCPU, SparseCUDA: sqrt_sparse_out
5937
+ SparseCPU, SparseCUDA, SparseMPS: sqrt_sparse_out
5893
5938
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
5894
5939
  tags: pointwise
5895
5940
 
@@ -6027,7 +6072,7 @@
6027
6072
  structured_delegate: tan.out
6028
6073
  variants: function, method
6029
6074
  dispatch:
6030
- SparseCPU, SparseCUDA: tan_sparse
6075
+ SparseCPU, SparseCUDA, SparseMPS: tan_sparse
6031
6076
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr
6032
6077
  tags: [core, pointwise]
6033
6078
 
@@ -6036,7 +6081,7 @@
6036
6081
  structured_delegate: tan.out
6037
6082
  variants: function, method
6038
6083
  dispatch:
6039
- SparseCPU, SparseCUDA: tan_sparse_
6084
+ SparseCPU, SparseCUDA, SparseMPS: tan_sparse_
6040
6085
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_
6041
6086
  tags: pointwise
6042
6087
 
@@ -6046,7 +6091,7 @@
6046
6091
  structured_inherits: TensorIteratorBase
6047
6092
  dispatch:
6048
6093
  CPU, CUDA, MPS: tan_out
6049
- SparseCPU, SparseCUDA: tan_sparse_out
6094
+ SparseCPU, SparseCUDA, SparseMPS: tan_sparse_out
6050
6095
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
6051
6096
  tags: pointwise
6052
6097
 
@@ -6057,7 +6102,7 @@
6057
6102
  dispatch:
6058
6103
  QuantizedCPU: tanh_quantized_cpu
6059
6104
  MkldnnCPU: mkldnn_tanh
6060
- SparseCPU, SparseCUDA: tanh_sparse
6105
+ SparseCPU, SparseCUDA, SparseMPS: tanh_sparse
6061
6106
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
6062
6107
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh
6063
6108
  tags: [core, pointwise]
@@ -6068,7 +6113,7 @@
6068
6113
  variants: function, method
6069
6114
  dispatch:
6070
6115
  MkldnnCPU: mkldnn_tanh_
6071
- SparseCPU, SparseCUDA: tanh_sparse_
6116
+ SparseCPU, SparseCUDA, SparseMPS: tanh_sparse_
6072
6117
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
6073
6118
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh_
6074
6119
  tags: pointwise
@@ -6079,7 +6124,7 @@
6079
6124
  structured_inherits: TensorIteratorBase
6080
6125
  dispatch:
6081
6126
  CPU, CUDA, MPS, MTIA: tanh_out
6082
- SparseCPU, SparseCUDA: tanh_sparse_out
6127
+ SparseCPU, SparseCUDA, SparseMPS: tanh_sparse_out
6083
6128
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
6084
6129
  tags: pointwise
6085
6130
 
@@ -6351,8 +6396,8 @@
6351
6396
  device_check: NoCheck # TensorIterator
6352
6397
  variants: function, method
6353
6398
  dispatch:
6354
- SparseCPU, SparseCUDA: trunc_sparse
6355
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr
6399
+ SparseCPU, SparseCUDA, SparseMPS: trunc_sparse
6400
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr
6356
6401
  tags: [core, pointwise]
6357
6402
 
6358
6403
  - func: trunc_(Tensor(a!) self) -> Tensor(a!)
@@ -6360,8 +6405,8 @@
6360
6405
  device_check: NoCheck # TensorIterator
6361
6406
  variants: function, method
6362
6407
  dispatch:
6363
- SparseCPU, SparseCUDA: trunc_sparse_
6364
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_
6408
+ SparseCPU, SparseCUDA, SparseMPS: trunc_sparse_
6409
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr_
6365
6410
  tags: pointwise
6366
6411
 
6367
6412
  - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -6370,8 +6415,8 @@
6370
6415
  device_check: NoCheck # TensorIterator
6371
6416
  dispatch:
6372
6417
  CPU, CUDA, MPS: trunc_out
6373
- SparseCPU, SparseCUDA: trunc_sparse_out
6374
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
6418
+ SparseCPU, SparseCUDA, SparseMPS: trunc_sparse_out
6419
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: trunc_sparse_csr_out
6375
6420
  tags: pointwise
6376
6421
  # Alias for trunc
6377
6422
 
@@ -6881,7 +6926,7 @@
6881
6926
  variants: function, method
6882
6927
  dispatch:
6883
6928
  CompositeExplicitAutograd: clone
6884
- SparseCPU, SparseCUDA: clone_sparse
6929
+ SparseCPU, SparseCUDA, SparseMPS: clone_sparse
6885
6930
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
6886
6931
  MkldnnCPU: mkldnn_clone
6887
6932
  QuantizedCPU, QuantizedCUDA: quantized_clone
@@ -6916,7 +6961,7 @@
6916
6961
  CPU, CUDA: zero_
6917
6962
  MPS: zero_mps_
6918
6963
  Meta: zero_meta_
6919
- SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
6964
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: zero_sparse_
6920
6965
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
6921
6966
  MkldnnCPU: mkldnn_zero_
6922
6967
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: zero_nested_
@@ -6929,6 +6974,7 @@
6929
6974
  dispatch:
6930
6975
  CPU, CUDA: sub_out
6931
6976
  MPS: sub_out_mps
6977
+ MTIA: sub_out_mtia
6932
6978
  SparseCPU, SparseCUDA: sub_out_sparse
6933
6979
  tags: pointwise
6934
6980
 
@@ -6986,7 +7032,7 @@
6986
7032
  device_check: NoCheck # TensorIterator
6987
7033
  variants: function
6988
7034
  dispatch:
6989
- CPU, CUDA, MPS: rsub
7035
+ CPU, CUDA, MPS, MTIA: rsub
6990
7036
  autogen: rsub.Tensor_out
6991
7037
 
6992
7038
  - func: heaviside.out(Tensor self, Tensor values, *, Tensor(a!) out) -> Tensor(a!)
@@ -7054,6 +7100,7 @@
7054
7100
  CUDA: addmm_out_cuda
7055
7101
  MPS: addmm_out_mps
7056
7102
  XPU: addmm_out_xpu
7103
+ MTIA: addmm_out_mtia
7057
7104
  SparseCPU: addmm_out_sparse_dense_cpu
7058
7105
  SparseCUDA: addmm_out_sparse_dense_cuda
7059
7106
  SparseCsrCPU: addmm_out_sparse_compressed_cpu
@@ -7101,22 +7148,26 @@
7101
7148
  dispatch:
7102
7149
  CPU: _scaled_mm_cpu
7103
7150
  CUDA: _scaled_mm_cuda
7151
+ tags: needs_exact_strides
7104
7152
 
7105
7153
  - func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
7106
7154
  variants: function
7107
7155
  dispatch:
7108
7156
  CPU: _scaled_mm_out_cpu
7109
7157
  CUDA: _scaled_mm_out_cuda
7158
+ tags: needs_exact_strides
7110
7159
 
7111
7160
 
7112
7161
  - func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
7113
7162
  variants: function
7114
7163
  dispatch:
7115
7164
  CUDA: _scaled_grouped_mm_cuda
7165
+ tags: needs_exact_strides
7116
7166
 
7117
7167
  - func: _grouped_mm(Tensor self, Tensor mat2, Tensor? offs=None, Tensor? bias=None, ScalarType? out_dtype=None) -> Tensor
7118
7168
  variants: function
7119
7169
  dispatch:
7170
+ CompositeExplicitAutograd: _grouped_mm
7120
7171
  CUDA: _grouped_mm_cuda
7121
7172
 
7122
7173
  # NOTE [ Sparse: autograd and API ]
@@ -7283,26 +7334,26 @@
7283
7334
 
7284
7335
  - func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
7285
7336
  dispatch:
7286
- SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_sparse
7337
+ SparseCPU, SparseCUDA, SparseMeta, SparseMPS, Meta: new_with_dims_sparse
7287
7338
  autogen: _sparse_coo_tensor_with_dims.out
7288
7339
 
7289
7340
  - func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
7290
7341
  dispatch:
7291
- SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_and_tensor_sparse_symint
7342
+ SparseCPU, SparseCUDA, SparseMeta, SparseMPS, Meta: new_with_dims_and_tensor_sparse_symint
7292
7343
  autogen: _sparse_coo_tensor_with_dims_and_tensors.out
7293
7344
 
7294
7345
  - func: sparse_resize_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
7295
7346
  use_const_ref_for_mutable_tensors: True
7296
7347
  variants: method
7297
7348
  dispatch:
7298
- SparseCPU, SparseCUDA, SparseMeta: sparse_resize_
7349
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_resize_
7299
7350
  autogen: sparse_resize, sparse_resize.out
7300
7351
 
7301
7352
  - func: sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
7302
7353
  use_const_ref_for_mutable_tensors: True
7303
7354
  variants: method
7304
7355
  dispatch:
7305
- SparseCPU, SparseCUDA, SparseMeta: sparse_resize_and_clear_
7356
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_resize_and_clear_
7306
7357
  autogen: sparse_resize_and_clear, sparse_resize_and_clear.out
7307
7358
 
7308
7359
  - func: sparse_mask(Tensor self, Tensor mask) -> Tensor
@@ -7328,8 +7379,8 @@
7328
7379
  - func: _to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor
7329
7380
  variants: method
7330
7381
  dispatch:
7331
- SparseCPU, SparseCUDA: sparse_to_dense
7332
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_dense
7382
+ SparseCPU, SparseCUDA, SparseMPS: sparse_to_dense
7383
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: sparse_compressed_to_dense
7333
7384
  MkldnnCPU: mkldnn_to_dense
7334
7385
  autogen: _to_dense.out
7335
7386
 
@@ -7338,8 +7389,8 @@
7338
7389
  - func: sparse_dim(Tensor self) -> int
7339
7390
  variants: method
7340
7391
  dispatch:
7341
- SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
7342
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
7392
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sparse_dim_sparse
7393
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: sparse_dim_sparse_csr
7343
7394
  CompositeExplicitAutograd: sparse_dim_default
7344
7395
  device_check: NoCheck
7345
7396
  device_guard: False
@@ -7355,8 +7406,8 @@
7355
7406
  - func: dense_dim(Tensor self) -> int
7356
7407
  variants: method
7357
7408
  dispatch:
7358
- SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
7359
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
7409
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: dense_dim_sparse
7410
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: dense_dim_sparse_csr
7360
7411
  CompositeExplicitAutograd: dense_dim_default
7361
7412
  device_check: NoCheck
7362
7413
  device_guard: False
@@ -7372,8 +7423,8 @@
7372
7423
  - func: _nnz(Tensor self) -> int
7373
7424
  variants: method
7374
7425
  dispatch:
7375
- SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
7376
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
7426
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _nnz_sparse
7427
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMPS, SparseCsrMeta: _nnz_sparse_csr
7377
7428
  device_check: NoCheck
7378
7429
  device_guard: False
7379
7430
 
@@ -7389,12 +7440,13 @@
7389
7440
  dispatch:
7390
7441
  SparseCPU: _coalesce_sparse_cpu
7391
7442
  SparseCUDA: _coalesce_sparse_cuda
7443
+ SparseMPS: _coalesce_sparse_mps
7392
7444
  autogen: _coalesce.out
7393
7445
 
7394
7446
  - func: is_coalesced(Tensor self) -> bool
7395
7447
  variants: method
7396
7448
  dispatch:
7397
- SparseCPU, SparseCUDA, SparseMeta: is_coalesced_sparse
7449
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: is_coalesced_sparse
7398
7450
  CompositeExplicitAutograd: is_coalesced_default
7399
7451
  device_check: NoCheck
7400
7452
  device_guard: False
@@ -7402,14 +7454,14 @@
7402
7454
  - func: _indices(Tensor(a) self) -> Tensor(a)
7403
7455
  variants: method
7404
7456
  dispatch:
7405
- SparseCPU, SparseCUDA, SparseMeta: _indices_sparse
7457
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _indices_sparse
7406
7458
  device_check: NoCheck
7407
7459
  device_guard: False
7408
7460
 
7409
7461
  - func: _values(Tensor(a) self) -> Tensor(a)
7410
7462
  variants: method
7411
7463
  dispatch:
7412
- SparseCPU, SparseCUDA, SparseMeta: _values_sparse
7464
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _values_sparse
7413
7465
  device_check: NoCheck
7414
7466
  device_guard: False
7415
7467
 
@@ -7419,7 +7471,7 @@
7419
7471
  - func: _coalesced_(Tensor(a!) self, bool coalesced) -> Tensor(a!)
7420
7472
  variants: method
7421
7473
  dispatch:
7422
- SparseCPU, SparseCUDA, SparseMeta: _coalesced_sparse_
7474
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: _coalesced_sparse_
7423
7475
  device_check: NoCheck
7424
7476
  device_guard: False
7425
7477
  autogen: _coalesced, _coalesced.out
@@ -7427,7 +7479,7 @@
7427
7479
  - func: indices(Tensor(a) self) -> Tensor(a)
7428
7480
  variants: method
7429
7481
  dispatch:
7430
- SparseCPU, SparseCUDA, SparseMeta: indices_sparse
7482
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: indices_sparse
7431
7483
  CompositeExplicitAutograd: indices_default
7432
7484
  device_check: NoCheck
7433
7485
  device_guard: False
@@ -7435,7 +7487,7 @@
7435
7487
  - func: values(Tensor(a) self) -> Tensor(a)
7436
7488
  variants: method
7437
7489
  dispatch:
7438
- SparseCPU, SparseCUDA, SparseMeta: values_sparse
7490
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: values_sparse
7439
7491
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
7440
7492
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: values_nested
7441
7493
  CompositeExplicitAutograd: values_default
@@ -7488,7 +7540,7 @@
7488
7540
  device_check: NoCheck # Allows copy into different device
7489
7541
  variants: function
7490
7542
  dispatch:
7491
- SparseCPU, SparseCUDA, SparseMeta: copy_sparse_
7543
+ SparseCPU, SparseCUDA, SparseMPS, SparseMeta: copy_sparse_
7492
7544
  autogen: copy_sparse_to_sparse, copy_sparse_to_sparse.out
7493
7545
 
7494
7546
  # By adding the AutogradNestedTensor this makes this function CompositeImplicit-like for nested tensors
@@ -7508,9 +7560,9 @@
7508
7560
  - func: _to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
7509
7561
  variants: method
7510
7562
  dispatch:
7511
- CPU, CUDA: dense_to_sparse
7512
- SparseCPU, SparseCUDA: sparse_coo_to_sparse
7513
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
7563
+ CPU, CUDA, MPS: dense_to_sparse
7564
+ SparseCPU, SparseCUDA, SparseMPS: sparse_coo_to_sparse
7565
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta, SparseCsrMPS: sparse_compressed_to_sparse
7514
7566
  autogen: _to_sparse.sparse_dim_out
7515
7567
 
7516
7568
  - func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
@@ -7520,8 +7572,8 @@
7520
7572
  - func: _to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
7521
7573
  variants: method
7522
7574
  dispatch:
7523
- CPU, CUDA: dense_to_sparse
7524
- SparseCPU, SparseCUDA: sparse_coo_to_sparse
7575
+ CPU, CUDA, MPS: dense_to_sparse
7576
+ SparseCPU, SparseCUDA, SparseMPS: sparse_coo_to_sparse
7525
7577
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
7526
7578
  autogen: _to_sparse.out
7527
7579
 
@@ -8894,7 +8946,7 @@
8894
8946
  structured_inherits: TensorIteratorBase
8895
8947
  device_check: NoCheck # TensorIterator
8896
8948
  dispatch:
8897
- CPU, CUDA: ne_Scalar_out
8949
+ CPU, CUDA, MTIA: ne_Scalar_out
8898
8950
  MPS: ne_scalar_out_mps
8899
8951
  QuantizedCPU: ne_out_quantized_cpu
8900
8952
  tags: pointwise
@@ -8912,7 +8964,7 @@
8912
8964
  structured_inherits: TensorIteratorBase
8913
8965
  device_check: NoCheck # TensorIterator
8914
8966
  dispatch:
8915
- CPU, CUDA: ne_Tensor_out
8967
+ CPU, CUDA, MTIA: ne_Tensor_out
8916
8968
  MPS: ne_tensor_out_mps
8917
8969
  QuantizedCPU: ne_out_quantized_cpu
8918
8970
  tags: pointwise
@@ -8957,7 +9009,7 @@
8957
9009
  structured_inherits: TensorIteratorBase
8958
9010
  device_check: NoCheck # TensorIterator
8959
9011
  dispatch:
8960
- CPU, CUDA: eq_Scalar_out
9012
+ CPU, CUDA, MTIA: eq_Scalar_out
8961
9013
  MPS: eq_scalar_out_mps
8962
9014
  QuantizedCPU: eq_out_quantized_cpu
8963
9015
  tags: pointwise
@@ -8976,7 +9028,7 @@
8976
9028
  structured_inherits: TensorIteratorBase
8977
9029
  device_check: NoCheck # TensorIterator
8978
9030
  dispatch:
8979
- CPU, CUDA: eq_Tensor_out
9031
+ CPU, CUDA, MTIA: eq_Tensor_out
8980
9032
  MPS: eq_tensor_out_mps
8981
9033
  QuantizedCPU: eq_out_quantized_cpu
8982
9034
  tags: pointwise
@@ -8995,7 +9047,7 @@
8995
9047
  structured_inherits: TensorIteratorBase
8996
9048
  device_check: NoCheck # TensorIterator
8997
9049
  dispatch:
8998
- CPU, CUDA: ge_Scalar_out
9050
+ CPU, CUDA, MTIA: ge_Scalar_out
8999
9051
  MPS: ge_scalar_out_mps
9000
9052
  QuantizedCPU: ge_out_quantized_cpu
9001
9053
  tags: pointwise
@@ -9014,7 +9066,7 @@
9014
9066
  structured_inherits: TensorIteratorBase
9015
9067
  device_check: NoCheck # TensorIterator
9016
9068
  dispatch:
9017
- CPU, CUDA: ge_Tensor_out
9069
+ CPU, CUDA, MTIA: ge_Tensor_out
9018
9070
  MPS: ge_tensor_out_mps
9019
9071
  QuantizedCPU: ge_out_quantized_cpu
9020
9072
  tags: pointwise
@@ -9059,7 +9111,7 @@
9059
9111
  structured_inherits: TensorIteratorBase
9060
9112
  device_check: NoCheck # TensorIterator
9061
9113
  dispatch:
9062
- CPU, CUDA: le_Scalar_out
9114
+ CPU, CUDA, MTIA: le_Scalar_out
9063
9115
  MPS: le_scalar_out_mps
9064
9116
  QuantizedCPU: le_out_quantized_cpu
9065
9117
  tags: pointwise
@@ -9077,7 +9129,7 @@
9077
9129
  structured_inherits: TensorIteratorBase
9078
9130
  device_check: NoCheck # TensorIterator
9079
9131
  dispatch:
9080
- CPU, CUDA: le_Tensor_out
9132
+ CPU, CUDA, MTIA: le_Tensor_out
9081
9133
  MPS: le_tensor_out_mps
9082
9134
  QuantizedCPU: le_out_quantized_cpu
9083
9135
  tags: pointwise
@@ -9122,7 +9174,7 @@
9122
9174
  structured_inherits: TensorIteratorBase
9123
9175
  device_check: NoCheck # TensorIterator
9124
9176
  dispatch:
9125
- CPU, CUDA: gt_Scalar_out
9177
+ CPU, CUDA,MTIA: gt_Scalar_out
9126
9178
  MPS: gt_scalar_out_mps
9127
9179
  QuantizedCPU: gt_out_quantized_cpu
9128
9180
  tags: pointwise
@@ -9141,7 +9193,7 @@
9141
9193
  structured_inherits: TensorIteratorBase
9142
9194
  device_check: NoCheck # TensorIterator
9143
9195
  dispatch:
9144
- CPU, CUDA: gt_Tensor_out
9196
+ CPU, CUDA, MTIA: gt_Tensor_out
9145
9197
  MPS: gt_tensor_out_mps
9146
9198
  QuantizedCPU: gt_out_quantized_cpu
9147
9199
  tags: pointwise
@@ -9369,7 +9421,7 @@
9369
9421
  structured_inherits: TensorIteratorBase
9370
9422
  device_check: NoCheck # TensorIterator
9371
9423
  dispatch:
9372
- CPU, CUDA: addcmul_out
9424
+ CPU, CUDA, MTIA: addcmul_out
9373
9425
  MPS: addcmul_out_mps
9374
9426
  tags: pointwise
9375
9427
 
@@ -9390,7 +9442,7 @@
9390
9442
  structured_inherits: TensorIteratorBase
9391
9443
  device_check: NoCheck # TensorIterator
9392
9444
  dispatch:
9393
- CPU, CUDA: addcdiv_out
9445
+ CPU, CUDA, MTIA: addcdiv_out
9394
9446
  MPS: addcdiv_out_mps
9395
9447
  tags: pointwise
9396
9448
 
@@ -9679,7 +9731,7 @@
9679
9731
  structured_delegate: sign.out
9680
9732
  variants: function, method
9681
9733
  dispatch:
9682
- SparseCPU, SparseCUDA: sign_sparse
9734
+ SparseCPU, SparseCUDA, SparseMPS: sign_sparse
9683
9735
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr
9684
9736
  tags: [core, pointwise]
9685
9737
 
@@ -9688,7 +9740,7 @@
9688
9740
  structured_delegate: sign.out
9689
9741
  variants: method
9690
9742
  dispatch:
9691
- SparseCPU, SparseCUDA: sign_sparse_
9743
+ SparseCPU, SparseCUDA, SparseMPS: sign_sparse_
9692
9744
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_
9693
9745
  tags: pointwise
9694
9746
 
@@ -9699,7 +9751,7 @@
9699
9751
  dispatch:
9700
9752
  CPU, CUDA: sign_out
9701
9753
  MPS: sign_out_mps
9702
- SparseCPU, SparseCUDA: sign_sparse_out
9754
+ SparseCPU, SparseCUDA, SparseMPS: sign_sparse_out
9703
9755
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_out
9704
9756
  tags: pointwise
9705
9757
 
@@ -9707,7 +9759,7 @@
9707
9759
  variants: function, method
9708
9760
  structured_delegate: signbit.out
9709
9761
  dispatch:
9710
- SparseCPU, SparseCUDA: signbit_sparse
9762
+ SparseCPU, SparseCUDA, SparseMPS: signbit_sparse
9711
9763
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr
9712
9764
  tags: pointwise
9713
9765
 
@@ -9718,7 +9770,7 @@
9718
9770
  CPU: signbit_out
9719
9771
  CUDA: signbit_out
9720
9772
  MPS: signbit_out_mps
9721
- SparseCPU, SparseCUDA: signbit_sparse_out
9773
+ SparseCPU, SparseCUDA, SparseMPS: signbit_sparse_out
9722
9774
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr_out
9723
9775
  tags: pointwise
9724
9776
 
@@ -9864,7 +9916,7 @@
9864
9916
  structured: True
9865
9917
  structured_inherits: TensorIteratorBase
9866
9918
  dispatch:
9867
- CPU, CUDA, MPS: fmod_out
9919
+ CPU, CUDA, MPS, MTIA: fmod_out
9868
9920
  tags: pointwise
9869
9921
 
9870
9922
  - func: fmod.Tensor(Tensor self, Tensor other) -> Tensor
@@ -9901,7 +9953,7 @@
9901
9953
  structured: True
9902
9954
  structured_inherits: TensorIteratorBase
9903
9955
  dispatch:
9904
- CPU, CUDA: igamma_out
9956
+ CPU, CUDA, MPS: igamma_out
9905
9957
  tags: pointwise
9906
9958
 
9907
9959
  - func: igamma(Tensor self, Tensor other) -> Tensor
@@ -9918,7 +9970,7 @@
9918
9970
  structured: True
9919
9971
  structured_inherits: TensorIteratorBase
9920
9972
  dispatch:
9921
- CPU, CUDA: igammac_out
9973
+ CPU, CUDA, MPS: igammac_out
9922
9974
  tags: pointwise
9923
9975
 
9924
9976
  - func: igammac(Tensor self, Tensor other) -> Tensor
@@ -10464,6 +10516,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow_
  CUDA: foreach_tensor_add_scalar_kernel_cuda_
+ MTIA: foreach_tensor_add_scalar_kernel_mtia_
  autogen: _foreach_add.Scalar_out

  - func: _foreach_add.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]
@@ -10472,6 +10525,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow
  CUDA: foreach_tensor_add_list_kernel_cuda
+ MTIA: foreach_tensor_add_list_kernel_mtia

  - func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10479,6 +10533,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow_
  CUDA: foreach_tensor_add_list_kernel_cuda_
+ MTIA: foreach_tensor_add_list_kernel_mtia_
  autogen: _foreach_add.List_out

  - func: _foreach_add.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
@@ -10509,6 +10564,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow_
  CUDA: foreach_tensor_add_tensor_kernel_cuda_
+ MTIA: foreach_tensor_add_tensor_kernel_mtia_
  autogen: _foreach_add.Tensor_out

  - func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
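
Note: the _foreach_add family above gains MTIA kernels. These internal ops apply one fused update across an entire list of tensors instead of launching per-tensor kernels; a hedged sketch using the underscore-prefixed API (MTIA assumed, hypothetical here):

    import torch

    params = [torch.zeros(3, device="mtia") for _ in range(4)]
    grads = [torch.ones(3, device="mtia") for _ in range(4)]
    torch._foreach_add_(params, grads, alpha=-0.1)  # fused in-place SGD-style step
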
@@ -10569,6 +10625,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow_
  CUDA: foreach_tensor_mul_scalar_kernel_cuda_
+ MTIA: foreach_tensor_mul_scalar_kernel_mtia_
  autogen: _foreach_mul.Scalar_out

  - func: _foreach_mul.List(Tensor[] self, Tensor[] other) -> Tensor[]
@@ -10577,6 +10634,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow
  CUDA: foreach_tensor_mul_list_kernel_cuda
+ MTIA: foreach_tensor_mul_list_kernel_mtia

  - func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10584,6 +10642,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow_
  CUDA: foreach_tensor_mul_list_kernel_cuda_
+ MTIA: foreach_tensor_mul_list_kernel_mtia_
  autogen: _foreach_mul.List_out

  - func: _foreach_mul.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
@@ -10607,6 +10666,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow
  CUDA: foreach_tensor_mul_tensor_kernel_cuda
+ MTIA: foreach_tensor_mul_tensor_kernel_mtia

  - func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10614,6 +10674,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow_
  CUDA: foreach_tensor_mul_tensor_kernel_cuda_
+ MTIA: foreach_tensor_mul_tensor_kernel_mtia_
  autogen: _foreach_mul.Tensor_out

  - func: _foreach_div.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
@@ -10910,6 +10971,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow
  CUDA: foreach_tensor_addcmul_scalar_cuda
+ MTIA: foreach_tensor_addcmul_scalar_mtia

  - func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10931,6 +10993,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow_
  CUDA: foreach_tensor_addcmul_scalar_cuda_
+ MTIA: foreach_tensor_addcmul_scalar_mtia_
  autogen: _foreach_addcmul.Scalar_out

  - func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
@@ -10955,6 +11018,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_abs_slow
  CUDA: foreach_tensor_abs_cuda
+ MTIA: foreach_tensor_abs_mtia

  - func: _foreach_abs_(Tensor(a!)[] self) -> ()
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10962,6 +11026,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_abs_slow_
  CUDA: foreach_tensor_abs_cuda_
+ MTIA: foreach_tensor_abs_mtia_
  autogen: _foreach_abs.out

  - func: _foreach_acos(Tensor[] self) -> Tensor[]
@@ -11296,6 +11361,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_norm_slow
  CUDA: foreach_tensor_norm_cuda
+ MTIA: foreach_tensor_norm_mtia
  autogen: _foreach_norm.Scalar_out

  - func: _foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]
@@ -11468,6 +11534,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_sqrt_slow_
  CUDA: foreach_tensor_sqrt_cuda_
+ MTIA: foreach_tensor_sqrt_mtia_
  autogen: _foreach_sqrt.out

  - func: _foreach_tan(Tensor[] self) -> Tensor[]
@@ -11529,6 +11596,7 @@
  dispatch:
  CompositeExplicitAutograd: foreach_tensor_copy_list_kernel_slow_
  CUDA: foreach_tensor_copy_list_kernel_cuda_
+ MTIA: foreach_tensor_copy_list_kernel_mtia_
  autogen: _foreach_copy.out

  - func: _foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out
@@ -11536,6 +11604,7 @@
  variants: function
  dispatch:
  CompositeExplicitAutograd: _foreach_copy
+ MTIA: foreach_tensor_copy_list_kernel_mtia

  - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
  dispatch:
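
Note: the remaining foreach hunks above (mul, addcmul, abs, norm, sqrt, copy) follow the same MTIA pattern. In practice these kernels are usually reached through an optimizer's multi-tensor path rather than called directly; a hedged sketch, with the MTIA device hypothetical:

    import torch

    model = torch.nn.Linear(8, 8).to("mtia")
    opt = torch.optim.SGD(model.parameters(), lr=0.01, foreach=True)
    model(torch.randn(2, 8, device="mtia")).sum().backward()
    opt.step()  # foreach=True routes the update through the _foreach_* kernels
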
@@ -12311,6 +12380,7 @@
  dispatch:
  CPU: avg_pool3d_out_cpu
  CUDA: avg_pool3d_out_cuda
+ MPS: avg_pool3d_out_mps
  MkldnnCPU: mkldnn_avg_pool3d_out

  - func: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
@@ -12327,6 +12397,7 @@
  dispatch:
  CPU: avg_pool3d_backward_out_cpu
  CUDA: avg_pool3d_backward_out_cuda
+ MPS: avg_pool3d_backward_out_mps
  MkldnnCPU: mkldnn_avg_pool3d_backward_out

  - func: avg_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor
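
Note: avg_pool3d gains MPS forward and backward out-kernels above, so 3-D average pooling no longer needs a fallback on Apple silicon. Sketch (MPS assumed):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 2, 8, 8, 8, device="mps", requires_grad=True)
    y = F.avg_pool3d(x, kernel_size=2)
    y.sum().backward()  # gradient path exercises avg_pool3d_backward_out_mps
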
@@ -12422,6 +12493,7 @@
  dispatch:
  CPU: max_pool3d_with_indices_out_cpu
  CUDA: max_pool3d_with_indices_out_cuda
+ MPS: max_pool3d_with_indices_out_mps

  # Return: (Tensor output, Tensor indices)
  - func: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
@@ -12429,6 +12501,7 @@
  dispatch:
  CPU: max_pool3d_with_indices_cpu
  CUDA: max_pool3d_with_indices_cuda
+ MPS: max_pool3d_with_indices_mps
  tags: core

  - func: max_pool3d_with_indices_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
@@ -12436,36 +12509,42 @@
  dispatch:
  CPU: max_pool3d_with_indices_backward_out_cpu
  CUDA: max_pool3d_with_indices_backward_out_cuda
+ MPS: max_pool3d_with_indices_backward_out_mps

  - func: max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor
  python_module: nn
  dispatch:
  CPU: max_pool3d_with_indices_backward_cpu
  CUDA: max_pool3d_with_indices_backward_cuda
+ MPS: max_pool3d_with_indices_backward_mps

  - func: max_unpool2d.out(Tensor self, Tensor indices, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
  dispatch:
  CPU: max_unpooling2d_forward_out_cpu
  CUDA: max_unpooling2d_forward_out_cuda
+ MPS: max_unpooling2d_forward_out_mps

  - func: max_unpool2d(Tensor self, Tensor indices, SymInt[2] output_size) -> Tensor
  python_module: nn
  dispatch:
  CPU: max_unpooling2d_forward_cpu
  CUDA: max_unpooling2d_forward_cuda
+ MPS: max_unpooling2d_forward_mps

  - func: max_unpool3d.out(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
  dispatch:
  CPU: max_unpooling3d_forward_out_cpu
  CUDA: max_unpooling3d_forward_out_cuda
+ MPS: max_unpooling3d_forward_out_mps

  - func: max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor
  python_module: nn
  dispatch:
  CPU: max_unpooling3d_forward_cpu
  CUDA: max_unpooling3d_forward_cuda
+ MPS: max_unpooling3d_forward_mps

  - func: reflection_pad1d.out(Tensor self, SymInt[2] padding, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
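
Note: the block above completes 3-D max pooling on MPS: max_pool3d_with_indices (forward and backward) plus the max_unpool2d/max_unpool3d inverses. A sketch pairing the pool with its inverse (MPS assumed):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 1, 4, 4, 4, device="mps")
    y, idx = F.max_pool3d(x, kernel_size=2, return_indices=True)
    x_recon = F.max_unpool3d(y, idx, kernel_size=2)  # zeros except at the argmax positions
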
@@ -13195,7 +13274,7 @@
  dispatch:
  CompositeExplicitAutograd: isinf
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isinf
- SparseCPU, SparseCUDA: isinf_sparse
+ SparseCPU, SparseCUDA, SparseMPS: isinf_sparse
  SparseMeta: isinf_sparse_meta
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
  autogen: isinf.out
@@ -13211,7 +13290,7 @@
  structured_delegate: isposinf.out
  dispatch:
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isposinf
- SparseCPU, SparseCUDA: isposinf_sparse
+ SparseCPU, SparseCUDA, SparseMPS: isposinf_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
  tags: pointwise

@@ -13220,7 +13299,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA, MPS: isposinf_out
- SparseCPU, SparseCUDA: isposinf_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: isposinf_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
  tags: pointwise

@@ -13229,7 +13308,7 @@
  structured_delegate: isneginf.out
  dispatch:
  NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isneginf
- SparseCPU, SparseCUDA: isneginf_sparse
+ SparseCPU, SparseCUDA, SparseMPS: isneginf_sparse
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
  tags: pointwise

@@ -13238,7 +13317,7 @@
  structured_inherits: TensorIteratorBase
  dispatch:
  CPU, CUDA, MPS: isneginf_out
- SparseCPU, SparseCUDA: isneginf_sparse_out
+ SparseCPU, SparseCUDA, SparseMPS: isneginf_sparse_out
  SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
  tags: pointwise
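
Note: the infinity tests isinf/isposinf/isneginf gain SparseMPS entries above. Sketch (MPS assumed):

    import torch

    x = torch.tensor([float("inf"), -float("inf"), 1.0], device="mps").to_sparse()
    torch.isinf(x)     # sparse bool values: True, True, False
    torch.isposinf(x)  # True, False, False
    torch.isneginf(x)  # False, True, False
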
@@ -14951,6 +15030,7 @@
  - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
  dispatch:
  CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
+ NestedTensorCUDA: _scaled_dot_product_cudnn_attention_nestedtensor_backward_cuda
  tags: nondeterministic_seeded

  - func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
@@ -14983,6 +15063,11 @@
  CUDA: _cudnn_attention_forward
  tags: nondeterministic_seeded

+ - func: _cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+ dispatch:
+ CUDA: _cudnn_attention_backward
+ tags: nondeterministic_seeded
+
  - func: _triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor
  variants: function
  dispatch:
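
Note: above, the cuDNN attention path gains a nested-tensor backward entry and a new standalone _cudnn_attention_backward op. End users reach these through scaled_dot_product_attention; a hedged sketch that forces the cuDNN backend (CUDA GPU with a cuDNN build assumed; whether backward actually lands on the new entry depends on the inputs and the installed cuDNN version):

    import torch
    import torch.nn.functional as F
    from torch.nn.attention import sdpa_kernel, SDPBackend

    q = torch.randn(2, 4, 16, 64, device="cuda", dtype=torch.float16, requires_grad=True)
    k = torch.randn_like(q, requires_grad=True)
    v = torch.randn_like(q, requires_grad=True)
    with sdpa_kernel(SDPBackend.CUDNN_ATTENTION):
        out = F.scaled_dot_product_attention(q, k, v)
    out.sum().backward()
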
@@ -15585,7 +15670,7 @@
  - func: special_shifted_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_shifted_chebyshev_polynomial_t_out
+ CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_t_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15634,7 +15719,7 @@
  - func: special_shifted_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_shifted_chebyshev_polynomial_u_out
+ CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_u_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15683,7 +15768,7 @@
  - func: special_shifted_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_shifted_chebyshev_polynomial_v_out
+ CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_v_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
@@ -15732,7 +15817,7 @@
  - func: special_shifted_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
  device_check: NoCheck
  dispatch:
- CPU, CUDA: special_shifted_chebyshev_polynomial_w_out
+ CPU, CUDA, MPS: special_shifted_chebyshev_polynomial_w_out
  python_module: special
  structured_inherits: TensorIteratorBase
  structured: True
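
Note: all four shifted Chebyshev bases (T*, U*, V*, W*) gain MPS out-kernels above. Sketch (MPS assumed):

    import torch

    x = torch.linspace(0, 1, 5, device="mps")
    n = torch.full_like(x, 3.0)
    t = torch.special.shifted_chebyshev_polynomial_t(x, n)  # T*_3(x), evaluated on MPS
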
@@ -15841,6 +15926,7 @@
  variants: function
  dispatch:
  CPU: _fused_adagrad_kernel_cpu_
+ CUDA: _fused_adagrad_kernel_cuda_
  autogen: _fused_adagrad, _fused_adagrad.out

  - func: _fused_adagrad_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor[] state_steps, *, Tensor lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15848,6 +15934,7 @@
  variants: function
  dispatch:
  CPU: _fused_adagrad_kernel_cpu_
+ CUDA: _fused_adagrad_kernel_cuda_
  autogen: _fused_adagrad.tensor_lr, _fused_adagrad.tensor_lr_out

  # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
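
Note: both _fused_adagrad_ overloads gain a CUDA kernel above, so Adagrad's fused single-kernel update is no longer CPU-only. It is reached through the optimizer's fused flag; a hedged sketch (CUDA device assumed, and the fused Adagrad option may not be present in every release):

    import torch

    model = torch.nn.Linear(8, 8).to("cuda")
    opt = torch.optim.Adagrad(model.parameters(), lr=0.01, fused=True)
    model(torch.randn(2, 8, device="cuda")).sum().backward()
    opt.step()  # dispatches to _fused_adagrad_kernel_cuda_
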