torch-rb 0.17.1 → 0.19.0
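
The hunks below are against PyTorch's `native_functions.yaml` operator-schema file vendored with the gem; standard unified-diff conventions apply (`-` removed, `+` added, leading space for context).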

@@ -187,7 +190,10 @@
   dispatch:
     CPU: _functional_assert_async_msg_cpu
 
-- func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()
+- func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None, *, Device? device=None, Layout? layout=None) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_tensor_metadata
+    Meta: _assert_tensor_metadata_meta_symint
 
 - func: _print(str s) -> ()
   dispatch:
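
The schema above gains device/layout arguments and explicit dispatch entries. A minimal sketch of what the op asserts; this is a private aten op reached through torch.ops, so the exact Python surface is an assumption:

    import torch

    x = torch.ones(2, 3)
    # Passes silently when the metadata matches; raises otherwise.
    torch.ops.aten._assert_tensor_metadata(x, [2, 3], None, torch.float32)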
@@ -309,25 +312,25 @@
 - func: _shape_as_tensor(Tensor self) -> Tensor
 
 - func: dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]
 
 - func: dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
 
 - func: feature_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]
 
 - func: feature_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
 
 - func: alpha_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]
 
 - func: alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
 
 - func: feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]
 
 - func: feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
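
The functional dropout variants pick up maybe_aliasing_or_mutating (the tag marks composite ops whose decomposition may alias or mutate inputs) while keeping nondeterministic_seeded. A quick illustration of the seeded nondeterminism with the public API:

    import torch

    x = torch.randn(4, 4)
    torch.manual_seed(0)
    a = torch.dropout(x, p=0.5, train=True)
    torch.manual_seed(0)
    b = torch.dropout(x, p=0.5, train=True)
    assert torch.equal(a, b)  # same RNG state, same dropout mask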
@@ -338,7 +341,7 @@
   dispatch:
     CompositeExplicitAutograd: abs
     SparseCPU, SparseCUDA: abs_sparse
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
   tags: [core, pointwise]
 
@@ -348,7 +351,7 @@
   dispatch:
     CompositeExplicitAutograd: abs_
     SparseCPU, SparseCUDA: abs_sparse_
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
 
 - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -357,7 +360,7 @@
     CPU, CUDA: abs_out
     MPS: abs_out_mps
     SparseCPU, SparseCUDA: abs_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
   tags: pointwise
 
 # Note [Adding an alias]
@@ -400,14 +403,14 @@
   variants: function, method
   dispatch:
     CPU, CUDA: angle
-    SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
   tags: pointwise
 
 - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: angle_out
-    SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
   tags: pointwise
 
 - func: view_as_real(Tensor(a) self) -> Tensor(a)
@@ -425,7 +428,7 @@
   structured_delegate: sgn.out
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
   tags: pointwise
 
@@ -434,7 +437,7 @@
   structured_delegate: sgn.out
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
   tags: pointwise
 
@@ -445,7 +448,7 @@
     CPU, CUDA: sgn_out
     MPS: sgn_out_mps
     SparseCPU, SparseCUDA: sgn_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_out
   tags: pointwise
 
 - func: chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
@@ -472,26 +475,26 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: _conj_physical
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr
   autogen: _conj_physical.out
 
 - func: conj_physical(Tensor self) -> Tensor
   variants: function, method
-  tags: pointwise
+  tags: [pointwise, maybe_aliasing_or_mutating]
 
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
     MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_out
   tags: pointwise
 
 - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: conj_physical_
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_
   tags: pointwise
 
 - func: resolve_conj(Tensor(a) self) -> Tensor(a)
@@ -537,9 +540,11 @@
 
 - func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
   tags: core
+  autogen: avg_pool1d.out
 
 - func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
   tags: core
+  autogen: adaptive_avg_pool1d.out
 
 # Return: (Tensor output, Tensor indices)
 - func: adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)
@@ -639,6 +644,7 @@
     CPU: addmv_out_cpu
     CUDA: addmv_out_cuda
     MPS: addmv_out_mps
+    XPU: addmv_out_xpu
     SparseCsrCPU: addmv_out_sparse_compressed
     SparseCsrCUDA: addmv_out_sparse_compressed_cuda
 
@@ -694,6 +700,9 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: all.out
   variants: function, method
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
+
 
 - func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -863,7 +872,7 @@
   structured_delegate: asinh.out
   dispatch:
     SparseCPU, SparseCUDA: asinh_sparse
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr
   tags: [core, pointwise]
 
 - func: asinh_(Tensor(a!) self) -> Tensor(a!)
@@ -871,7 +880,7 @@
   structured_delegate: asinh.out
   dispatch:
     SparseCPU, SparseCUDA: asinh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_
   tags: pointwise
 
 - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -881,7 +890,7 @@
     CPU, CUDA: asinh_out
     MPS: asinh_out_mps
     SparseCPU, SparseCUDA: asinh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_out
   tags: pointwise
 
 # arcsinh, alias for asinh
@@ -898,7 +907,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atanh_sparse
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr
   tags: [core, pointwise]
 
 - func: atanh_(Tensor(a!) self) -> Tensor(a!)
@@ -906,7 +915,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atanh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_
   tags: pointwise
 
 - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -916,7 +925,7 @@
     CPU, CUDA: atanh_out
     MPS: atanh_out_mps
     SparseCPU, SparseCUDA: atanh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_out
   tags: pointwise
 # arctanh, alias for atanh
 
@@ -954,7 +963,7 @@
   structured_delegate: asin.out
   dispatch:
     SparseCPU, SparseCUDA: asin_sparse
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr
   tags: [core, pointwise]
 
 - func: asin_(Tensor(a!) self) -> Tensor(a!)
@@ -963,7 +972,7 @@
   structured_delegate: asin.out
   dispatch:
     SparseCPU, SparseCUDA: asin_sparse_
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_
   tags: pointwise
 
 - func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -974,7 +983,7 @@
     CPU, CUDA: asin_out
     MPS: asin_out_mps
     SparseCPU, SparseCUDA: asin_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
   tags: pointwise
 
 # arcsin, alias of asin
@@ -992,7 +1001,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atan_sparse
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr
   tags: [core, pointwise]
 
 - func: atan_(Tensor(a!) self) -> Tensor(a!)
@@ -1001,7 +1010,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atan_sparse_
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_
   tags: pointwise
 
 - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1012,7 +1021,7 @@
     CPU, CUDA: atan_out
     MPS: atan_out_mps
     SparseCPU, SparseCUDA: atan_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
   tags: pointwise
 
 # arctan, alias of atan
@@ -1026,17 +1035,20 @@
 
 - func: atleast_1d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating
 
 - func: atleast_1d.Sequence(Tensor[] tensors) -> Tensor[]
 
 - func: atleast_2d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating
 
 - func: atleast_2d.Sequence(Tensor[] tensors) -> Tensor[]
   variants: function
 
 - func: atleast_3d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating
 
 - func: atleast_3d.Sequence(Tensor[] tensors) -> Tensor[]
   variants: function
@@ -1056,6 +1068,7 @@
     CPU: baddbmm_out_cpu
     CUDA: baddbmm_out_cuda
     MPS: baddbmm_out_mps
+    XPU: baddbmm_out_xpu
     SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
 
 - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -1069,6 +1082,7 @@
   autogen: bartlett_window.periodic_out
 
 - func: batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
+  tags: maybe_aliasing_or_mutating
 
 - func: quantized_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point) -> Tensor
   dispatch:
@@ -1076,6 +1090,7 @@
   autogen: quantized_batch_norm.out
 
 - func: _batch_norm_impl_index(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor, Tensor, Tensor, Tensor, int)
+  tags: maybe_aliasing_or_mutating
 
 - func: _batch_norm_impl_index_backward(int impl_index, Tensor input, Tensor grad_output, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var_transform, bool train, float eps, bool[3] output_mask, Tensor reservedSpace) -> (Tensor, Tensor, Tensor)
 
@@ -1353,6 +1368,7 @@
     CPU: bmm_out_cpu
     CUDA: bmm_out_cuda
     MPS: bmm_out_mps
+    XPU: bmm_out_xpu
     SparseCPU: bmm_out_sparse_cpu
     SparseCUDA: bmm_out_sparse_cuda
     SparseCsrCUDA: bmm_out_sparse_csr_cuda
@@ -1423,7 +1439,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: ceil_sparse
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr
   tags: [core, pointwise]
 
 - func: ceil_(Tensor(a!) self) -> Tensor(a!)
@@ -1432,7 +1448,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: ceil_sparse_
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_
   tags: pointwise
 
 - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1443,7 +1459,7 @@
     CPU, CUDA: ceil_out
     MPS: ceil_out_mps
     SparseCPU, SparseCUDA: ceil_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
   tags: pointwise
 
 # alias for torch.linalg.multi_dot
@@ -1457,6 +1473,7 @@
   variants: function, method
   device_check: NoCheck
   device_guard: False
+  tags: maybe_aliasing_or_mutating
 
 - func: chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]
   variants: function, method
@@ -1762,7 +1779,7 @@
     MkldnnCPU: copy_mkldnn_
     SparseCPU, SparseCUDA: copy_sparse_wrapper_
     CompositeExplicitAutograd: copy_
-    SparseCsrCPU, SparseCsrCUDA: copy_sparse_compressed_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
     NestedTensorCPU, NestedTensorCUDA: copy_nested_
   autogen: copy.out
 
@@ -1783,7 +1800,7 @@
   variants: function, method
   structured_delegate: cos.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: cos_nested
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_cos
   tags: [core, pointwise]
 
 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -2338,7 +2355,7 @@
 
 - func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
   dispatch:
-    CompositeImplicitAutograd: _embedding_bag_backward_symint
+    CPU, CUDA: _embedding_bag_backward_symint
 
 - func: _embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
   dispatch:
@@ -2370,8 +2387,10 @@
     MPS: empty_mps
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
-    SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
+    SparseCPU, SparseCUDA: empty_sparse
+    SparseMeta: empty_sparse_symint
+    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrMeta: empty_sparse_compressed_symint
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core
 
@@ -2446,7 +2465,7 @@
     CUDA: resize_cuda_
     MPS: resize_mps_
     QuantizedCPU: quantized_resize_cpu_
-    SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_sparse_csr_
   autogen: resize, resize.out
 
 # This is a utility function to enable users to resize out tensor while registering kernels for out variants.
@@ -2497,7 +2516,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: erf_sparse
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr
   tags: [core, pointwise]
 
 - func: erf_(Tensor(a!) self) -> Tensor(a!)
@@ -2506,7 +2525,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: erf_sparse_
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_
   tags: pointwise
 
 - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2517,7 +2536,7 @@
     CPU, CUDA: erf_out
     MPS: erf_out_mps
     SparseCPU, SparseCUDA: erf_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
   tags: pointwise
 
 - func: erfc(Tensor self) -> Tensor
@@ -2585,7 +2604,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr
   tags: [core, pointwise]
 
 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
@@ -2594,7 +2613,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse_
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_
   tags: pointwise
 
 - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2605,7 +2624,7 @@
     CPU, CUDA: expm1_out
     MPS: expm1_out_mps
     SparseCPU, SparseCUDA: expm1_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
   tags: pointwise
 
 - func: expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
@@ -2683,7 +2702,7 @@
     MPS: fill_scalar_mps
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
-    SparseCsrCPU, SparseCsrCUDA: fill_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: fill_nested_
   autogen: fill.Scalar_out
 
@@ -2704,7 +2723,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: floor_sparse
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr
   tags: [core, pointwise]
 
 - func: floor_(Tensor(a!) self) -> Tensor(a!)
@@ -2713,7 +2732,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: floor_sparse_
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_
   tags: pointwise
 
 - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2724,7 +2743,7 @@
     CPU, CUDA: floor_out
     MPS: floor_out_mps
     SparseCPU, SparseCUDA: floor_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
   tags: pointwise
 
 - func: floor_divide(Tensor self, Tensor other) -> Tensor
@@ -2769,7 +2788,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: frac_sparse
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr
   tags: pointwise
 
 - func: frac_(Tensor(a!) self) -> Tensor(a!)
@@ -2778,7 +2797,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: frac_sparse_
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_
   tags: pointwise
 
 - func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2789,7 +2808,7 @@
     CPU, CUDA: frac_out
     MPS: frac_out_mps
     SparseCPU, SparseCUDA: frac_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_out
   tags: pointwise
 
 - func: full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -2814,6 +2833,7 @@
     # non-differentiable so NonFunctional doesn't apply
     CompositeExplicitAutograd: full_like
   autogen: full_like.out
+  tags: core
 
 - func: from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
@@ -3061,6 +3081,18 @@
   dispatch:
     CompositeExplicitAutograd: _unsafe_index
 
+# Used by inductor to generate masked loads
+# Note that we don't support boolean indexing, to avoid dynamic output shapes
+- func: _unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _unsafe_masked_index
+
+- func: _unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _unsafe_masked_index_put_accumulate
+
 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   variants: function
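
The inline comments are the only spec here; a rough, simplified reference for the intended semantics (clamped loads, with the mask selecting the fill value) — the helper name below is illustrative, not the real kernel:

    import torch

    def unsafe_masked_index_ref(x, mask, indices, fill):
        # Clamp each index tensor so every load is in-bounds, then let the
        # mask choose between the gathered value and the fill scalar.
        clamped = [i.clamp(0, d - 1) for i, d in zip(indices, x.shape)]
        return torch.where(mask, x[tuple(clamped)], torch.full((), fill, dtype=x.dtype))

    x = torch.arange(6.0)
    idx = torch.tensor([0, 5, 10])   # 10 is out of range
    mask = idx < x.numel()
    print(unsafe_masked_index_ref(x, mask, [idx], 0.0))  # tensor([0., 5., 0.])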
@@ -3160,8 +3192,9 @@
   device_guard: False
   dispatch:
     CPU, CUDA, MPS: isnan
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isnan
     SparseCPU, SparseCUDA: isnan_sparse
-    SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
   autogen: isnan.out
   tags: [core, pointwise]
 
@@ -3270,7 +3303,9 @@
   autogen: native_layer_norm_backward.out
   tags: core
 
-- func: rms_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+- func: rms_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: rms_norm_symint
 
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
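
rms_norm's normalized_shape becomes SymInt[] and routes through rms_norm_symint, so compiled graphs can keep the shape symbolic. The public entry point (available in recent PyTorch builds) follows the usual RMSNorm definition:

    import torch

    x = torch.randn(2, 8)
    w = torch.ones(8)
    y = torch.rms_norm(x, [8], weight=w, eps=1e-6)
    ref = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + 1e-6) * w
    assert torch.allclose(y, ref, atol=1e-5)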
@@ -3336,9 +3371,10 @@
   dispatch:
     CUDA: _cslt_compress
 
-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, bool split_k_one_kernel=True) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm
+  tags: needs_fixed_stride_order
 
 - func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
   dispatch:
@@ -3381,6 +3417,10 @@
 
 - func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor
 
+- func: _wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
+
+- func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
+
 - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
 
 - func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
@@ -3487,7 +3527,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr
   tags: [core, pointwise]
 
 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
@@ -3496,7 +3536,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse_
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_
   tags: pointwise
 
 - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -3507,7 +3547,7 @@
     CPU, CUDA: log1p_out
     MPS: log1p_out_mps
     SparseCPU, SparseCUDA: log1p_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
   tags: pointwise
 
 - func: log2(Tensor self) -> Tensor
@@ -3899,11 +3939,10 @@
   tags: core
 
 # For normal naming convention this should be `mean.out`. However since we already have `mean.out` we have to rename this.
-# FIXME: fix CI jobs and re-enable this
-#- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
-#  device_check: NoCheck # TensorIterator
-#  dispatch:
-#    CompositeExplicitAutograd: mean_dtype_out
+- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  dispatch:
+    CompositeExplicitAutograd: mean_dtype_out
 
 - func: mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   structured_delegate: mean.out
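
With the previously commented-out overload re-enabled, a full-reduction mean can write into a preallocated out tensor whose dtype differs from the input:

    import torch

    x = torch.arange(5, dtype=torch.float32)
    out = torch.empty((), dtype=torch.float64)
    torch.mean(x, dtype=torch.float64, out=out)  # routes to mean.dtype_out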
@@ -4095,7 +4134,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: _sparse_mm
-    SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm
   tags: core
 
 - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
@@ -4104,8 +4143,9 @@
     CPU: mm_out_cpu
     CUDA: mm_out_cuda
     MPS: mm_out_mps
+    XPU: mm_out_xpu
     SparseCPU, SparseCUDA: _sparse_mm_out
-    SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out
 
 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
@@ -4119,15 +4159,24 @@
 
 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
   dispatch:
-    CPU: _convert_weight_to_int4pack_cpu
     CUDA: _convert_weight_to_int4pack_cuda
+    MPS: _convert_weight_to_int4pack_mps
 
 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
-    CPU: _weight_int4pack_mm_cpu
     MPS: _weight_int4pack_mm_mps
     CUDA: _weight_int4pack_mm_cuda
 
+# Split int4 pack weight between cpu and other devices due to
+# https://github.com/pytorch/ao/issues/1117#issuecomment-2451252756.
+- func: _convert_weight_to_int4pack_for_cpu(Tensor self, int innerKTiles) -> Tensor
+  dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
+
+- func: _weight_int4pack_mm_for_cpu(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+  dispatch:
+    CPU: _weight_int4pack_mm_cpu
+
 - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
   dispatch:
     CPU: _weight_int8pack_mm_cpu
@@ -4165,7 +4214,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse
-    SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
     MkldnnCPU: mkldnn_mul
     ZeroTensor: mul_zerotensor
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
@@ -4177,7 +4226,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse_
-    SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
     MkldnnCPU: mkldnn_mul_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
   tags: pointwise
@@ -4191,7 +4240,7 @@
     MPS: mul_out_mps
     SparseCPU: mul_out_sparse_cpu
     SparseCUDA: mul_out_sparse_cuda
-    SparseCsrCPU, SparseCsrCUDA: mul_out_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
     MkldnnCPU: mkldnn_mul_out
   tags: pointwise
 # For C++ only, until we have conversion from C++ numbers to Tensor
@@ -4201,7 +4250,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: mul
-    SparseCsrCPU, SparseCsrCUDA: mul_scalar_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
   tags: [core, pointwise]
 
@@ -4210,7 +4259,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: mul_
-    SparseCsrCPU, SparseCsrCUDA: mul__scalar_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
   autogen: mul.Scalar_out
   tags: pointwise
@@ -4530,9 +4579,11 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    NestedTensorCUDA, CUDA: is_pinned_cuda
-    MPS: is_pinned_mps
-    CompositeExplicitAutograd: is_pinned_default
+    # the NestedTensor keys are necessary because NestedTensor has been removed
+    # from the CompositeExplicitAutograd keyset see Note [NestedTensor Not Included in Backend Keys]
+    CompositeExplicitAutograd, NestedTensorCPU: is_pinned
+    SparseCsrCPU: is_pinned_sparse_compressed
+    SparseCPU: is_pinned_sparse_coo
 
 # TODO: add a copy kwarg that guarantees that the tensor is put into fresh
 # pinned memory
@@ -4542,9 +4593,10 @@
 # Unlike pin_memory, this is guaranteed to give a new non-aliasing tensor
 - func: _pin_memory(Tensor self, Device? device=None) -> Tensor
   dispatch:
-    CUDA: _pin_memory_cuda
-    MPS: _pin_memory_mps
-    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
+    CompositeExplicitAutograd: _pin_memory
+    NestedTensorCPU: _pin_memory_nested
+    SparseCPU: _pin_memory_sparse_coo
+    SparseCsrCPU: _pin_memory_sparse_compressed
   autogen: _pin_memory.out
 
 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
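
Pinning is reorganized around a CompositeExplicitAutograd kernel plus explicit sparse COO/CSR entries, so sparse CPU tensors gain pinned-memory support. The user-facing calls are unchanged:

    import torch

    x = torch.randn(3)
    print(x.is_pinned())        # False for an ordinary CPU tensor
    if torch.cuda.is_available():
        y = x.pin_memory()      # dispatches through _pin_memory
        print(y.is_pinned())    # True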
@@ -4558,27 +4610,30 @@
   dispatch:
     CompositeExplicitAutograd: rad2deg
     SparseCPU, SparseCUDA: rad2deg_sparse
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr
+  tags: pointwise
 
 - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: rad2deg_
     SparseCPU, SparseCUDA: rad2deg_sparse_
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_
+  tags: pointwise
 
 - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: rad2deg_out
     SparseCPU, SparseCUDA: rad2deg_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out
+  tags: pointwise
 
 - func: deg2rad(Tensor self) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: deg2rad
     SparseCPU, SparseCUDA: deg2rad_sparse
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr
   tags: pointwise
 
 - func: deg2rad_(Tensor(a!) self) -> Tensor(a!)
@@ -4586,14 +4641,14 @@
   dispatch:
     CompositeExplicitAutograd: deg2rad_
     SparseCPU, SparseCUDA: deg2rad_sparse_
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_
   tags: pointwise
 
 - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: deg2rad_out
     SparseCPU, SparseCUDA: deg2rad_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_out
   tags: pointwise
 
 - func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -4811,7 +4866,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: neg_sparse
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
   tags: [core, pointwise]
 
@@ -4821,7 +4876,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: neg_sparse_
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
    NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
   tags: pointwise
 
@@ -4833,7 +4888,7 @@
     CPU, CUDA: neg_out
     MPS: neg_out_mps
     SparseCPU, SparseCUDA: neg_out_sparse
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
   tags: pointwise
 # Alias for neg
 
@@ -4917,7 +4972,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: round_sparse
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr
   tags: [core, pointwise]
 
 - func: round_(Tensor(a!) self) -> Tensor(a!)
@@ -4926,7 +4981,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: round_sparse_
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_
   tags: pointwise
 
 - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -4938,7 +4993,7 @@
     CUDA: round_out
     MPS: round_out_mps
     SparseCPU, SparseCUDA: round_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
   tags: pointwise
 
 - func: round.decimals(Tensor self, *, int decimals) -> Tensor
@@ -4964,7 +5019,7 @@
 
 - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
   device_check: NoCheck # TensorIterator
-  tags: nondeterministic_seeded
+  tags: [pointwise, nondeterministic_seeded]
 
 - func: rrelu_(Tensor(a!) self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
   tags: nondeterministic_seeded
@@ -4981,7 +5036,7 @@
     QuantizedCUDA: relu_quantized_cuda
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
     SparseCPU, SparseCUDA: relu_sparse
-    SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
   tags: [core, pointwise]
 
 - func: relu_(Tensor(a!) self) -> Tensor(a!)
@@ -4995,12 +5050,13 @@
     QuantizedCUDA: relu_quantized_cuda_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
     SparseCPU, SparseCUDA: relu_sparse_
-    SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
   autogen: relu.out
   tags: pointwise
 
 - func: relu6(Tensor self) -> Tensor
   python_module: nn
+  tags: pointwise
 
 - func: relu6_(Tensor(a!) self) -> Tensor(a!)
   python_module: nn
@@ -5085,6 +5141,7 @@
   structured_delegate: hardshrink.out
   device_check: NoCheck # TensorIterator
   variants: function, method
+  tags: pointwise
 
 - func: hardshrink_backward.grad_input(Tensor grad_out, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -5128,7 +5185,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: select_symint
-    SparseCsrCPU, SparseCsrCUDA: select_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: select_nested
   tags: core
 
@@ -5149,6 +5206,7 @@
 
 - func: selu(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
+  tags: pointwise
 
 - func: selu_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5157,6 +5215,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CompositeExplicitAutograd: celu
+  tags: pointwise
 
 - func: celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5207,6 +5266,7 @@
 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
   python_module: nn
+  tags: pointwise
 
 - func: mish_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: mish.out
@@ -5277,9 +5337,9 @@
   structured_delegate: sin.out
   variants: function, method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
-    NestedTensorCPU, NestedTensorCUDA: sin_nested
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sin
   tags: [core, pointwise]
 
 - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5287,7 +5347,7 @@
   structured_delegate: sin.out
   variants: function, method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_
     SparseCPU, SparseCUDA: sin_sparse_
   tags: pointwise
 
@@ -5298,7 +5358,7 @@
   dispatch:
     CPU, CUDA: sin_out
     MPS: sin_out_mps
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
     SparseCPU, SparseCUDA: sin_sparse_out
   tags: pointwise
 
@@ -5325,7 +5385,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sinh_sparse
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr
   tags: [core, pointwise]
 
 - func: sinh_(Tensor(a!) self) -> Tensor(a!)
@@ -5334,7 +5394,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sinh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_
   tags: pointwise
 
 - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5345,7 +5405,7 @@
     CPU, CUDA: sinh_out
     MPS: sinh_out_mps
     SparseCPU, SparseCUDA: sinh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out
 
 # Returns a copy of this `Variable` that is detached from its autograd graph.
 # This method is OK to call if the `Variable` is a view.
@@ -5732,7 +5792,7 @@
   dispatch:
     NestedTensorCPU: NestedTensor_sum_dim_CPU
     SparseCPU, SparseCUDA: sum_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: sum_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_sparse_compressed
   tags: core
 
 - func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -5777,8 +5837,9 @@
   structured_delegate: sqrt.out
   variants: function, method
   dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sqrt
     SparseCPU, SparseCUDA: sqrt_sparse
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
   tags: [core, pointwise]
 
 - func: sqrt_(Tensor(a!) self) -> Tensor(a!)
@@ -5787,7 +5848,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sqrt_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_
   tags: pointwise
 
 - func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5798,7 +5859,7 @@
     CPU, CUDA: sqrt_out
     MPS: sqrt_out_mps
     SparseCPU, SparseCUDA: sqrt_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
   tags: pointwise
 
 - func: square(Tensor self) -> Tensor
@@ -5936,7 +5997,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: tan_sparse
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr
   tags: [core, pointwise]
 
 - func: tan_(Tensor(a!) self) -> Tensor(a!)
@@ -5945,7 +6006,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: tan_sparse_
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_
   tags: pointwise
 
 - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5956,7 +6017,7 @@
     CPU, CUDA: tan_out
     MPS: tan_out_mps
     SparseCPU, SparseCUDA: tan_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
   tags: pointwise
 
 - func: tanh(Tensor self) -> Tensor
@@ -5967,7 +6028,7 @@
     QuantizedCPU: tanh_quantized_cpu
     MkldnnCPU: mkldnn_tanh
     SparseCPU, SparseCUDA: tanh_sparse
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
   tags: [core, pointwise]
 
@@ -5978,7 +6039,7 @@
   dispatch:
     MkldnnCPU: mkldnn_tanh_
     SparseCPU, SparseCUDA: tanh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
   tags: pointwise
 
@@ -5990,7 +6051,7 @@
     CPU, CUDA: tanh_out
     MPS: tanh_out_mps
     SparseCPU, SparseCUDA: tanh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
   tags: pointwise
 
 - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
@@ -6006,6 +6067,7 @@
   structured_delegate: threshold.out
   dispatch:
     QuantizedCPU: threshold_quantized_cpu
+  tags: pointwise
 
 - func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -6027,7 +6089,7 @@
     CPU, CUDA: threshold_backward_out
     MPS: threshold_backward_out_mps
     SparseCPU, SparseCUDA: threshold_backward_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed_out
 
 - func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
   variants: function
@@ -6035,7 +6097,7 @@
   dispatch:
     MkldnnCPU: mkldnn_relu_backward
     SparseCPU, SparseCUDA: threshold_backward_sparse
-    SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
     NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
   tags: pointwise
 
@@ -6185,12 +6247,12 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out
 
-- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor(a)
   variants: function
   device_check: NoCheck
   dispatch: {}
 
-- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor
   variants: function
   device_check: NoCheck
   tags: view_copy
@@ -6227,6 +6289,16 @@
   device_check: NoCheck
   dispatch: {}
 
+- func: _nested_get_min_seqlen(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_max_seqlen(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
 - func: _nested_get_jagged_dummy(Tensor any) -> Tensor
   category_override: dummy
   dispatch: {}
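
_nested_view_from_jagged now threads optional min_seqlen/max_seqlen tensors through the view, with matching _nested_get_min_seqlen/_nested_get_max_seqlen accessors, so jagged layouts can cache sequence-length bounds. Jagged nested tensors themselves are built through the public API:

    import torch

    nt = torch.nested.nested_tensor(
        [torch.randn(2, 4), torch.randn(5, 4)], layout=torch.jagged
    )
    print(nt.shape)  # (2, j1, 4) — the ragged dimension stays symbolic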
@@ -6251,7 +6323,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr
   tags: [core, pointwise]
 
 - func: trunc_(Tensor(a!) self) -> Tensor(a!)
@@ -6260,7 +6332,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse_
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_
   tags: pointwise
 
 - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -6271,7 +6343,7 @@
     CPU, CUDA: trunc_out
     MPS: trunc_out_mps
     SparseCPU, SparseCUDA: trunc_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
   tags: pointwise
 # Alias for trunc
 
@@ -6443,12 +6515,14 @@
   variants: function, method
   dispatch:
     CPU, CUDA, MPS: where
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
   tags: [core, pointwise]
 
 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA, MPS: where_self_out
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where_out
 
 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
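
torch.where gains NestedTensor kernels for both the functional and out variants; dense usage is unchanged:

    import torch

    cond = torch.tensor([True, False, True])
    print(torch.where(cond, torch.tensor(1.0), torch.tensor(-1.0)))
    # tensor([ 1., -1.,  1.])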
@@ -6780,7 +6854,7 @@
   dispatch:
     CompositeExplicitAutograd: clone
     SparseCPU, SparseCUDA: clone_sparse
-    SparseCsrCPU, SparseCsrCUDA: clone_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
     MkldnnCPU: mkldnn_clone
     QuantizedCPU, QuantizedCUDA: quantized_clone
     NestedTensorCPU, NestedTensorCUDA: clone_nested
@@ -6804,7 +6878,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: resize_as_sparse_
-    SparseCsrCPU, SparseCsrCUDA: resize_as_sparse_compressed_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_as_sparse_compressed_
   autogen: resize_as_sparse, resize_as_sparse.out
 
 - func: zero_(Tensor(a!) self) -> Tensor(a!)
@@ -6951,6 +7025,7 @@
     CPU: addmm_out_cpu
     CUDA: addmm_out_cuda
     MPS: addmm_out_mps
+    XPU: addmm_out_xpu
     SparseCPU: addmm_out_sparse_dense_cpu
     SparseCUDA: addmm_out_sparse_dense_cuda
     SparseCsrCPU: addmm_out_sparse_compressed_cpu
@@ -6962,7 +7037,7 @@
   dispatch:
     SparseCPU: addmm_sparse_dense_cpu
     SparseCUDA: addmm_sparse_dense_cuda
-    SparseCsrCPU, SparseCsrCUDA: addmm_sparse_compressed_dense
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
   tags: core
 
 - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
@@ -6979,17 +7054,18 @@
   dispatch:
     CPU: addmm_activation_out_cpu
     CUDA: addmm_activation_out_cuda
+    XPU: addmm_activation_out_xpu
 
 - func: _addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor
   structured_delegate: _addmm_activation.out
   variants: function, method
 
-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
   variants: function
   dispatch:
     CUDA: _scaled_mm_cuda
 
-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _scaled_mm_out_cuda
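
_scaled_mm is a breaking schema change: scale_a/scale_b become required positional tensors, the amax output is dropped, and the out variant returns a single tensor. A hedged sketch of the new call shape (assumes a CUDA device with fp8 support, e.g. sm89+):

    import torch

    if torch.cuda.is_available():
        a = torch.randn(16, 32, device="cuda").to(torch.float8_e4m3fn)
        b = torch.randn(16, 32, device="cuda").to(torch.float8_e4m3fn).t()  # mat2 column-major
        scale_a = torch.tensor(1.0, device="cuda")
        scale_b = torch.tensor(1.0, device="cuda")
        y = torch._scaled_mm(a, b, scale_a, scale_b, out_dtype=torch.bfloat16)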
@@ -7184,7 +7260,7 @@
  variants: method
  dispatch:
  SparseCPU, SparseCUDA: sparse_mask
- SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_compressed
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_mask_sparse_compressed
  autogen: sparse_mask.out

  - func: _sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor
@@ -7204,7 +7280,7 @@
  variants: method
  dispatch:
  SparseCPU, SparseCUDA: sparse_to_dense
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_dense
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_dense
  MkldnnCPU: mkldnn_to_dense
  autogen: _to_dense.out

@@ -7385,7 +7461,7 @@
  dispatch:
  CPU, CUDA: dense_to_sparse
  SparseCPU, SparseCUDA: sparse_coo_to_sparse
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
  autogen: _to_sparse.sparse_dim_out

  - func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
@@ -7397,7 +7473,7 @@
  dispatch:
  CPU, CUDA: dense_to_sparse
  SparseCPU, SparseCUDA: sparse_coo_to_sparse
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
  autogen: _to_sparse.out

  - func: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
@@ -7409,7 +7485,7 @@
  dispatch:
  CPU, CUDA: dense_to_sparse_csr
  SparseCPU, SparseCUDA: coo_to_sparse_csr
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csr
  autogen: _to_sparse_csr.out

  - func: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
@@ -7421,7 +7497,7 @@
  dispatch:
  CPU, CUDA: dense_to_sparse_csc
  SparseCPU, SparseCUDA: coo_to_sparse_csc
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csc
  autogen: _to_sparse_csc.out

  - func: to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
@@ -7433,7 +7509,7 @@
  dispatch:
  CPU, CUDA: dense_to_sparse_bsr
  SparseCPU, SparseCUDA: coo_to_sparse_bsr
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsr
  autogen: _to_sparse_bsr.out

  - func: to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
@@ -7445,7 +7521,7 @@
  dispatch:
  CPU, CUDA: dense_to_sparse_bsc
  SparseCPU, SparseCUDA: coo_to_sparse_bsc
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsc
  autogen: _to_sparse_bsc.out

  - func: _to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)
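
Each of the compressed-sparse conversion entries above gains a `SparseCsrMeta` dispatch key, so CSR/CSC/BSR/BSC tensors on the meta device can route through the same kernels. The user-facing conversions are unchanged; for reference:

    import torch

    x = torch.tensor([[0., 1., 0.],
                      [2., 0., 3.]])
    csr = x.to_sparse_csr()   # compressed sparse row
    csc = x.to_sparse_csc()   # compressed sparse column
    coo = x.to_sparse()       # COO
    print(csr.crow_indices())       # tensor([0, 1, 3])
    print(csr.to_dense().equal(x))  # True: round-trip back to dense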
@@ -7695,6 +7771,7 @@

  - func: cartesian_prod(Tensor[] tensors) -> Tensor
  variants: function
+ tags: maybe_aliasing_or_mutating

  - func: combinations(Tensor self, int r=2, bool with_replacement=False) -> Tensor
  variants: function
@@ -7976,6 +8053,7 @@
  variants: function, method
  dispatch:
  CompositeExplicitAutograd: masked_scatter
+ tags: core

  - func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
  dispatch:
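
`masked_scatter` is newly tagged `core`, marking it as part of the core ATen opset that backends and export paths are expected to support. Its semantics, for reference: elements of `source` are consumed in order and written wherever `mask` is true.

    import torch

    x = torch.zeros(2, 3)
    mask = torch.tensor([[True, False, True],
                         [False, True, False]])
    source = torch.tensor([1., 2., 3.])  # consumed left-to-right, row by row
    print(x.masked_scatter(mask, source))
    # tensor([[1., 0., 2.],
    #         [0., 3., 0.]])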
@@ -8210,7 +8288,7 @@
  structured: True
  variants: function
  dispatch:
- CPU, CUDA: scatter_reduce_two
+ CPU, CUDA, MPS: scatter_reduce_two

  - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  structured_delegate: eq.Scalar_out
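
`scatter_reduce_two` picks up an MPS dispatch entry, so calls like the one below should now run natively on Apple-silicon GPUs as well as CPU/CUDA:

    import torch

    src = torch.tensor([1., 2., 3., 4., 5., 6.])
    index = torch.tensor([0, 1, 0, 1, 2, 1])
    out = torch.zeros(3).scatter_reduce(0, index, src,
                                        reduce="sum", include_self=False)
    print(out)  # tensor([ 4., 12.,  5.])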
@@ -8431,21 +8509,21 @@
  device_check: NoCheck # TensorIterator
  variants: method, function
  dispatch:
- CPU, CUDA: __lshift__
+ CPU, CUDA, MPS: __lshift__
  tags: pointwise

  - func: __lshift__.Tensor(Tensor self, Tensor other) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: method, function
  dispatch:
- CPU, CUDA: __lshift__
+ CPU, CUDA, MPS: __lshift__
  tags: pointwise

  - func: __ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
  dispatch:
- CPU, CUDA: __ilshift__
+ CPU, CUDA, MPS: __ilshift__
  autogen: __lshift__.Scalar_out
  tags: pointwise

@@ -8453,7 +8531,7 @@
  device_check: NoCheck # TensorIterator
  variants: method
  dispatch:
- CPU, CUDA: __ilshift__
+ CPU, CUDA, MPS: __ilshift__
  autogen: __lshift__.Tensor_out
  tags: pointwise

@@ -8474,7 +8552,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: bitwise_left_shift_out
+ CPU, CUDA, MPS: bitwise_left_shift_out
  tags: pointwise

  - func: bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
@@ -8510,28 +8588,28 @@
  device_check: NoCheck # TensorIterator
  variants: method, function
  dispatch:
- CPU, CUDA: __rshift__
+ CPU, CUDA, MPS: __rshift__
  tags: pointwise

  - func: __rshift__.Tensor(Tensor self, Tensor other) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: method, function
  dispatch:
- CPU, CUDA: __rshift__
+ CPU, CUDA, MPS: __rshift__
  tags: pointwise

  - func: __irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
  dispatch:
- CPU, CUDA: __irshift__
+ CPU, CUDA, MPS: __irshift__
  autogen: __rshift__.Scalar_out

  - func: __irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
  device_check: NoCheck # TensorIterator
  variants: method
  dispatch:
- CPU, CUDA: __irshift__
+ CPU, CUDA, MPS: __irshift__
  autogen: __rshift__.Tensor_out

  - func: bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor
@@ -8551,7 +8629,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: bitwise_right_shift_out
+ CPU, CUDA, MPS: bitwise_right_shift_out
  tags: pointwise

  - func: bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
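
All of the shift kernels above (`__lshift__`, `__ilshift__`, `__rshift__`, `__irshift__`, and the `bitwise_*_shift` out variants) add MPS to their dispatch lists. The operator and named forms are equivalent:

    import torch

    x = torch.tensor([1, 2, 4], dtype=torch.int32)
    print(x << 2)                           # tensor([ 4,  8, 16], dtype=torch.int32)
    print(torch.bitwise_left_shift(x, 2))   # same kernel, named form
    print(torch.bitwise_right_shift(x, 1))  # tensor([0, 1, 2], dtype=torch.int32)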
@@ -8612,18 +8690,18 @@
  - func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
  variants: method
  dispatch:
- CPU, CUDA: addbmm_
+ CPU, CUDA, XPU: addbmm_
  MPS: addbmm_mps_

  - func: addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
  dispatch:
- CPU, CUDA: addbmm_out
+ CPU, CUDA, XPU: addbmm_out
  MPS: addbmm_out_mps

  - func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
  variants: method, function
  dispatch:
- CPU, CUDA: addbmm
+ CPU, CUDA, XPU: addbmm
  MPS: addbmm_mps

  - func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
@@ -8737,12 +8815,14 @@
  dispatch:
  CPU: tril_indices_cpu
  CUDA: tril_indices_cuda
+ MPS: tril_indices_mps
  autogen: tril_indices.out

  - func: triu_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  dispatch:
  CPU: triu_indices_cpu
  CUDA: triu_indices_cuda
+ MPS: triu_indices_mps
  autogen: triu_indices.out

  - func: trace(Tensor self) -> Tensor
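
`tril_indices`/`triu_indices` gain MPS kernels. Usage is unchanged; the result is a 2 x N tensor of row and column indices:

    import torch

    idx = torch.tril_indices(3, 3)  # lower triangle of a 3x3 matrix
    print(idx)
    # tensor([[0, 1, 1, 2, 2, 2],
    #         [0, 0, 1, 0, 1, 2]])
    m = torch.arange(9.).reshape(3, 3)
    print(m[idx[0], idx[1]])  # tensor([0., 3., 4., 6., 7., 8.])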
@@ -8858,6 +8938,7 @@
  variants: method, function
  dispatch:
  QuantizedCPU: eq_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
  tags: [core, pointwise]

  - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -9196,11 +9277,13 @@
  - func: nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
  dispatch:
  CPU: nonzero_static_out_cpu
+ CUDA: nonzero_static_out_cuda

  - func: nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor
  variants: method, function
  dispatch:
  CPU: nonzero_static_cpu
+ CUDA: nonzero_static_cuda

  - func: nonzero_numpy(Tensor self) -> Tensor[]
  variants: method, function
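
`nonzero_static` gains CUDA kernels. Unlike `nonzero`, it returns a fixed number of rows (padded with `fill_value`), which keeps the output shape static for compiled graphs:

    import torch

    x = torch.tensor([0, 3, 0, 5])
    print(torch.nonzero_static(x, size=4, fill_value=-1))
    # tensor([[ 1],
    #         [ 3],
    #         [-1],
    #         [-1]])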
@@ -9502,7 +9585,7 @@
  variants: method, function
  dispatch:
  SparseCPU, SparseCUDA: erfinv_sparse
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr
  tags: pointwise

  - func: erfinv_(Tensor(a!) self) -> Tensor(a!)
@@ -9511,7 +9594,7 @@
  variants: method
  dispatch:
  SparseCPU, SparseCUDA: erfinv_sparse_
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_
  tags: pointwise

  - func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9522,7 +9605,7 @@
  CPU, CUDA: erfinv_out
  MPS: erfinv_out_mps
  SparseCPU, SparseCUDA: erfinv_sparse_out
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
  tags: pointwise

  - func: i0(Tensor self) -> Tensor
@@ -9539,7 +9622,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: i0_out
+ CPU, CUDA, MPS: i0_out
  tags: pointwise

  - func: sign(Tensor self) -> Tensor
@@ -9548,7 +9631,7 @@
  variants: function, method
  dispatch:
  SparseCPU, SparseCUDA: sign_sparse
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr
  tags: [core, pointwise]

  - func: sign_(Tensor(a!) self) -> Tensor(a!)
@@ -9557,7 +9640,7 @@
  variants: method
  dispatch:
  SparseCPU, SparseCUDA: sign_sparse_
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_
  tags: pointwise

  - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9568,7 +9651,7 @@
  CPU, CUDA: sign_out
  MPS: sign_out_mps
  SparseCPU, SparseCUDA: sign_sparse_out
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_out
  tags: pointwise

  - func: signbit(Tensor self) -> Tensor
@@ -9576,7 +9659,7 @@
  structured_delegate: signbit.out
  dispatch:
  SparseCPU, SparseCUDA: signbit_sparse
- SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr
  tags: pointwise

  - func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9587,7 +9670,7 @@
  CUDA: signbit_out
  MPS: signbit_out_mps
  SparseCPU, SparseCUDA: signbit_sparse_out
- SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr_out
  tags: pointwise

  - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
@@ -10038,9 +10121,10 @@
  - func: argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor
  device_check: NoCheck # TensorIterator
  variants: method, function
- dispatch:
- CPU, CUDA, MPS: argsort_stable
- autogen: argsort.stable_out
+
+ - func: argsort.stable_out(Tensor self, *, bool stable, int dim=-1, bool descending=False, Tensor(a!) out) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
+ variants: function

  - func: argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor
  variants: method, function
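
`argsort.stable` drops its explicit dispatch and autogen entries, and a hand-written `argsort.stable_out` schema takes the autogenerated variant's place. From Python the call is unchanged; `stable=True` preserves the relative order of equal elements:

    import torch

    x = torch.tensor([2, 1, 2, 1])
    print(torch.argsort(x, stable=True))  # tensor([1, 3, 0, 2])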
@@ -10114,7 +10198,7 @@
  - func: unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor
  variants: function
  dispatch:
- CPU, CUDA: unfold_backward
+ CPU, CUDA, MPS: unfold_backward
  autogen: unfold_backward.out

  - func: equal(Tensor self, Tensor other) -> bool
@@ -10220,7 +10304,7 @@
  CPU, CUDA: normal_
  MPS: normal_mps_
  Meta: normal_meta_
- SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
  NestedTensorCPU, NestedTensorCUDA: normal_nested_
  autogen: normal.out

@@ -11044,6 +11128,22 @@
  CUDA: foreach_tensor_lerp_list_cuda_
  autogen: _foreach_lerp.Scalar_out

+ - func: _foreach_lerp.ScalarList(Tensor[] self, Tensor[] tensors1, Scalar[] weight) -> Tensor[]
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: foreach_tensor_lerp_scalarlist_kernel_slow
+ CUDA: foreach_tensor_lerp_scalarlist_cuda
+ autogen: _foreach_lerp.ScalarList_out
+
+ - func: _foreach_lerp_.ScalarList(Tensor(a!)[] self, Tensor[] tensors1, Scalar[] weight) -> ()
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: foreach_tensor_lerp_scalarlist_kernel_slow_
+ CUDA: foreach_tensor_lerp_scalarlist_cuda_
+ autogen: _foreach_lerp.ScalarList_out
+
  - func: _foreach_lgamma(Tensor[] self) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
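
The new `ScalarList` overloads let `_foreach_lerp` take one scalar weight per tensor instead of a single shared scalar. A hedged sketch (these are private `torch._foreach_*` APIs, so the exact binding may differ):

    import torch

    params = [torch.zeros(3), torch.zeros(3)]
    targets = [torch.ones(3), torch.full((3,), 2.0)]
    # One interpolation weight per tensor: the new Scalar[] overload.
    torch._foreach_lerp_(params, targets, [0.5, 0.25])
    print(params)  # both become tensor([0.5000, 0.5000, 0.5000])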
@@ -11232,6 +11332,21 @@
  CUDA: foreach_tensor_round_cuda_
  autogen: _foreach_round.out

+ - func: _foreach_rsqrt(Tensor[] self) -> Tensor[]
+ device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: foreach_tensor_rsqrt_slow
+ CUDA: foreach_tensor_rsqrt_cuda
+
+ - func: _foreach_rsqrt_(Tensor(a!)[] self) -> ()
+ device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: foreach_tensor_rsqrt_slow_
+ CUDA: foreach_tensor_rsqrt_cuda_
+ autogen: _foreach_rsqrt.out
+
  - func: _foreach_sigmoid(Tensor[] self) -> Tensor[]
  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
  variants: function
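
`_foreach_rsqrt` is a new list-wise op: elementwise 1/sqrt(x) applied across a whole tensor list in one call (a private API, so a hedged sketch):

    import torch

    xs = [torch.tensor([4.0, 16.0]), torch.tensor([0.25])]
    print(torch._foreach_rsqrt(xs))
    # [tensor([0.5000, 0.2500]), tensor([2.])]
    torch._foreach_rsqrt_(xs)  # in-place variant, mutates xs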
@@ -11675,6 +11790,7 @@
  structured_delegate: elu.out
  device_check: NoCheck # TensorIterator
  python_module: nn
+ tags: pointwise

  - func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)
  structured: True
@@ -11748,6 +11864,7 @@
  python_module: nn
  dispatch:
  QuantizedCPU: hardsigmoid_quantized_cpu
+ tags: pointwise

  - func: hardsigmoid_(Tensor(a!) self) -> Tensor(a!)
  structured_delegate: hardsigmoid.out
@@ -11779,7 +11896,7 @@
  dispatch:
  CPU, CUDA, MPS: hardtanh
  QuantizedCPU: hardtanh_quantized_cpu
- tags: core
+ tags: [pointwise, core]

  - func: hardtanh_backward.grad_input(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val, *, Tensor(a!) grad_input) -> Tensor(a!)
  python_module: nn
@@ -11903,19 +12020,20 @@
  CUDA: log_sigmoid_backward_cuda
  MPS: log_sigmoid_backward_mps

- - func: rrelu_with_noise.out(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)
+ - func: rrelu_with_noise.out(Tensor self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
  tags: nondeterministic_seeded
  dispatch:
  CPU: rrelu_with_noise_out_cpu
  CUDA: rrelu_with_noise_out_cuda

- - func: rrelu_with_noise(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
+ - func: rrelu_with_noise(Tensor self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
  python_module: nn
  dispatch:
  CPU: rrelu_with_noise_cpu
  CUDA: rrelu_with_noise_cuda
  tags: nondeterministic_seeded
+ autogen: rrelu_with_noise_functional

  - func: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor
  python_module: nn
@@ -11923,7 +12041,7 @@
  CompositeExplicitAutograd: rrelu_with_noise_backward
  autogen: rrelu_with_noise_backward.out

- - func: rrelu_with_noise_(Tensor(a!) self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
+ - func: rrelu_with_noise_(Tensor(a!) self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
  python_module: nn
  tags: nondeterministic_seeded
  dispatch:
@@ -11943,6 +12061,7 @@
  structured_delegate: softplus.out
  device_check: NoCheck # TensorIterator
  python_module: nn
+ tags: pointwise

  - func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)
  structured: True
@@ -11969,6 +12088,7 @@
  structured_delegate: softshrink.out
  device_check: NoCheck # TensorIterator
  python_module: nn
+ tags: pointwise

  - func: softshrink_backward.grad_input(Tensor grad_output, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)
  structured: True
@@ -12613,6 +12733,7 @@
  dispatch:
  CPU: upsample_bicubic2d_out_cpu
  CUDA: upsample_bicubic2d_out_cuda
+ MPS: upsample_bicubic2d_out_mps

  - func: upsample_bicubic2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
@@ -12624,6 +12745,7 @@
  dispatch:
  CPU: upsample_bicubic2d_backward_out_cpu
  CUDA: upsample_bicubic2d_backward_out_cuda
+ MPS: upsample_bicubic2d_backward_out_mps

  - func: upsample_bicubic2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
  python_module: nn
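
Bicubic 2-D upsampling gains MPS forward and backward kernels, so the usual `F.interpolate` call should no longer need a CPU fallback on Apple silicon:

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 3, 8, 8)  # move to device="mps" where available
    y = F.interpolate(x, scale_factor=2, mode="bicubic", align_corners=False)
    print(y.shape)  # torch.Size([1, 3, 16, 16])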
@@ -13004,17 +13126,20 @@
  dispatch:
  CPU: im2col_out_cpu
  CUDA: im2col_out_cuda
+ MPS: im2col_out_mps

  - func: im2col(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
  python_module: nn
  dispatch:
  CPU: im2col_cpu
  CUDA: im2col_cuda
+ MPS: im2col_mps

  - func: isfinite(Tensor self) -> Tensor
  variants: function, method
  device_check: NoCheck
  device_guard: False
+ tags: pointwise

  - func: isinf(Tensor self) -> Tensor
  variants: function, method
@@ -13022,9 +13147,10 @@
  device_guard: False
  dispatch:
  CompositeExplicitAutograd: isinf
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_isinf
  SparseCPU, SparseCUDA: isinf_sparse
  SparseMeta: isinf_sparse_meta
- SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
  autogen: isinf.out
  tags: [core, pointwise]
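
`im2col` (exposed as `torch.nn.functional.unfold`) also gains MPS kernels. It extracts sliding local patches into columns:

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 3, 8, 8)
    cols = F.unfold(x, kernel_size=3)
    print(cols.shape)  # torch.Size([1, 27, 36]): 3*3*3 values per patch, 6*6 patches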
@@ -13037,34 +13163,36 @@
  variants: function, method
  structured_delegate: isposinf.out
  dispatch:
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_isposinf
  SparseCPU, SparseCUDA: isposinf_sparse
- SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
  tags: pointwise

  - func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: isposinf_out
+ CPU, CUDA, MPS: isposinf_out
  SparseCPU, SparseCUDA: isposinf_sparse_out
- SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
  tags: pointwise

  - func: isneginf(Tensor self) -> Tensor
  variants: function, method
  structured_delegate: isneginf.out
  dispatch:
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_isneginf
  SparseCPU, SparseCUDA: isneginf_sparse
- SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
  tags: pointwise

  - func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: isneginf_out
+ CPU, CUDA, MPS: isneginf_out
  SparseCPU, SparseCUDA: isneginf_sparse_out
- SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
  tags: pointwise

  # NOTE [_add_batch_dim and _remove_batch_dim]
@@ -13075,7 +13203,7 @@
  variants: function

  # See NOTE [_add_batch_dim and _remove_batch_dim]
- - func: _remove_batch_dim(Tensor self, int level, int batch_size, int out_dim) -> Tensor
+ - func: _remove_batch_dim(Tensor self, int level, SymInt batch_size, int out_dim) -> Tensor
  variants: function

  ## Functions related to the `torch.special` namespace
@@ -13375,7 +13503,7 @@
  structured: True
  structured_inherits: TensorIteratorBase
  dispatch:
- CPU, CUDA: special_i1_out
+ CPU, CUDA, MPS: special_i1_out
  tags: pointwise

  - func: special_i1e(Tensor self) -> Tensor
@@ -13787,10 +13915,16 @@
  - func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
  python_module: linalg
  variants: function
+ dispatch:
+ CompositeImplicitAutograd: linalg_lu_factor
+ MPS: linalg_lu_factor_mps

  - func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
  python_module: linalg
  variants: function
+ dispatch:
+ CompositeImplicitAutograd: linalg_lu_factor_out
+ MPS: linalg_lu_factor_out_mps

  - func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
  python_module: linalg
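
`linalg_lu_factor` previously had no dispatch section (it was purely composite); it now registers a dedicated MPS kernel alongside the composite fallback. The Python API is unchanged:

    import torch

    A = torch.randn(4, 4)
    LU, pivots = torch.linalg.lu_factor(A)
    b = torch.randn(4, 1)
    x = torch.linalg.lu_solve(LU, pivots, b)
    print(torch.allclose(A @ x, b, atol=1e-5))  # True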
@@ -14176,6 +14310,11 @@
  - func: linalg_solve(Tensor A, Tensor B, *, bool left=True) -> Tensor
  python_module: linalg

+ - func: _spsolve(Tensor A, Tensor B, *, bool left=True) -> Tensor
+ python_module: sparse
+ dispatch:
+ SparseCsrCUDA: _sparse_csr_linear_solve
+
  - func: linalg_solve.out(Tensor A, Tensor B, *, bool left=True, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg
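
`_spsolve` is a new sparse direct-solver entry, registered only for `SparseCsrCUDA`. Given the `python_module: sparse` line it should surface as `torch.sparse.spsolve`, though that mapping is my reading rather than something this diff states; a hedged sketch (requires a CUDA build with sparse-solver support):

    import torch

    if torch.cuda.is_available():
        A = (2.0 * torch.eye(3, device="cuda")).to_sparse_csr()
        b = torch.tensor([2.0, 4.0, 6.0], device="cuda")
        x = torch.sparse.spsolve(A, b)  # solves A @ x = b
        print(x)  # tensor([1., 2., 3.], device='cuda:0')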
@@ -14352,7 +14491,7 @@
  CPU, CUDA: _segment_reduce_backward_kernel
  autogen: _segment_reduce_backward.out

- - func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0) -> Tensor
+ - func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0, str padding_side="right") -> Tensor
  python_module: nn
  variants: function
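
`pad_sequence` gains a `padding_side` argument, so padding can go on the left (useful for decoder-style models) instead of always on the right:

    import torch
    from torch.nn.utils.rnn import pad_sequence

    seqs = [torch.tensor([1, 2, 3]), torch.tensor([4])]
    print(pad_sequence(seqs, batch_first=True, padding_side="left"))
    # tensor([[1, 2, 3],
    #         [0, 0, 4]])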
@@ -14458,7 +14597,7 @@
  variants: function
  dispatch:
  CompositeExplicitAutogradNonFunctional: select_copy_symint
- SparseCsrCPU, SparseCsrCUDA: select_copy_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_copy_sparse_csr
  tags: view_copy
  autogen: select_copy.int_out

@@ -14648,11 +14787,18 @@
  variants: function
  dispatch:
  CUDA: _fbgemm_jagged_to_padded_dense_forward
+ CPU: _jagged_to_padded_dense_forward_cpu

  - func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
  variants: function
  dispatch:
  CUDA: _fbgemm_dense_to_jagged_forward_symint
+ CPU: _padded_dense_to_jagged_forward_cpu
+
+ - func: _nested_from_padded_tensor(Tensor padded, Tensor offsets, Tensor dummy, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None, SymInt? sum_S=None) -> Tensor
+ variants: function
+ device_check: NoCheck
+ dispatch: {}

  - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
  dispatch:
@@ -14660,6 +14806,11 @@
  NestedTensorCUDA: NestedTensor_softmax_dropout_cuda
  tags: nondeterministic_seeded

+ - func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _safe_softmax
+ NestedTensorCPU, NestedTensorCUDA: _safe_softmax
+
  # Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
  - func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
  variants: function
@@ -14674,24 +14825,29 @@
  CUDA, NestedTensorCUDA: native_multi_head_attention_cuda
  autogen: _native_multi_head_attention.out

- - func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> Tensor
+ - func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> Tensor
  python_module: nn
  variants: function
  autogen: scaled_dot_product_attention.out
  tags: nondeterministic_seeded

  # This aten function is kept so that we can test the choice function from Python
- - func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int
+ - func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> int
  dispatch:
  Meta: _fused_sdp_choice_meta
  CPU, NestedTensorCPU: _fused_sdp_choice_cpp
  CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
+ - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
  variants: function
  tags: nondeterministic_seeded

+ - func: _scaled_dot_product_attention_math_for_mps(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
+ dispatch:
+ MPS: _scaled_dot_product_attention_math_mps
+ tags: nondeterministic_seeded
+
  - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_flash_attention_cuda
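
`scaled_dot_product_attention` (along with the fused-backend chooser and the math fallback) gains an `enable_gqa` flag for grouped-query attention, where several query heads share one key/value head; a dedicated MPS math kernel is also new. A sketch of the GQA path, assuming the query-head count divides evenly by the KV-head count:

    import torch
    import torch.nn.functional as F

    # 8 query heads share 2 key/value heads (8 % 2 == 0 is required).
    q = torch.randn(1, 8, 16, 64)
    k = torch.randn(1, 2, 16, 64)
    v = torch.randn(1, 2, 16, 64)
    out = F.scaled_dot_product_attention(q, k, v, enable_gqa=True)
    print(out.shape)  # torch.Size([1, 8, 16, 64])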
@@ -14703,6 +14859,11 @@
  CPU: _scaled_dot_product_flash_attention_cpu
  tags: nondeterministic_seeded

+ - func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ dispatch:
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
+ tags: nondeterministic_seeded
+
  - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
  device_check: NoCheck
  variants: function
@@ -14716,6 +14877,12 @@
  dispatch:
  CPU: _scaled_dot_product_flash_attention_cpu_backward

+ - func: _scaled_dot_product_fused_attention_overrideable_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor attn_bias, bool[4] grad_input_mask, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value, Tensor grad_attn_bias)
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable_backward
+
  - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
  dispatch:
  CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14728,12 +14895,12 @@
  CUDA: _scaled_dot_product_efficient_attention_backward_cuda
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
  dispatch:
  CUDA: _scaled_dot_product_cudnn_attention_cuda
  tags: nondeterministic_seeded

- - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+ - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
  dispatch:
  CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
  tags: nondeterministic_seeded
@@ -15563,6 +15730,7 @@
  dispatch:
  CPU: _fused_adam_kernel_cpu_
  CUDA: _fused_adam_kernel_cuda_
+ MPS: _fused_adam_kernel_mps_
  autogen: _fused_adam, _fused_adam.out

  - func: _fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15573,6 +15741,7 @@
  dispatch:
  CPU: _fused_adam_kernel_cpu_
  CUDA: _fused_adam_kernel_cuda_
+ MPS: _fused_adam_kernel_mps_
  autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out

  - func: _fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15581,6 +15750,7 @@
  dispatch:
  CPU: _fused_adamw_kernel_cpu_
  CUDA: _fused_adamw_kernel_cuda_
+ MPS: _fused_adamw_kernel_mps_
  autogen: _fused_adamw, _fused_adamw.out

  - func: _fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15591,6 +15761,7 @@
  dispatch:
  CPU: _fused_adamw_kernel_cpu_
  CUDA: _fused_adamw_kernel_cuda_
+ MPS: _fused_adamw_kernel_mps_
  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

  - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15599,6 +15770,7 @@
  dispatch:
  CPU: _fused_sgd_kernel_cpu_
  CUDA: _fused_sgd_kernel_cuda_
+ MPS: _fused_sgd_kernel_mps_
  autogen: _fused_sgd, _fused_sgd.out

  - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15609,6 +15781,7 @@
  dispatch:
  CPU: _fused_sgd_kernel_cpu_
  CUDA: _fused_sgd_kernel_cuda_
+ MPS: _fused_sgd_kernel_mps_
  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out

  - func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
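
The fused Adam/AdamW/SGD loops all register MPS kernels in this range, so `fused=True` should now be usable on Apple-silicon devices. A hedged sketch, guarded on MPS availability:

    import torch

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    model = torch.nn.Linear(4, 4).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=1e-3, fused=True)
    loss = model(torch.randn(2, 4, device=device)).sum()
    loss.backward()
    opt.step()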