torch-rb 0.17.1 → 0.19.0

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
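
The hunks below are all changes to PyTorch's native_functions.yaml, the operator-declaration file that torch-rb vendors for code generation. Each entry pairs a schema string with codegen metadata. As a reading aid, here is a minimal sketch of the entry shape (field values are illustrative, drawn from entries that appear in this diff):

    - func: abs(Tensor self) -> Tensor     # schema: name, arguments, return type
      variants: function, method           # generate a free function and a tensor method
      dispatch:                            # dispatch key -> C++ kernel name
        CompositeExplicitAutograd: abs
        SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
      tags: [core, pointwise]              # behavioral tags consumed by compilers/export
      autogen: abs.out                     # ask codegen to synthesize this variant

Most hunks follow a few recurring patterns: adding the SparseCsrMeta dispatch key to existing sparse-CSR kernels, adding XPU kernels to the matrix-multiply family, tagging composite ops as maybe_aliasing_or_mutating or pointwise, and a handful of schema changes called out below.
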
@@ -187,7 +187,10 @@
   dispatch:
     CPU: _functional_assert_async_msg_cpu

-- func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()
+- func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None, *, Device? device=None, Layout? layout=None) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_tensor_metadata
+    Meta: _assert_tensor_metadata_meta_symint

 - func: _print(str s) -> ()
   dispatch:
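
Assembled from the added lines above, the updated _assert_tensor_metadata entry reads as follows; the new keyword-only device and layout arguments presumably extend the assert to cover device and layout, and the op gains an explicit dispatch section with a meta implementation:

    - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None, *, Device? device=None, Layout? layout=None) -> ()
      dispatch:
        CompositeExplicitAutograd: _assert_tensor_metadata
        Meta: _assert_tensor_metadata_meta_symint
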
@@ -309,25 +312,25 @@
 - func: _shape_as_tensor(Tensor self) -> Tensor

 - func: dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]

 - func: dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded

 - func: feature_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]

 - func: feature_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded

 - func: alpha_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]

 - func: alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded

 - func: feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]

 - func: feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
   tags: nondeterministic_seeded
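
Note the tag syntax in this hunk: a single tag may be written as a bare scalar, while multiple tags use YAML flow-list form. The functional dropout variants gain maybe_aliasing_or_mutating (a tag PyTorch uses to mark composite ops whose decompositions may alias or mutate their inputs), while the in-place `_` variants keep only nondeterministic_seeded:

    - func: dropout(Tensor input, float p, bool train) -> Tensor
      tags: [nondeterministic_seeded, maybe_aliasing_or_mutating]

    - func: dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
      tags: nondeterministic_seeded
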
@@ -338,7 +341,7 @@
   dispatch:
     CompositeExplicitAutograd: abs
     SparseCPU, SparseCUDA: abs_sparse
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
   tags: [core, pointwise]

@@ -348,7 +351,7 @@
   dispatch:
     CompositeExplicitAutograd: abs_
     SparseCPU, SparseCUDA: abs_sparse_
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_

 - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -357,7 +360,7 @@
     CPU, CUDA: abs_out
     MPS: abs_out_mps
     SparseCPU, SparseCUDA: abs_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
   tags: pointwise

 # Note [Adding an alias]
@@ -400,14 +403,14 @@
   variants: function, method
   dispatch:
     CPU, CUDA: angle
-    SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
   tags: pointwise

 - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: angle_out
-    SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
   tags: pointwise

 - func: view_as_real(Tensor(a) self) -> Tensor(a)
@@ -425,7 +428,7 @@
   structured_delegate: sgn.out
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
   tags: pointwise

@@ -434,7 +437,7 @@
   structured_delegate: sgn.out
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
   tags: pointwise

@@ -445,7 +448,7 @@
     CPU, CUDA: sgn_out
     MPS: sgn_out_mps
     SparseCPU, SparseCUDA: sgn_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_out
   tags: pointwise

 - func: chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
@@ -472,26 +475,26 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: _conj_physical
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr
   autogen: _conj_physical.out

 - func: conj_physical(Tensor self) -> Tensor
   variants: function, method
-  tags: pointwise
+  tags: [pointwise, maybe_aliasing_or_mutating]

 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
     MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_out
   tags: pointwise

 - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: conj_physical_
-    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_
   tags: pointwise

 - func: resolve_conj(Tensor(a) self) -> Tensor(a)
@@ -537,9 +540,11 @@

 - func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
   tags: core
+  autogen: avg_pool1d.out

 - func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
   tags: core
+  autogen: adaptive_avg_pool1d.out

 # Return: (Tensor output, Tensor indices)
 - func: adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)
@@ -639,6 +644,7 @@
     CPU: addmv_out_cpu
     CUDA: addmv_out_cuda
     MPS: addmv_out_mps
+    XPU: addmv_out_xpu
     SparseCsrCPU: addmv_out_sparse_compressed
     SparseCsrCUDA: addmv_out_sparse_compressed_cuda

@@ -694,6 +700,9 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: all.out
   variants: function, method
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
+

 - func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -863,7 +872,7 @@
   structured_delegate: asinh.out
   dispatch:
     SparseCPU, SparseCUDA: asinh_sparse
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr
   tags: [core, pointwise]

 - func: asinh_(Tensor(a!) self) -> Tensor(a!)
@@ -871,7 +880,7 @@
   structured_delegate: asinh.out
   dispatch:
     SparseCPU, SparseCUDA: asinh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_
   tags: pointwise

 - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -881,7 +890,7 @@
     CPU, CUDA: asinh_out
     MPS: asinh_out_mps
     SparseCPU, SparseCUDA: asinh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_out
   tags: pointwise

 # arcsinh, alias for asinh
@@ -898,7 +907,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atanh_sparse
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr
   tags: [core, pointwise]

 - func: atanh_(Tensor(a!) self) -> Tensor(a!)
@@ -906,7 +915,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atanh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_
   tags: pointwise

 - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -916,7 +925,7 @@
     CPU, CUDA: atanh_out
     MPS: atanh_out_mps
     SparseCPU, SparseCUDA: atanh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_out
   tags: pointwise
 # arctanh, alias for atanh

@@ -954,7 +963,7 @@
   structured_delegate: asin.out
   dispatch:
     SparseCPU, SparseCUDA: asin_sparse
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr
   tags: [core, pointwise]

 - func: asin_(Tensor(a!) self) -> Tensor(a!)
@@ -963,7 +972,7 @@
   structured_delegate: asin.out
   dispatch:
     SparseCPU, SparseCUDA: asin_sparse_
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_
   tags: pointwise

 - func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -974,7 +983,7 @@
     CPU, CUDA: asin_out
     MPS: asin_out_mps
     SparseCPU, SparseCUDA: asin_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
   tags: pointwise

 # arcsin, alias of asin
@@ -992,7 +1001,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atan_sparse
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr
   tags: [core, pointwise]

 - func: atan_(Tensor(a!) self) -> Tensor(a!)
@@ -1001,7 +1010,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: atan_sparse_
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_
   tags: pointwise

 - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1012,7 +1021,7 @@
     CPU, CUDA: atan_out
     MPS: atan_out_mps
     SparseCPU, SparseCUDA: atan_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
   tags: pointwise

 # arctan, alias of atan
@@ -1026,17 +1035,20 @@

 - func: atleast_1d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating

 - func: atleast_1d.Sequence(Tensor[] tensors) -> Tensor[]

 - func: atleast_2d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating

 - func: atleast_2d.Sequence(Tensor[] tensors) -> Tensor[]
   variants: function

 - func: atleast_3d(Tensor self) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating

 - func: atleast_3d.Sequence(Tensor[] tensors) -> Tensor[]
   variants: function
@@ -1056,6 +1068,7 @@
     CPU: baddbmm_out_cpu
     CUDA: baddbmm_out_cuda
     MPS: baddbmm_out_mps
+    XPU: baddbmm_out_xpu
     SparseCsrCUDA: baddbmm_out_sparse_csr_cuda

 - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -1069,6 +1082,7 @@
   autogen: bartlett_window.periodic_out

 - func: batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
+  tags: maybe_aliasing_or_mutating

 - func: quantized_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point) -> Tensor
   dispatch:
@@ -1076,6 +1090,7 @@
   autogen: quantized_batch_norm.out

 - func: _batch_norm_impl_index(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor, Tensor, Tensor, Tensor, int)
+  tags: maybe_aliasing_or_mutating

 - func: _batch_norm_impl_index_backward(int impl_index, Tensor input, Tensor grad_output, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var_transform, bool train, float eps, bool[3] output_mask, Tensor reservedSpace) -> (Tensor, Tensor, Tensor)

@@ -1353,6 +1368,7 @@
     CPU: bmm_out_cpu
     CUDA: bmm_out_cuda
     MPS: bmm_out_mps
+    XPU: bmm_out_xpu
     SparseCPU: bmm_out_sparse_cpu
     SparseCUDA: bmm_out_sparse_cuda
     SparseCsrCUDA: bmm_out_sparse_csr_cuda
@@ -1423,7 +1439,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: ceil_sparse
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr
   tags: [core, pointwise]

 - func: ceil_(Tensor(a!) self) -> Tensor(a!)
@@ -1432,7 +1448,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: ceil_sparse_
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_
   tags: pointwise

 - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1443,7 +1459,7 @@
     CPU, CUDA: ceil_out
     MPS: ceil_out_mps
     SparseCPU, SparseCUDA: ceil_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
   tags: pointwise

 # alias for torch.linalg.multi_dot
@@ -1457,6 +1473,7 @@
   variants: function, method
   device_check: NoCheck
   device_guard: False
+  tags: maybe_aliasing_or_mutating

 - func: chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]
   variants: function, method
@@ -1762,7 +1779,7 @@
     MkldnnCPU: copy_mkldnn_
     SparseCPU, SparseCUDA: copy_sparse_wrapper_
     CompositeExplicitAutograd: copy_
-    SparseCsrCPU, SparseCsrCUDA: copy_sparse_compressed_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
     NestedTensorCPU, NestedTensorCUDA: copy_nested_
   autogen: copy.out

@@ -1783,7 +1800,7 @@
   variants: function, method
   structured_delegate: cos.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: cos_nested
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_cos
   tags: [core, pointwise]

 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -2338,7 +2355,7 @@

 - func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
   dispatch:
-    CompositeImplicitAutograd: _embedding_bag_backward_symint
+    CPU, CUDA: _embedding_bag_backward_symint

 - func: _embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
   dispatch:
@@ -2370,8 +2387,10 @@
     MPS: empty_mps
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
-    SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
+    SparseCPU, SparseCUDA: empty_sparse
+    SparseMeta: empty_sparse_symint
+    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrMeta: empty_sparse_compressed_symint
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core

@@ -2446,7 +2465,7 @@
     CUDA: resize_cuda_
     MPS: resize_mps_
     QuantizedCPU: quantized_resize_cpu_
-    SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_sparse_csr_
   autogen: resize, resize.out

 # This is a utility function to enable users to resize out tensor while registering kernels for out variants.
@@ -2497,7 +2516,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: erf_sparse
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr
   tags: [core, pointwise]

 - func: erf_(Tensor(a!) self) -> Tensor(a!)
@@ -2506,7 +2525,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: erf_sparse_
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_
   tags: pointwise

 - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2517,7 +2536,7 @@
     CPU, CUDA: erf_out
     MPS: erf_out_mps
     SparseCPU, SparseCUDA: erf_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
   tags: pointwise

 - func: erfc(Tensor self) -> Tensor
@@ -2585,7 +2604,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr
   tags: [core, pointwise]

 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
@@ -2594,7 +2613,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: expm1_sparse_
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_
   tags: pointwise

 - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2605,7 +2624,7 @@
     CPU, CUDA: expm1_out
     MPS: expm1_out_mps
     SparseCPU, SparseCUDA: expm1_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
   tags: pointwise

 - func: expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
@@ -2683,7 +2702,7 @@
     MPS: fill_scalar_mps
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
-    SparseCsrCPU, SparseCsrCUDA: fill_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: fill_nested_
   autogen: fill.Scalar_out

@@ -2704,7 +2723,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: floor_sparse
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr
   tags: [core, pointwise]

 - func: floor_(Tensor(a!) self) -> Tensor(a!)
@@ -2713,7 +2732,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: floor_sparse_
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_
   tags: pointwise

 - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2724,7 +2743,7 @@
     CPU, CUDA: floor_out
     MPS: floor_out_mps
     SparseCPU, SparseCUDA: floor_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
   tags: pointwise

 - func: floor_divide(Tensor self, Tensor other) -> Tensor
@@ -2769,7 +2788,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: frac_sparse
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr
   tags: pointwise

 - func: frac_(Tensor(a!) self) -> Tensor(a!)
@@ -2778,7 +2797,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: frac_sparse_
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_
   tags: pointwise

 - func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -2789,7 +2808,7 @@
     CPU, CUDA: frac_out
     MPS: frac_out_mps
     SparseCPU, SparseCUDA: frac_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_out
   tags: pointwise

 - func: full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -2814,6 +2833,7 @@
     # non-differentiable so NonFunctional doesn't apply
     CompositeExplicitAutograd: full_like
   autogen: full_like.out
+  tags: core

 - func: from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
@@ -3061,6 +3081,18 @@
   dispatch:
     CompositeExplicitAutograd: _unsafe_index

+# Used by inductor to generate masked loads
+# Note that we don't support boolean indexing, to avoid dynamic output shapes
+- func: _unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _unsafe_masked_index
+
+- func: _unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _unsafe_masked_index_put_accumulate
+
 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   variants: function
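
The two index helpers added above are private ops for torch.compile's inductor backend: per the in-file comments they generate masked loads, and boolean indexing is deliberately unsupported so output shapes stay static. Stripped of diff markers, the new entries are plain composite ops:

    - func: _unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor
      variants: function
      dispatch:
        CompositeExplicitAutograd: _unsafe_masked_index

    - func: _unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor
      variants: function
      dispatch:
        CompositeExplicitAutograd: _unsafe_masked_index_put_accumulate
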
@@ -3160,8 +3192,9 @@
   device_guard: False
   dispatch:
     CPU, CUDA, MPS: isnan
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isnan
     SparseCPU, SparseCUDA: isnan_sparse
-    SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
   autogen: isnan.out
   tags: [core, pointwise]

@@ -3270,7 +3303,9 @@
   autogen: native_layer_norm_backward.out
   tags: core

-- func: rms_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+- func: rms_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: rms_norm_symint

 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
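
The rms_norm change above swaps int[] for SymInt[], so normalized_shape can carry symbolic sizes under dynamic-shape tracing, and routes the op through a SymInt-aware composite kernel. The resulting entry:

    - func: rms_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
      dispatch:
        CompositeImplicitAutograd: rms_norm_symint
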
@@ -3336,9 +3371,10 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, bool split_k_one_kernel=True) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm
+  tags: needs_fixed_stride_order

 - func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
   dispatch:
@@ -3381,6 +3417,10 @@

 - func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor

+- func: _wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
+
+- func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
+
 - func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor

 - func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
@@ -3487,7 +3527,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr
   tags: [core, pointwise]

 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
@@ -3496,7 +3536,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse_
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_
   tags: pointwise

 - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -3507,7 +3547,7 @@
     CPU, CUDA: log1p_out
     MPS: log1p_out_mps
     SparseCPU, SparseCUDA: log1p_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
   tags: pointwise

 - func: log2(Tensor self) -> Tensor
@@ -3899,11 +3939,10 @@
   tags: core

 # For normal naming convention this should be `mean.out`. However since we already have `mean.out` we have to rename this.
-# FIXME: fix CI jobs and re-enable this
-#- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
-#  device_check: NoCheck # TensorIterator
-#  dispatch:
-#    CompositeExplicitAutograd: mean_dtype_out
+- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  dispatch:
+    CompositeExplicitAutograd: mean_dtype_out

 - func: mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   structured_delegate: mean.out
@@ -4095,7 +4134,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: _sparse_mm
-    SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm
   tags: core

 - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
@@ -4104,8 +4143,9 @@
     CPU: mm_out_cpu
     CUDA: mm_out_cuda
     MPS: mm_out_mps
+    XPU: mm_out_xpu
     SparseCPU, SparseCUDA: _sparse_mm_out
-    SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out

 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
@@ -4119,15 +4159,24 @@

 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
   dispatch:
-    CPU: _convert_weight_to_int4pack_cpu
     CUDA: _convert_weight_to_int4pack_cuda
+    MPS: _convert_weight_to_int4pack_mps

 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
-    CPU: _weight_int4pack_mm_cpu
     MPS: _weight_int4pack_mm_mps
     CUDA: _weight_int4pack_mm_cuda

+# Split int4 pack weight between cpu and other devices due to
+# https://github.com/pytorch/ao/issues/1117#issuecomment-2451252756.
+- func: _convert_weight_to_int4pack_for_cpu(Tensor self, int innerKTiles) -> Tensor
+  dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
+
+- func: _weight_int4pack_mm_for_cpu(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+  dispatch:
+    CPU: _weight_int4pack_mm_cpu
+
 - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
   dispatch:
     CPU: _weight_int8pack_mm_cpu
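
After this hunk the int4 weight-packing path is split by device: _convert_weight_to_int4pack and _weight_int4pack_mm keep only their CUDA and MPS kernels, while the CPU kernels move to new *_for_cpu ops (the pytorch/ao issue linked in the in-file comment explains the packed-format divergence behind the split). The new CPU-only entries:

    - func: _convert_weight_to_int4pack_for_cpu(Tensor self, int innerKTiles) -> Tensor
      dispatch:
        CPU: _convert_weight_to_int4pack_cpu

    - func: _weight_int4pack_mm_for_cpu(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
      dispatch:
        CPU: _weight_int4pack_mm_cpu
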
@@ -4165,7 +4214,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse
-    SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
     MkldnnCPU: mkldnn_mul
     ZeroTensor: mul_zerotensor
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
@@ -4177,7 +4226,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse_
-    SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
     MkldnnCPU: mkldnn_mul_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
   tags: pointwise
@@ -4191,7 +4240,7 @@
     MPS: mul_out_mps
     SparseCPU: mul_out_sparse_cpu
     SparseCUDA: mul_out_sparse_cuda
-    SparseCsrCPU, SparseCsrCUDA: mul_out_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
     MkldnnCPU: mkldnn_mul_out
   tags: pointwise
 # For C++ only, until we have conversion from C++ numbers to Tensor
@@ -4201,7 +4250,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: mul
-    SparseCsrCPU, SparseCsrCUDA: mul_scalar_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
   tags: [core, pointwise]

@@ -4210,7 +4259,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: mul_
-    SparseCsrCPU, SparseCsrCUDA: mul__scalar_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
   autogen: mul.Scalar_out
   tags: pointwise
@@ -4530,9 +4579,11 @@
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
-    NestedTensorCUDA, CUDA: is_pinned_cuda
-    MPS: is_pinned_mps
-    CompositeExplicitAutograd: is_pinned_default
+    # the NestedTensor keys are necessary because NestedTensor has been removed
+    # from the CompositeExplicitAutograd keyset see Note [NestedTensor Not Included in Backend Keys]
+    CompositeExplicitAutograd, NestedTensorCPU: is_pinned
+    SparseCsrCPU: is_pinned_sparse_compressed
+    SparseCPU: is_pinned_sparse_coo

 # TODO: add a copy kwarg that guarantees that the tensor is put into fresh
 # pinned memory
@@ -4542,9 +4593,10 @@
 # Unlike pin_memory, this is guaranteed to give a new non-aliasing tensor
 - func: _pin_memory(Tensor self, Device? device=None) -> Tensor
   dispatch:
-    CUDA: _pin_memory_cuda
-    MPS: _pin_memory_mps
-    NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
+    CompositeExplicitAutograd: _pin_memory
+    NestedTensorCPU: _pin_memory_nested
+    SparseCPU: _pin_memory_sparse_coo
+    SparseCsrCPU: _pin_memory_sparse_compressed
   autogen: _pin_memory.out

 - func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
@@ -4558,27 +4610,30 @@
   dispatch:
     CompositeExplicitAutograd: rad2deg
     SparseCPU, SparseCUDA: rad2deg_sparse
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr
+  tags: pointwise

 - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: rad2deg_
     SparseCPU, SparseCUDA: rad2deg_sparse_
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_
+  tags: pointwise

 - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: rad2deg_out
     SparseCPU, SparseCUDA: rad2deg_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out
+  tags: pointwise

 - func: deg2rad(Tensor self) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: deg2rad
     SparseCPU, SparseCUDA: deg2rad_sparse
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr
   tags: pointwise

 - func: deg2rad_(Tensor(a!) self) -> Tensor(a!)
@@ -4586,14 +4641,14 @@
   dispatch:
     CompositeExplicitAutograd: deg2rad_
     SparseCPU, SparseCUDA: deg2rad_sparse_
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_
   tags: pointwise

 - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: deg2rad_out
     SparseCPU, SparseCUDA: deg2rad_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_out
   tags: pointwise

 - func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -4811,7 +4866,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: neg_sparse
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
   tags: [core, pointwise]

@@ -4821,7 +4876,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: neg_sparse_
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
   tags: pointwise

@@ -4833,7 +4888,7 @@
     CPU, CUDA: neg_out
     MPS: neg_out_mps
     SparseCPU, SparseCUDA: neg_out_sparse
-    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
   tags: pointwise
 # Alias for neg

@@ -4917,7 +4972,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: round_sparse
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr
   tags: [core, pointwise]

 - func: round_(Tensor(a!) self) -> Tensor(a!)
@@ -4926,7 +4981,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: round_sparse_
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_
   tags: pointwise

 - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -4938,7 +4993,7 @@
     CUDA: round_out
     MPS: round_out_mps
     SparseCPU, SparseCUDA: round_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
   tags: pointwise

 - func: round.decimals(Tensor self, *, int decimals) -> Tensor
@@ -4964,7 +5019,7 @@

 - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
   device_check: NoCheck # TensorIterator
-  tags: nondeterministic_seeded
+  tags: [pointwise, nondeterministic_seeded]

 - func: rrelu_(Tensor(a!) self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
   tags: nondeterministic_seeded
@@ -4981,7 +5036,7 @@
     QuantizedCUDA: relu_quantized_cuda
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
     SparseCPU, SparseCUDA: relu_sparse
-    SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
   tags: [core, pointwise]

 - func: relu_(Tensor(a!) self) -> Tensor(a!)
@@ -4995,12 +5050,13 @@
     QuantizedCUDA: relu_quantized_cuda_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
     SparseCPU, SparseCUDA: relu_sparse_
-    SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
   autogen: relu.out
   tags: pointwise

 - func: relu6(Tensor self) -> Tensor
   python_module: nn
+  tags: pointwise

 - func: relu6_(Tensor(a!) self) -> Tensor(a!)
   python_module: nn
@@ -5085,6 +5141,7 @@
   structured_delegate: hardshrink.out
   device_check: NoCheck # TensorIterator
   variants: function, method
+  tags: pointwise

 - func: hardshrink_backward.grad_input(Tensor grad_out, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -5128,7 +5185,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: select_symint
-    SparseCsrCPU, SparseCsrCUDA: select_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: select_nested
   tags: core

@@ -5149,6 +5206,7 @@

 - func: selu(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
+  tags: pointwise

 - func: selu_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5157,6 +5215,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CompositeExplicitAutograd: celu
+  tags: pointwise

 - func: celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5207,6 +5266,7 @@
 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
   python_module: nn
+  tags: pointwise

 - func: mish_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: mish.out
@@ -5277,9 +5337,9 @@
   structured_delegate: sin.out
   variants: function, method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
     SparseCPU, SparseCUDA: sin_sparse
-    NestedTensorCPU, NestedTensorCUDA: sin_nested
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sin
   tags: [core, pointwise]

 - func: sin_(Tensor(a!) self) -> Tensor(a!)
@@ -5287,7 +5347,7 @@
   structured_delegate: sin.out
   variants: function, method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_
     SparseCPU, SparseCUDA: sin_sparse_
   tags: pointwise

@@ -5298,7 +5358,7 @@
   dispatch:
     CPU, CUDA: sin_out
     MPS: sin_out_mps
-    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
     SparseCPU, SparseCUDA: sin_sparse_out
   tags: pointwise

@@ -5325,7 +5385,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sinh_sparse
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr
   tags: [core, pointwise]

 - func: sinh_(Tensor(a!) self) -> Tensor(a!)
@@ -5334,7 +5394,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sinh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_
   tags: pointwise

 - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5345,7 +5405,7 @@
     CPU, CUDA: sinh_out
     MPS: sinh_out_mps
     SparseCPU, SparseCUDA: sinh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out

 # Returns a copy of this `Variable` that is detached from its autograd graph.
 # This method is OK to call if the `Variable` is a view.
@@ -5732,7 +5792,7 @@
   dispatch:
     NestedTensorCPU: NestedTensor_sum_dim_CPU
     SparseCPU, SparseCUDA: sum_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: sum_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_sparse_compressed
   tags: core

 - func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -5777,8 +5837,9 @@
   structured_delegate: sqrt.out
   variants: function, method
   dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sqrt
     SparseCPU, SparseCUDA: sqrt_sparse
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
   tags: [core, pointwise]

 - func: sqrt_(Tensor(a!) self) -> Tensor(a!)
@@ -5787,7 +5848,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: sqrt_sparse_
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_
   tags: pointwise

 - func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5798,7 +5859,7 @@
     CPU, CUDA: sqrt_out
     MPS: sqrt_out_mps
     SparseCPU, SparseCUDA: sqrt_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
   tags: pointwise

 - func: square(Tensor self) -> Tensor
@@ -5936,7 +5997,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: tan_sparse
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr
   tags: [core, pointwise]

 - func: tan_(Tensor(a!) self) -> Tensor(a!)
@@ -5945,7 +6006,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: tan_sparse_
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_
   tags: pointwise

 - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -5956,7 +6017,7 @@
     CPU, CUDA: tan_out
     MPS: tan_out_mps
     SparseCPU, SparseCUDA: tan_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
   tags: pointwise

 - func: tanh(Tensor self) -> Tensor
@@ -5967,7 +6028,7 @@
     QuantizedCPU: tanh_quantized_cpu
     MkldnnCPU: mkldnn_tanh
     SparseCPU, SparseCUDA: tanh_sparse
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
   tags: [core, pointwise]

@@ -5978,7 +6039,7 @@
   dispatch:
     MkldnnCPU: mkldnn_tanh_
     SparseCPU, SparseCUDA: tanh_sparse_
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
    NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
   tags: pointwise

@@ -5990,7 +6051,7 @@
     CPU, CUDA: tanh_out
     MPS: tanh_out_mps
     SparseCPU, SparseCUDA: tanh_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
   tags: pointwise

 - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
@@ -6006,6 +6067,7 @@
   structured_delegate: threshold.out
   dispatch:
     QuantizedCPU: threshold_quantized_cpu
+  tags: pointwise

 - func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -6027,7 +6089,7 @@
     CPU, CUDA: threshold_backward_out
     MPS: threshold_backward_out_mps
     SparseCPU, SparseCUDA: threshold_backward_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed_out

 - func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
   variants: function
@@ -6035,7 +6097,7 @@
   dispatch:
     MkldnnCPU: mkldnn_relu_backward
     SparseCPU, SparseCUDA: threshold_backward_sparse
-    SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
     NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
   tags: pointwise

@@ -6185,12 +6247,12 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out

-- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor(a)
   variants: function
   device_check: NoCheck
   dispatch: {}

-- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor
   variants: function
   device_check: NoCheck
   tags: view_copy
@@ -6227,6 +6289,16 @@
   device_check: NoCheck
   dispatch: {}

+- func: _nested_get_min_seqlen(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_max_seqlen(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
 - func: _nested_get_jagged_dummy(Tensor any) -> Tensor
   category_override: dummy
   dispatch: {}
@@ -6251,7 +6323,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr
   tags: [core, pointwise]

 - func: trunc_(Tensor(a!) self) -> Tensor(a!)
@@ -6260,7 +6332,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: trunc_sparse_
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_
   tags: pointwise

 - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -6271,7 +6343,7 @@
     CPU, CUDA: trunc_out
     MPS: trunc_out_mps
     SparseCPU, SparseCUDA: trunc_sparse_out
-    SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
   tags: pointwise
 # Alias for trunc

@@ -6443,12 +6515,14 @@
   variants: function, method
   dispatch:
     CPU, CUDA, MPS: where
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA, MPS: where_self_out
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where_out

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6780,7 +6854,7 @@
   dispatch:
     CompositeExplicitAutograd: clone
     SparseCPU, SparseCUDA: clone_sparse
-    SparseCsrCPU, SparseCsrCUDA: clone_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
     MkldnnCPU: mkldnn_clone
     QuantizedCPU, QuantizedCUDA: quantized_clone
     NestedTensorCPU, NestedTensorCUDA: clone_nested
@@ -6804,7 +6878,7 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: resize_as_sparse_
-    SparseCsrCPU, SparseCsrCUDA: resize_as_sparse_compressed_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_as_sparse_compressed_
   autogen: resize_as_sparse, resize_as_sparse.out

 - func: zero_(Tensor(a!) self) -> Tensor(a!)
@@ -6951,6 +7025,7 @@
     CPU: addmm_out_cpu
     CUDA: addmm_out_cuda
     MPS: addmm_out_mps
+    XPU: addmm_out_xpu
     SparseCPU: addmm_out_sparse_dense_cpu
     SparseCUDA: addmm_out_sparse_dense_cuda
     SparseCsrCPU: addmm_out_sparse_compressed_cpu
@@ -6962,7 +7037,7 @@
   dispatch:
     SparseCPU: addmm_sparse_dense_cpu
     SparseCUDA: addmm_sparse_dense_cuda
-    SparseCsrCPU, SparseCsrCUDA: addmm_sparse_compressed_dense
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
   tags: core

 - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
@@ -6979,17 +7054,18 @@
   dispatch:
     CPU: addmm_activation_out_cpu
     CUDA: addmm_activation_out_cuda
+    XPU: addmm_activation_out_xpu

 - func: _addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor
   structured_delegate: _addmm_activation.out
   variants: function, method

-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
   variants: function
   dispatch:
     CUDA: _scaled_mm_cuda

-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _scaled_mm_out_cuda
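
The _scaled_mm rewrite above is a breaking signature change: scale_a and scale_b become required positional tensors rather than optional keyword arguments, the separate amax output disappears, and the op now returns a single tensor. The resulting entry:

    - func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
      variants: function
      dispatch:
        CUDA: _scaled_mm_cuda
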
@@ -7184,7 +7260,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_mask
-    SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_mask_sparse_compressed
   autogen: sparse_mask.out

 - func: _sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor
@@ -7204,7 +7280,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_to_dense
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_dense
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_dense
     MkldnnCPU: mkldnn_to_dense
   autogen: _to_dense.out

@@ -7385,7 +7461,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse
     SparseCPU, SparseCUDA: sparse_coo_to_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
   autogen: _to_sparse.sparse_dim_out

 - func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
@@ -7397,7 +7473,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse
     SparseCPU, SparseCUDA: sparse_coo_to_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
   autogen: _to_sparse.out

 - func: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
@@ -7409,7 +7485,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse_csr
     SparseCPU, SparseCUDA: coo_to_sparse_csr
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csr
   autogen: _to_sparse_csr.out

 - func: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
@@ -7421,7 +7497,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse_csc
     SparseCPU, SparseCUDA: coo_to_sparse_csc
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csc
   autogen: _to_sparse_csc.out

 - func: to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
@@ -7433,7 +7509,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse_bsr
     SparseCPU, SparseCUDA: coo_to_sparse_bsr
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsr
   autogen: _to_sparse_bsr.out

 - func: to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
@@ -7445,7 +7521,7 @@
   dispatch:
     CPU, CUDA: dense_to_sparse_bsc
     SparseCPU, SparseCUDA: coo_to_sparse_bsc
-    SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsc
   autogen: _to_sparse_bsc.out

 - func: _to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)
@@ -7695,6 +7771,7 @@

 - func: cartesian_prod(Tensor[] tensors) -> Tensor
   variants: function
+  tags: maybe_aliasing_or_mutating

 - func: combinations(Tensor self, int r=2, bool with_replacement=False) -> Tensor
   variants: function
@@ -7976,6 +8053,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: masked_scatter
+  tags: core

 - func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
   dispatch:
@@ -8210,7 +8288,7 @@
   structured: True
   variants: function
   dispatch:
-    CPU, CUDA: scatter_reduce_two
+    CPU, CUDA, MPS: scatter_reduce_two

 - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   structured_delegate: eq.Scalar_out
@@ -8431,21 +8509,21 @@
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: __lshift__
+    CPU, CUDA, MPS: __lshift__
   tags: pointwise

 - func: __lshift__.Tensor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
   dispatch:
-    CPU, CUDA: __lshift__
+    CPU, CUDA, MPS: __lshift__
   tags: pointwise

 - func: __ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: __ilshift__
+    CPU, CUDA, MPS: __ilshift__
   autogen: __lshift__.Scalar_out
   tags: pointwise

@@ -8453,7 +8531,7 @@
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: __ilshift__
8534
+ CPU, CUDA, MPS: __ilshift__
8457
8535
  autogen: __lshift__.Tensor_out
8458
8536
  tags: pointwise
8459
8537
 
@@ -8474,7 +8552,7 @@
8474
8552
  structured: True
8475
8553
  structured_inherits: TensorIteratorBase
8476
8554
  dispatch:
8477
- CPU, CUDA: bitwise_left_shift_out
8555
+ CPU, CUDA, MPS: bitwise_left_shift_out
8478
8556
  tags: pointwise
8479
8557
 
8480
8558
  - func: bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
@@ -8510,28 +8588,28 @@
8510
8588
  device_check: NoCheck # TensorIterator
8511
8589
  variants: method, function
8512
8590
  dispatch:
8513
- CPU, CUDA: __rshift__
8591
+ CPU, CUDA, MPS: __rshift__
8514
8592
  tags: pointwise
8515
8593
 
8516
8594
  - func: __rshift__.Tensor(Tensor self, Tensor other) -> Tensor
8517
8595
  device_check: NoCheck # TensorIterator
8518
8596
  variants: method, function
8519
8597
  dispatch:
8520
- CPU, CUDA: __rshift__
8598
+ CPU, CUDA, MPS: __rshift__
8521
8599
  tags: pointwise
8522
8600
 
8523
8601
  - func: __irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
8524
8602
  device_check: NoCheck # TensorIterator
8525
8603
  variants: method
8526
8604
  dispatch:
8527
- CPU, CUDA: __irshift__
8605
+ CPU, CUDA, MPS: __irshift__
8528
8606
  autogen: __rshift__.Scalar_out
8529
8607
 
8530
8608
  - func: __irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
8531
8609
  device_check: NoCheck # TensorIterator
8532
8610
  variants: method
8533
8611
  dispatch:
8534
- CPU, CUDA: __irshift__
8612
+ CPU, CUDA, MPS: __irshift__
8535
8613
  autogen: __rshift__.Tensor_out
8536
8614
 
8537
8615
  - func: bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor
@@ -8551,7 +8629,7 @@
8551
8629
  structured: True
8552
8630
  structured_inherits: TensorIteratorBase
8553
8631
  dispatch:
8554
- CPU, CUDA: bitwise_right_shift_out
8632
+ CPU, CUDA, MPS: bitwise_right_shift_out
8555
8633
  tags: pointwise
8556
8634
 
8557
8635
  - func: bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
@@ -8612,18 +8690,18 @@
8612
8690
  - func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
8613
8691
  variants: method
8614
8692
  dispatch:
8615
- CPU, CUDA: addbmm_
8693
+ CPU, CUDA, XPU: addbmm_
8616
8694
  MPS: addbmm_mps_
8617
8695
 
8618
8696
  - func: addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
8619
8697
  dispatch:
8620
- CPU, CUDA: addbmm_out
8698
+ CPU, CUDA, XPU: addbmm_out
8621
8699
  MPS: addbmm_out_mps
8622
8700
 
8623
8701
  - func: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
8624
8702
  variants: method, function
8625
8703
  dispatch:
8626
- CPU, CUDA: addbmm
8704
+ CPU, CUDA, XPU: addbmm
8627
8705
  MPS: addbmm_mps
8628
8706
 
8629
8707
  - func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
@@ -8737,12 +8815,14 @@
8737
8815
  dispatch:
8738
8816
  CPU: tril_indices_cpu
8739
8817
  CUDA: tril_indices_cuda
8818
+ MPS: tril_indices_mps
8740
8819
  autogen: tril_indices.out
8741
8820
 
8742
8821
  - func: triu_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
8743
8822
  dispatch:
8744
8823
  CPU: triu_indices_cpu
8745
8824
  CUDA: triu_indices_cuda
8825
+ MPS: triu_indices_mps
8746
8826
  autogen: triu_indices.out
8747
8827
 
8748
8828
  - func: trace(Tensor self) -> Tensor
@@ -8858,6 +8938,7 @@
8858
8938
  variants: method, function
8859
8939
  dispatch:
8860
8940
  QuantizedCPU: eq_quantized_cpu
8941
+ NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
8861
8942
  tags: [core, pointwise]
8862
8943
 
8863
8944
  - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -9196,11 +9277,13 @@
9196
9277
  - func: nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
9197
9278
  dispatch:
9198
9279
  CPU: nonzero_static_out_cpu
9280
+ CUDA: nonzero_static_out_cuda
9199
9281
 
9200
9282
  - func: nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor
9201
9283
  variants: method, function
9202
9284
  dispatch:
9203
9285
  CPU: nonzero_static_cpu
9286
+ CUDA: nonzero_static_cuda
9204
9287
 
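nonzero_static gains a CUDA kernel alongside the existing CPU one. Because the output shape is fixed by size, the op stays usable under CUDA graphs and torch.compile; a small sketch (assumes a CUDA device is present):

    import torch

    x = torch.tensor([0, 1, 0, 2, 3], device="cuda")
    # Always returns exactly `size` index rows; slots beyond the actual
    # number of nonzeros are filled with fill_value.
    idx = torch.nonzero_static(x, size=4, fill_value=-1)
    # tensor([[ 1], [ 3], [ 4], [-1]], device='cuda:0')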
9205
9288
  - func: nonzero_numpy(Tensor self) -> Tensor[]
9206
9289
  variants: method, function
@@ -9502,7 +9585,7 @@
9502
9585
  variants: method, function
9503
9586
  dispatch:
9504
9587
  SparseCPU, SparseCUDA: erfinv_sparse
9505
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr
9588
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr
9506
9589
  tags: pointwise
9507
9590
 
9508
9591
  - func: erfinv_(Tensor(a!) self) -> Tensor(a!)
@@ -9511,7 +9594,7 @@
9511
9594
  variants: method
9512
9595
  dispatch:
9513
9596
  SparseCPU, SparseCUDA: erfinv_sparse_
9514
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_
9597
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_
9515
9598
  tags: pointwise
9516
9599
 
9517
9600
  - func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9522,7 +9605,7 @@
9522
9605
  CPU, CUDA: erfinv_out
9523
9606
  MPS: erfinv_out_mps
9524
9607
  SparseCPU, SparseCUDA: erfinv_sparse_out
9525
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
9608
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
9526
9609
  tags: pointwise
9527
9610
 
9528
9611
  - func: i0(Tensor self) -> Tensor
@@ -9539,7 +9622,7 @@
9539
9622
  structured: True
9540
9623
  structured_inherits: TensorIteratorBase
9541
9624
  dispatch:
9542
- CPU, CUDA: i0_out
9625
+ CPU, CUDA, MPS: i0_out
9543
9626
  tags: pointwise
9544
9627
 
9545
9628
  - func: sign(Tensor self) -> Tensor
@@ -9548,7 +9631,7 @@
9548
9631
  variants: function, method
9549
9632
  dispatch:
9550
9633
  SparseCPU, SparseCUDA: sign_sparse
9551
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr
9634
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr
9552
9635
  tags: [core, pointwise]
9553
9636
 
9554
9637
  - func: sign_(Tensor(a!) self) -> Tensor(a!)
@@ -9557,7 +9640,7 @@
9557
9640
  variants: method
9558
9641
  dispatch:
9559
9642
  SparseCPU, SparseCUDA: sign_sparse_
9560
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_
9643
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_
9561
9644
  tags: pointwise
9562
9645
 
9563
9646
  - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9568,7 +9651,7 @@
9568
9651
  CPU, CUDA: sign_out
9569
9652
  MPS: sign_out_mps
9570
9653
  SparseCPU, SparseCUDA: sign_sparse_out
9571
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
9654
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_out
9572
9655
  tags: pointwise
9573
9656
 
9574
9657
  - func: signbit(Tensor self) -> Tensor
@@ -9576,7 +9659,7 @@
9576
9659
  structured_delegate: signbit.out
9577
9660
  dispatch:
9578
9661
  SparseCPU, SparseCUDA: signbit_sparse
9579
- SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr
9662
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr
9580
9663
  tags: pointwise
9581
9664
 
9582
9665
  - func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -9587,7 +9670,7 @@
9587
9670
  CUDA: signbit_out
9588
9671
  MPS: signbit_out_mps
9589
9672
  SparseCPU, SparseCUDA: signbit_sparse_out
9590
- SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out
9673
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr_out
9591
9674
  tags: pointwise
9592
9675
 
9593
9676
  - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
@@ -10038,9 +10121,10 @@
10038
10121
  - func: argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor
10039
10122
  device_check: NoCheck # TensorIterator
10040
10123
  variants: method, function
10041
- dispatch:
10042
- CPU, CUDA, MPS: argsort_stable
10043
- autogen: argsort.stable_out
10124
+
10125
+ - func: argsort.stable_out(Tensor self, *, bool stable, int dim=-1, bool descending=False, Tensor(a!) out) -> Tensor(a!)
10126
+ device_check: NoCheck # TensorIterator
10127
+ variants: function
10044
10128
 
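The stable argsort overload trades its autogenerated out variant for a hand-written argsort.stable_out entry. From Python the surface is unchanged; a sketch of the out= form (assuming the binding accepts out= as in recent releases):

    import torch

    x = torch.tensor([3.0, 1.0, 3.0, 2.0])
    out = torch.empty(4, dtype=torch.long)
    torch.argsort(x, stable=True, dim=-1, out=out)
    # stable=True preserves the order of equal elements: [1, 3, 0, 2]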
10045
10129
  - func: argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor
10046
10130
  variants: method, function
@@ -10114,7 +10198,7 @@
10114
10198
  - func: unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor
10115
10199
  variants: function
10116
10200
  dispatch:
10117
- CPU, CUDA: unfold_backward
10201
+ CPU, CUDA, MPS: unfold_backward
10118
10202
  autogen: unfold_backward.out
10119
10203
 
10120
10204
  - func: equal(Tensor self, Tensor other) -> bool
@@ -10220,7 +10304,7 @@
10220
10304
  CPU, CUDA: normal_
10221
10305
  MPS: normal_mps_
10222
10306
  Meta: normal_meta_
10223
- SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
10307
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
10224
10308
  NestedTensorCPU, NestedTensorCUDA: normal_nested_
10225
10309
  autogen: normal.out
10226
10310
 
@@ -11044,6 +11128,22 @@
11044
11128
  CUDA: foreach_tensor_lerp_list_cuda_
11045
11129
  autogen: _foreach_lerp.Scalar_out
11046
11130
 
11131
+ - func: _foreach_lerp.ScalarList(Tensor[] self, Tensor[] tensors1, Scalar[] weight) -> Tensor[]
11132
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
11133
+ variants: function
11134
+ dispatch:
11135
+ CompositeExplicitAutograd: foreach_tensor_lerp_scalarlist_kernel_slow
11136
+ CUDA: foreach_tensor_lerp_scalarlist_cuda
11137
+ autogen: _foreach_lerp.ScalarList_out
11138
+
11139
+ - func: _foreach_lerp_.ScalarList(Tensor(a!)[] self, Tensor[] tensors1, Scalar[] weight) -> ()
11140
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
11141
+ variants: function
11142
+ dispatch:
11143
+ CompositeExplicitAutograd: foreach_tensor_lerp_scalarlist_kernel_slow_
11144
+ CUDA: foreach_tensor_lerp_scalarlist_cuda_
11145
+ autogen: _foreach_lerp.ScalarList_out
11146
+
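The new _foreach_lerp.ScalarList overloads accept one scalar weight per tensor, where previously only a single shared Scalar or a full tensor list of weights was supported. A sketch using the private torch._foreach_* bindings:

    import torch

    params  = [torch.zeros(3), torch.zeros(5)]
    targets = [torch.ones(3), torch.ones(5)]
    weights = [0.25, 0.75]  # one Scalar per list element

    outs = torch._foreach_lerp(params, targets, weights)   # functional
    torch._foreach_lerp_(params, targets, weights)         # in-place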
11047
11147
  - func: _foreach_lgamma(Tensor[] self) -> Tensor[]
11048
11148
  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
11049
11149
  variants: function
@@ -11232,6 +11332,21 @@
11232
11332
  CUDA: foreach_tensor_round_cuda_
11233
11333
  autogen: _foreach_round.out
11234
11334
 
11335
+ - func: _foreach_rsqrt(Tensor[] self) -> Tensor[]
11336
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
11337
+ variants: function
11338
+ dispatch:
11339
+ CompositeExplicitAutograd: foreach_tensor_rsqrt_slow
11340
+ CUDA: foreach_tensor_rsqrt_cuda
11341
+
11342
+ - func: _foreach_rsqrt_(Tensor(a!)[] self) -> ()
11343
+ device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
11344
+ variants: function
11345
+ dispatch:
11346
+ CompositeExplicitAutograd: foreach_tensor_rsqrt_slow_
11347
+ CUDA: foreach_tensor_rsqrt_cuda_
11348
+ autogen: _foreach_rsqrt.out
11349
+
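_foreach_rsqrt is a straightforward addition to the foreach family: elementwise 1/sqrt(x) applied across a whole tensor list in one call, with the usual slow CompositeExplicitAutograd path and a fused CUDA kernel. Sketch:

    import torch

    xs = [torch.rand(4) + 0.1, torch.rand(8) + 0.1]
    ys = torch._foreach_rsqrt(xs)   # list of 1/sqrt(x) results
    torch._foreach_rsqrt_(xs)       # in-place variant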
11235
11350
  - func: _foreach_sigmoid(Tensor[] self) -> Tensor[]
11236
11351
  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
11237
11352
  variants: function
@@ -11675,6 +11790,7 @@
11675
11790
  structured_delegate: elu.out
11676
11791
  device_check: NoCheck # TensorIterator
11677
11792
  python_module: nn
11793
+ tags: pointwise
11678
11794
 
11679
11795
  - func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)
11680
11796
  structured: True
@@ -11748,6 +11864,7 @@
11748
11864
  python_module: nn
11749
11865
  dispatch:
11750
11866
  QuantizedCPU: hardsigmoid_quantized_cpu
11867
+ tags: pointwise
11751
11868
 
11752
11869
  - func: hardsigmoid_(Tensor(a!) self) -> Tensor(a!)
11753
11870
  structured_delegate: hardsigmoid.out
@@ -11779,7 +11896,7 @@
11779
11896
  dispatch:
11780
11897
  CPU, CUDA, MPS: hardtanh
11781
11898
  QuantizedCPU: hardtanh_quantized_cpu
11782
- tags: core
11899
+ tags: [pointwise, core]
11783
11900
 
11784
11901
  - func: hardtanh_backward.grad_input(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val, *, Tensor(a!) grad_input) -> Tensor(a!)
11785
11902
  python_module: nn
@@ -11903,19 +12020,20 @@
11903
12020
  CUDA: log_sigmoid_backward_cuda
11904
12021
  MPS: log_sigmoid_backward_mps
11905
12022
 
11906
- - func: rrelu_with_noise.out(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)
12023
+ - func: rrelu_with_noise.out(Tensor self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)
11907
12024
  python_module: nn
11908
12025
  tags: nondeterministic_seeded
11909
12026
  dispatch:
11910
12027
  CPU: rrelu_with_noise_out_cpu
11911
12028
  CUDA: rrelu_with_noise_out_cuda
11912
12029
 
11913
- - func: rrelu_with_noise(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
12030
+ - func: rrelu_with_noise(Tensor self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
11914
12031
  python_module: nn
11915
12032
  dispatch:
11916
12033
  CPU: rrelu_with_noise_cpu
11917
12034
  CUDA: rrelu_with_noise_cuda
11918
12035
  tags: nondeterministic_seeded
12036
+ autogen: rrelu_with_noise_functional
11919
12037
 
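All three rrelu_with_noise entries now annotate noise as Tensor(b!): during training the kernel samples a slope per element and writes it into noise, so the schema finally declares that mutation (the autogenerated rrelu_with_noise_functional gives functionalization a mutation-free form). A sketch, assuming the private binding lives at torch._C._nn as in standard CPython builds:

    import torch

    x = torch.randn(5)
    noise = torch.empty_like(x)
    # training=True writes the sampled per-element slopes into `noise`,
    # which is why it is now declared Tensor(b!) in the schema.
    y = torch._C._nn.rrelu_with_noise(x, noise, lower=0.125, upper=1 / 3,
                                      training=True)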
11920
12038
  - func: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor
11921
12039
  python_module: nn
@@ -11923,7 +12041,7 @@
11923
12041
  CompositeExplicitAutograd: rrelu_with_noise_backward
11924
12042
  autogen: rrelu_with_noise_backward.out
11925
12043
 
11926
- - func: rrelu_with_noise_(Tensor(a!) self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
12044
+ - func: rrelu_with_noise_(Tensor(a!) self, Tensor(b!) noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
11927
12045
  python_module: nn
11928
12046
  tags: nondeterministic_seeded
11929
12047
  dispatch:
@@ -11943,6 +12061,7 @@
11943
12061
  structured_delegate: softplus.out
11944
12062
  device_check: NoCheck # TensorIterator
11945
12063
  python_module: nn
12064
+ tags: pointwise
11946
12065
 
11947
12066
  - func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)
11948
12067
  structured: True
@@ -11969,6 +12088,7 @@
11969
12088
  structured_delegate: softshrink.out
11970
12089
  device_check: NoCheck # TensorIterator
11971
12090
  python_module: nn
12091
+ tags: pointwise
11972
12092
 
11973
12093
  - func: softshrink_backward.grad_input(Tensor grad_output, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)
11974
12094
  structured: True
@@ -12613,6 +12733,7 @@
12613
12733
  dispatch:
12614
12734
  CPU: upsample_bicubic2d_out_cpu
12615
12735
  CUDA: upsample_bicubic2d_out_cuda
12736
+ MPS: upsample_bicubic2d_out_mps
12616
12737
 
12617
12738
  - func: upsample_bicubic2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
12618
12739
  python_module: nn
@@ -12624,6 +12745,7 @@
12624
12745
  dispatch:
12625
12746
  CPU: upsample_bicubic2d_backward_out_cpu
12626
12747
  CUDA: upsample_bicubic2d_backward_out_cuda
12748
+ MPS: upsample_bicubic2d_backward_out_mps
12627
12749
 
12628
12750
  - func: upsample_bicubic2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
12629
12751
  python_module: nn
@@ -13004,17 +13126,20 @@
13004
13126
  dispatch:
13005
13127
  CPU: im2col_out_cpu
13006
13128
  CUDA: im2col_out_cuda
13129
+ MPS: im2col_out_mps
13007
13130
 
13008
13131
  - func: im2col(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
13009
13132
  python_module: nn
13010
13133
  dispatch:
13011
13134
  CPU: im2col_cpu
13012
13135
  CUDA: im2col_cuda
13136
+ MPS: im2col_mps
13013
13137
 
13014
13138
  - func: isfinite(Tensor self) -> Tensor
13015
13139
  variants: function, method
13016
13140
  device_check: NoCheck
13017
13141
  device_guard: False
13142
+ tags: pointwise
13018
13143
 
13019
13144
  - func: isinf(Tensor self) -> Tensor
13020
13145
  variants: function, method
@@ -13022,9 +13147,10 @@
13022
13147
  device_guard: False
13023
13148
  dispatch:
13024
13149
  CompositeExplicitAutograd: isinf
13150
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_isinf
13025
13151
  SparseCPU, SparseCUDA: isinf_sparse
13026
13152
  SparseMeta: isinf_sparse_meta
13027
- SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
13153
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
13028
13154
  autogen: isinf.out
13029
13155
  tags: [core, pointwise]
13030
13156
 
@@ -13037,34 +13163,36 @@
13037
13163
  variants: function, method
13038
13164
  structured_delegate: isposinf.out
13039
13165
  dispatch:
13166
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_isposinf
13040
13167
  SparseCPU, SparseCUDA: isposinf_sparse
13041
- SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr
13168
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
13042
13169
  tags: pointwise
13043
13170
 
13044
13171
  - func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
13045
13172
  structured: True
13046
13173
  structured_inherits: TensorIteratorBase
13047
13174
  dispatch:
13048
- CPU, CUDA: isposinf_out
13175
+ CPU, CUDA, MPS: isposinf_out
13049
13176
  SparseCPU, SparseCUDA: isposinf_sparse_out
13050
- SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out
13177
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
13051
13178
  tags: pointwise
13052
13179
 
13053
13180
  - func: isneginf(Tensor self) -> Tensor
13054
13181
  variants: function, method
13055
13182
  structured_delegate: isneginf.out
13056
13183
  dispatch:
13184
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_isneginf
13057
13185
  SparseCPU, SparseCUDA: isneginf_sparse
13058
- SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr
13186
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
13059
13187
  tags: pointwise
13060
13188
 
13061
13189
  - func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
13062
13190
  structured: True
13063
13191
  structured_inherits: TensorIteratorBase
13064
13192
  dispatch:
13065
- CPU, CUDA: isneginf_out
13193
+ CPU, CUDA, MPS: isneginf_out
13066
13194
  SparseCPU, SparseCUDA: isneginf_sparse_out
13067
- SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out
13195
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
13068
13196
  tags: pointwise
13069
13197
 
13070
13198
  # NOTE [_add_batch_dim and _remove_batch_dim]
@@ -13075,7 +13203,7 @@
13075
13203
  variants: function
13076
13204
 
13077
13205
  # See NOTE [_add_batch_dim and _remove_batch_dim]
13078
- - func: _remove_batch_dim(Tensor self, int level, int batch_size, int out_dim) -> Tensor
13206
+ - func: _remove_batch_dim(Tensor self, int level, SymInt batch_size, int out_dim) -> Tensor
13079
13207
  variants: function
13080
13208
 
13081
13209
  ## Functions related to the `torch.special` namespace
@@ -13375,7 +13503,7 @@
13375
13503
  structured: True
13376
13504
  structured_inherits: TensorIteratorBase
13377
13505
  dispatch:
13378
- CPU, CUDA: special_i1_out
13506
+ CPU, CUDA, MPS: special_i1_out
13379
13507
  tags: pointwise
13380
13508
 
13381
13509
  - func: special_i1e(Tensor self) -> Tensor
@@ -13787,10 +13915,16 @@
13787
13915
  - func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
13788
13916
  python_module: linalg
13789
13917
  variants: function
13918
+ dispatch:
13919
+ CompositeImplicitAutograd: linalg_lu_factor
13920
+ MPS: linalg_lu_factor_mps
13790
13921
 
13791
13922
  - func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
13792
13923
  python_module: linalg
13793
13924
  variants: function
13925
+ dispatch:
13926
+ CompositeImplicitAutograd: linalg_lu_factor_out
13927
+ MPS: linalg_lu_factor_out_mps
13794
13928
 
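linalg_lu_factor previously relied purely on the composite implementation; it now declares an explicit dispatch table so MPS can route to a native kernel. The public API is unchanged:

    import torch

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    A = torch.randn(4, 4, device=device)
    LU, pivots = torch.linalg.lu_factor(A)  # MPS now hits linalg_lu_factor_mps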
13795
13929
  - func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
13796
13930
  python_module: linalg
@@ -14176,6 +14310,11 @@
14176
14310
  - func: linalg_solve(Tensor A, Tensor B, *, bool left=True) -> Tensor
14177
14311
  python_module: linalg
14178
14312
 
14313
+ - func: _spsolve(Tensor A, Tensor B, *, bool left=True) -> Tensor
14314
+ python_module: sparse
14315
+ dispatch:
14316
+ SparseCsrCUDA: _sparse_csr_linear_solve
14317
+
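_spsolve is a new private direct solver for sparse systems, registered only for SparseCsrCUDA. A heavily hedged sketch: in upstream PyTorch this surfaces as torch.sparse._spsolve and needs a CUDA build with a sparse direct-solver backend (e.g. cuDSS), so treat the call path as an assumption:

    import torch

    A = torch.eye(3, device="cuda").to_sparse_csr()
    b = torch.ones(3, device="cuda")
    x = torch.sparse._spsolve(A, b)   # solves A @ x = b for CSR A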
14179
14318
  - func: linalg_solve.out(Tensor A, Tensor B, *, bool left=True, Tensor(a!) out) -> Tensor(a!)
14180
14319
  python_module: linalg
14181
14320
 
@@ -14352,7 +14491,7 @@
14352
14491
  CPU, CUDA: _segment_reduce_backward_kernel
14353
14492
  autogen: _segment_reduce_backward.out
14354
14493
 
14355
- - func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0) -> Tensor
14494
+ - func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0, str padding_side="right") -> Tensor
14356
14495
  python_module: nn
14357
14496
  variants: function
14358
14497
 
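pad_sequence grows a padding_side argument, so batches can be left-padded (handy for decoder-style models) instead of the previous right-only behavior:

    import torch
    from torch.nn.utils.rnn import pad_sequence

    seqs = [torch.tensor([1, 2, 3]), torch.tensor([4])]
    left = pad_sequence(seqs, batch_first=True, padding_value=0,
                        padding_side="left")
    # tensor([[1, 2, 3],
    #         [0, 0, 4]])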
@@ -14458,7 +14597,7 @@
14458
14597
  variants: function
14459
14598
  dispatch:
14460
14599
  CompositeExplicitAutogradNonFunctional: select_copy_symint
14461
- SparseCsrCPU, SparseCsrCUDA: select_copy_sparse_csr
14600
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_copy_sparse_csr
14462
14601
  tags: view_copy
14463
14602
  autogen: select_copy.int_out
14464
14603
 
@@ -14648,11 +14787,18 @@
14648
14787
  variants: function
14649
14788
  dispatch:
14650
14789
  CUDA: _fbgemm_jagged_to_padded_dense_forward
14790
+ CPU: _jagged_to_padded_dense_forward_cpu
14651
14791
 
14652
14792
  - func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
14653
14793
  variants: function
14654
14794
  dispatch:
14655
14795
  CUDA: _fbgemm_dense_to_jagged_forward_symint
14796
+ CPU: _padded_dense_to_jagged_forward_cpu
14797
+
14798
+ - func: _nested_from_padded_tensor(Tensor padded, Tensor offsets, Tensor dummy, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None, SymInt? sum_S=None) -> Tensor
14799
+ variants: function
14800
+ device_check: NoCheck
14801
+ dispatch: {}
14656
14802
 
14657
14803
  - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
14658
14804
  dispatch:
@@ -14660,6 +14806,11 @@
14660
14806
  NestedTensorCUDA: NestedTensor_softmax_dropout_cuda
14661
14807
  tags: nondeterministic_seeded
14662
14808
 
14809
+ - func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
14810
+ dispatch:
14811
+ CompositeExplicitAutograd: _safe_softmax
14812
+ NestedTensorCPU, NestedTensorCUDA: _safe_softmax
14813
+
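_safe_softmax backs the scaled-dot-product-attention path: for rows that are fully masked out (all -inf scores) it returns zeros where a plain softmax would produce NaNs. Sketch of the behavior via the private binding:

    import torch

    scores = torch.tensor([[0.0, 1.0],
                           [float("-inf"), float("-inf")]])  # fully masked row
    out = torch._safe_softmax(scores, dim=-1)
    # row 0: ordinary softmax; row 1: zeros instead of NaNs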
14663
14814
  # Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
14664
14815
  - func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
14665
14816
  variants: function
@@ -14674,24 +14825,29 @@
14674
14825
  CUDA, NestedTensorCUDA: native_multi_head_attention_cuda
14675
14826
  autogen: _native_multi_head_attention.out
14676
14827
 
14677
- - func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> Tensor
14828
+ - func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> Tensor
14678
14829
  python_module: nn
14679
14830
  variants: function
14680
14831
  autogen: scaled_dot_product_attention.out
14681
14832
  tags: nondeterministic_seeded
14682
14833
 
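The enable_gqa flag threads grouped-query attention through the SDPA entry points: the key/value tensors may carry fewer heads than the query, and each KV head is shared by a group of query heads. Example with 8 query heads over 2 KV heads:

    import torch
    import torch.nn.functional as F

    q = torch.randn(1, 8, 16, 64)   # 8 query heads
    k = torch.randn(1, 2, 16, 64)   # 2 KV heads; 8 % 2 == 0
    v = torch.randn(1, 2, 16, 64)
    out = F.scaled_dot_product_attention(q, k, v, enable_gqa=True)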
14683
14834
  # This aten function is kept so that we can test the choice function from Python
14684
- - func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int
14835
+ - func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> int
14685
14836
  dispatch:
14686
14837
  Meta: _fused_sdp_choice_meta
14687
14838
  CPU, NestedTensorCPU: _fused_sdp_choice_cpp
14688
14839
  CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
14689
14840
  tags: nondeterministic_seeded
14690
14841
 
14691
- - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
14842
+ - func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
14692
14843
  variants: function
14693
14844
  tags: nondeterministic_seeded
14694
14845
 
14846
+ - func: _scaled_dot_product_attention_math_for_mps(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
14847
+ dispatch:
14848
+ MPS: _scaled_dot_product_attention_math_mps
14849
+ tags: nondeterministic_seeded
14850
+
14695
14851
  - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14696
14852
  dispatch:
14697
14853
  CUDA: _scaled_dot_product_flash_attention_cuda
@@ -14703,6 +14859,11 @@
14703
14859
  CPU: _scaled_dot_product_flash_attention_cpu
14704
14860
  tags: nondeterministic_seeded
14705
14861
 
14862
+ - func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14863
+ dispatch:
14864
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
14865
+ tags: nondeterministic_seeded
14866
+
14706
14867
  - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
14707
14868
  device_check: NoCheck
14708
14869
  variants: function
@@ -14716,6 +14877,12 @@
14716
14877
  dispatch:
14717
14878
  CPU: _scaled_dot_product_flash_attention_cpu_backward
14718
14879
 
14880
+ - func: _scaled_dot_product_fused_attention_overrideable_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor attn_bias, bool[4] grad_input_mask, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value, Tensor grad_attn_bias)
14881
+ device_check: NoCheck
14882
+ variants: function
14883
+ dispatch:
14884
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable_backward
14885
+
14719
14886
  - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
14720
14887
  dispatch:
14721
14888
  CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14728,12 +14895,12 @@
14728
14895
  CUDA: _scaled_dot_product_efficient_attention_backward_cuda
14729
14896
  tags: nondeterministic_seeded
14730
14897
 
14731
- - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14898
+ - func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
14732
14899
  dispatch:
14733
14900
  CUDA: _scaled_dot_product_cudnn_attention_cuda
14734
14901
  tags: nondeterministic_seeded
14735
14902
 
14736
- - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
14903
+ - func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
14737
14904
  dispatch:
14738
14905
  CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
14739
14906
  tags: nondeterministic_seeded
@@ -15563,6 +15730,7 @@
15563
15730
  dispatch:
15564
15731
  CPU: _fused_adam_kernel_cpu_
15565
15732
  CUDA: _fused_adam_kernel_cuda_
15733
+ MPS: _fused_adam_kernel_mps_
15566
15734
  autogen: _fused_adam, _fused_adam.out
15567
15735
 
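The fused optimizer kernels (Adam, AdamW, and SGD, below) all gain MPS dispatch entries, so fused=True now runs natively on Apple-silicon GPUs instead of being rejected. A sketch that falls back to CPU when MPS is unavailable:

    import torch

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    model = torch.nn.Linear(8, 8).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3, fused=True)
    model(torch.randn(4, 8, device=device)).sum().backward()
    opt.step()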
15568
15736
  - func: _fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15573,6 +15741,7 @@
15573
15741
  dispatch:
15574
15742
  CPU: _fused_adam_kernel_cpu_
15575
15743
  CUDA: _fused_adam_kernel_cuda_
15744
+ MPS: _fused_adam_kernel_mps_
15576
15745
  autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out
15577
15746
 
15578
15747
  - func: _fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15581,6 +15750,7 @@
15581
15750
  dispatch:
15582
15751
  CPU: _fused_adamw_kernel_cpu_
15583
15752
  CUDA: _fused_adamw_kernel_cuda_
15753
+ MPS: _fused_adamw_kernel_mps_
15584
15754
  autogen: _fused_adamw, _fused_adamw.out
15585
15755
 
15586
15756
  - func: _fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15591,6 +15761,7 @@
15591
15761
  dispatch:
15592
15762
  CPU: _fused_adamw_kernel_cpu_
15593
15763
  CUDA: _fused_adamw_kernel_cuda_
15764
+ MPS: _fused_adamw_kernel_mps_
15594
15765
  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
15595
15766
 
15596
15767
  - func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15599,6 +15770,7 @@
15599
15770
  dispatch:
15600
15771
  CPU: _fused_sgd_kernel_cpu_
15601
15772
  CUDA: _fused_sgd_kernel_cuda_
15773
+ MPS: _fused_sgd_kernel_mps_
15602
15774
  autogen: _fused_sgd, _fused_sgd.out
15603
15775
 
15604
15776
  - func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
@@ -15609,6 +15781,7 @@
15609
15781
  dispatch:
15610
15782
  CPU: _fused_sgd_kernel_cpu_
15611
15783
  CUDA: _fused_sgd_kernel_cuda_
15784
+ MPS: _fused_sgd_kernel_mps_
15612
15785
  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
15613
15786
 
15614
15787
  - func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()