torch-rb 0.19.1 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +13 -10
- data/codegen/native_functions.yaml +336 -276
- data/ext/torch/device.cpp +3 -0
- data/ext/torch/ext.cpp +1 -2
- data/ext/torch/ivalue.cpp +2 -0
- data/ext/torch/nn.cpp +3 -1
- data/ext/torch/ruby_arg_parser.cpp +7 -3
- data/ext/torch/ruby_arg_parser.h +5 -2
- data/ext/torch/templates.h +18 -36
- data/ext/torch/tensor.cpp +11 -8
- data/ext/torch/torch.cpp +6 -3
- data/ext/torch/utils.h +3 -1
- data/lib/torch/distributions/distribution.rb +26 -0
- data/lib/torch/distributions/exponential_family.rb +6 -0
- data/lib/torch/distributions/normal.rb +22 -0
- data/lib/torch/distributions/utils.rb +10 -0
- data/lib/torch/nn/conv1d.rb +11 -3
- data/lib/torch/nn/conv2d.rb +11 -3
- data/lib/torch/nn/conv3d.rb +11 -3
- data/lib/torch/nn/convnd.rb +1 -1
- data/lib/torch/nn/embedding.rb +10 -3
- data/lib/torch/nn/embedding_bag.rb +10 -3
- data/lib/torch/nn/functional.rb +20 -6
- data/lib/torch/nn/functional_attention.rb +30 -15
- data/lib/torch/nn/multihead_attention.rb +17 -7
- data/lib/torch/nn/rnn_base.rb +10 -3
- data/lib/torch/nn/transformer.rb +19 -10
- data/lib/torch/nn/transformer_decoder_layer.rb +7 -4
- data/lib/torch/nn/transformer_encoder_layer.rb +7 -4
- data/lib/torch/tensor.rb +1 -0
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +7 -1
- metadata +8 -4
@@ -288,13 +288,13 @@
|
|
288
288
|
dispatch:
|
289
289
|
CPU: native_dropout_cpu
|
290
290
|
CUDA: native_dropout_cuda
|
291
|
-
NestedTensorCPU, NestedTensorCUDA: native_dropout_nested
|
291
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_nested
|
292
292
|
tags: [nondeterministic_seeded, core]
|
293
293
|
autogen: native_dropout.out
|
294
294
|
|
295
295
|
- func: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
|
296
296
|
dispatch:
|
297
|
-
CPU, NestedTensorCPU, NestedTensorCUDA: native_dropout_backward
|
297
|
+
CPU, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_backward
|
298
298
|
CUDA: native_dropout_backward_cuda
|
299
299
|
autogen: native_dropout_backward.out
|
300
300
|
tags: pointwise
|
@@ -342,7 +342,7 @@
|
|
342
342
|
CompositeExplicitAutograd: abs
|
343
343
|
SparseCPU, SparseCUDA: abs_sparse
|
344
344
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
|
345
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
|
345
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs
|
346
346
|
tags: [core, pointwise]
|
347
347
|
|
348
348
|
- func: abs_(Tensor(a!) self) -> Tensor(a!)
|
@@ -352,13 +352,12 @@
|
|
352
352
|
CompositeExplicitAutograd: abs_
|
353
353
|
SparseCPU, SparseCUDA: abs_sparse_
|
354
354
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
|
355
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
|
355
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs_
|
356
356
|
|
357
357
|
- func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
358
358
|
device_check: NoCheck # TensorIterator
|
359
359
|
dispatch:
|
360
|
-
CPU, CUDA: abs_out
|
361
|
-
MPS: abs_out_mps
|
360
|
+
CPU, CUDA, MPS: abs_out
|
362
361
|
SparseCPU, SparseCUDA: abs_sparse_out
|
363
362
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
|
364
363
|
tags: pointwise
|
@@ -403,6 +402,7 @@
|
|
403
402
|
variants: function, method
|
404
403
|
dispatch:
|
405
404
|
CPU, CUDA: angle
|
405
|
+
MPS: angle_mps
|
406
406
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
|
407
407
|
tags: pointwise
|
408
408
|
|
@@ -410,6 +410,7 @@
|
|
410
410
|
device_check: NoCheck # TensorIterator
|
411
411
|
dispatch:
|
412
412
|
CPU, CUDA: angle_out
|
413
|
+
MPS: angle_out_mps
|
413
414
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
|
414
415
|
tags: pointwise
|
415
416
|
|
@@ -429,7 +430,7 @@
|
|
429
430
|
dispatch:
|
430
431
|
SparseCPU, SparseCUDA: sgn_sparse
|
431
432
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
|
432
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
|
433
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn
|
433
434
|
tags: pointwise
|
434
435
|
|
435
436
|
- func: sgn_(Tensor(a!) self) -> Tensor(a!)
|
@@ -438,7 +439,7 @@
|
|
438
439
|
dispatch:
|
439
440
|
SparseCPU, SparseCUDA: sgn_sparse_
|
440
441
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
|
441
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
|
442
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn_
|
442
443
|
tags: pointwise
|
443
444
|
|
444
445
|
- func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -525,8 +526,7 @@
|
|
525
526
|
structured: True
|
526
527
|
structured_inherits: TensorIteratorBase
|
527
528
|
dispatch:
|
528
|
-
CPU, CUDA: acos_out
|
529
|
-
MPS: acos_out_mps
|
529
|
+
CPU, CUDA, MPS: acos_out
|
530
530
|
tags: pointwise
|
531
531
|
|
532
532
|
# arccos, alias of acos
|
@@ -558,7 +558,7 @@
|
|
558
558
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
|
559
559
|
MkldnnCPU: mkldnn_add
|
560
560
|
ZeroTensor: add_zerotensor
|
561
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
|
561
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add_Tensor
|
562
562
|
tags: [core, pointwise]
|
563
563
|
|
564
564
|
- func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
|
@@ -569,7 +569,7 @@
|
|
569
569
|
SparseCPU, SparseCUDA, SparseMeta: add_sparse_
|
570
570
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
|
571
571
|
MkldnnCPU: mkldnn_add_
|
572
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
|
572
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add__Tensor
|
573
573
|
tags: pointwise
|
574
574
|
|
575
575
|
- func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
|
@@ -701,7 +701,7 @@
|
|
701
701
|
structured_delegate: all.out
|
702
702
|
variants: function, method
|
703
703
|
dispatch:
|
704
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
|
704
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_all
|
705
705
|
|
706
706
|
|
707
707
|
- func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
|
@@ -940,7 +940,7 @@
|
|
940
940
|
- func: as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)
|
941
941
|
variants: function, method
|
942
942
|
dispatch:
|
943
|
-
ZeroTensor, CPU, CUDA: as_strided_tensorimpl
|
943
|
+
ZeroTensor, CPU, CUDA, MTIA: as_strided_tensorimpl
|
944
944
|
Meta: as_strided_tensorimpl_meta_symint
|
945
945
|
MPS: as_strided_tensorimpl_mps
|
946
946
|
QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
|
@@ -980,8 +980,7 @@
|
|
980
980
|
structured: True
|
981
981
|
structured_inherits: TensorIteratorBase
|
982
982
|
dispatch:
|
983
|
-
CPU, CUDA: asin_out
|
984
|
-
MPS: asin_out_mps
|
983
|
+
CPU, CUDA, MPS: asin_out
|
985
984
|
SparseCPU, SparseCUDA: asin_sparse_out
|
986
985
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
|
987
986
|
tags: pointwise
|
@@ -1018,8 +1017,7 @@
|
|
1018
1017
|
structured: True
|
1019
1018
|
structured_inherits: TensorIteratorBase
|
1020
1019
|
dispatch:
|
1021
|
-
CPU, CUDA: atan_out
|
1022
|
-
MPS: atan_out_mps
|
1020
|
+
CPU, CUDA, MPS: atan_out
|
1023
1021
|
SparseCPU, SparseCUDA: atan_sparse_out
|
1024
1022
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
|
1025
1023
|
tags: pointwise
|
@@ -1071,6 +1069,16 @@
|
|
1071
1069
|
XPU: baddbmm_out_xpu
|
1072
1070
|
SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
|
1073
1071
|
|
1072
|
+
- func: baddbmm.dtype(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
|
1073
|
+
variants: function
|
1074
|
+
dispatch:
|
1075
|
+
CUDA: _baddbmm_dtype_cuda
|
1076
|
+
|
1077
|
+
- func: baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
|
1078
|
+
variants: function
|
1079
|
+
dispatch:
|
1080
|
+
CUDA: _baddbmm_out_dtype_cuda
|
1081
|
+
|
1074
1082
|
- func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
1075
1083
|
dispatch:
|
1076
1084
|
CompositeExplicitAutograd: bartlett_window
|
@@ -1183,7 +1191,7 @@
|
|
1183
1191
|
CompositeExplicitAutograd: binary_cross_entropy_with_logits
|
1184
1192
|
autogen: binary_cross_entropy_with_logits.out
|
1185
1193
|
|
1186
|
-
- func: bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor
|
1194
|
+
- func: bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor
|
1187
1195
|
variants: function, method
|
1188
1196
|
dispatch:
|
1189
1197
|
CPU: _bincount_cpu
|
@@ -1209,8 +1217,7 @@
|
|
1209
1217
|
structured: True
|
1210
1218
|
structured_inherits: TensorIteratorBase
|
1211
1219
|
dispatch:
|
1212
|
-
CPU, CUDA: bitwise_not_out
|
1213
|
-
MPS: bitwise_not_out_mps
|
1220
|
+
CPU, CUDA, MPS, MTIA: bitwise_not_out
|
1214
1221
|
tags: pointwise
|
1215
1222
|
|
1216
1223
|
- func: copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -1260,7 +1267,7 @@
|
|
1260
1267
|
variants: function, method
|
1261
1268
|
dispatch:
|
1262
1269
|
CompositeExplicitAutograd: logical_not
|
1263
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not
|
1270
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_logical_not
|
1264
1271
|
tags: [core, pointwise]
|
1265
1272
|
|
1266
1273
|
- func: logical_not_(Tensor(a!) self) -> Tensor(a!)
|
@@ -1268,7 +1275,7 @@
|
|
1268
1275
|
variants: method
|
1269
1276
|
dispatch:
|
1270
1277
|
CompositeExplicitAutograd: logical_not_
|
1271
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not_
|
1278
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_logical_not_
|
1272
1279
|
tags: pointwise
|
1273
1280
|
|
1274
1281
|
- func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -1316,7 +1323,7 @@
|
|
1316
1323
|
- func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
1317
1324
|
device_check: NoCheck # TensorIterator
|
1318
1325
|
dispatch:
|
1319
|
-
CPU, CUDA: logical_and_out
|
1326
|
+
CPU, CUDA, MTIA: logical_and_out
|
1320
1327
|
MPS: logical_and_out_mps
|
1321
1328
|
tags: pointwise
|
1322
1329
|
|
@@ -1337,7 +1344,7 @@
|
|
1337
1344
|
- func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
1338
1345
|
device_check: NoCheck # TensorIterator
|
1339
1346
|
dispatch:
|
1340
|
-
CPU, CUDA: logical_or_out
|
1347
|
+
CPU, CUDA, MTIA: logical_or_out
|
1341
1348
|
MPS: logical_or_out_mps
|
1342
1349
|
tags: pointwise
|
1343
1350
|
|
@@ -1373,6 +1380,16 @@
|
|
1373
1380
|
SparseCUDA: bmm_out_sparse_cuda
|
1374
1381
|
SparseCsrCUDA: bmm_out_sparse_csr_cuda
|
1375
1382
|
|
1383
|
+
- func: bmm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
|
1384
|
+
variants: function
|
1385
|
+
dispatch:
|
1386
|
+
CUDA: _bmm_dtype_cuda
|
1387
|
+
|
1388
|
+
- func: bmm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
|
1389
|
+
variants: function
|
1390
|
+
dispatch:
|
1391
|
+
CUDA: _bmm_out_dtype_cuda
|
1392
|
+
|
1376
1393
|
- func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
|
1377
1394
|
device_check: NoCheck
|
1378
1395
|
device_guard: False
|
@@ -1392,7 +1409,7 @@
|
|
1392
1409
|
dispatch:
|
1393
1410
|
SparseCPU, SparseCUDA: cat_sparse
|
1394
1411
|
QuantizedCPU: cat_quantized_cpu
|
1395
|
-
NestedTensorCPU, NestedTensorCUDA: cat_nested
|
1412
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: cat_nested
|
1396
1413
|
tags: core
|
1397
1414
|
|
1398
1415
|
- func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -1456,8 +1473,7 @@
|
|
1456
1473
|
structured: True
|
1457
1474
|
structured_inherits: TensorIteratorBase
|
1458
1475
|
dispatch:
|
1459
|
-
CPU, CUDA: ceil_out
|
1460
|
-
MPS: ceil_out_mps
|
1476
|
+
CPU, CUDA, MPS: ceil_out
|
1461
1477
|
SparseCPU, SparseCUDA: ceil_sparse_out
|
1462
1478
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
|
1463
1479
|
tags: pointwise
|
@@ -1481,7 +1497,7 @@
|
|
1481
1497
|
device_guard: False
|
1482
1498
|
dispatch:
|
1483
1499
|
CompositeImplicitAutograd: chunk
|
1484
|
-
NestedTensorCPU, NestedTensorCUDA: chunk_nested_tensor
|
1500
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: chunk_nested_tensor
|
1485
1501
|
|
1486
1502
|
- func: tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[]
|
1487
1503
|
variants: function, method
|
@@ -1528,7 +1544,7 @@
|
|
1528
1544
|
structured: True
|
1529
1545
|
structured_inherits: TensorIteratorBase
|
1530
1546
|
dispatch:
|
1531
|
-
CPU, CUDA: clamp_out
|
1547
|
+
CPU, CUDA, MTIA: clamp_out
|
1532
1548
|
MPS: clamp_out_mps
|
1533
1549
|
tags: pointwise
|
1534
1550
|
|
@@ -1568,7 +1584,7 @@
|
|
1568
1584
|
structured: True
|
1569
1585
|
structured_inherits: TensorIteratorBase
|
1570
1586
|
dispatch:
|
1571
|
-
CPU, CUDA: clamp_max_out
|
1587
|
+
CPU, CUDA, MTIA: clamp_max_out
|
1572
1588
|
MPS: clamp_max_out_mps
|
1573
1589
|
tags: pointwise
|
1574
1590
|
|
@@ -1608,7 +1624,7 @@
|
|
1608
1624
|
structured: True
|
1609
1625
|
structured_inherits: TensorIteratorBase
|
1610
1626
|
dispatch:
|
1611
|
-
CPU, CUDA: clamp_min_out
|
1627
|
+
CPU, CUDA, MTIA: clamp_min_out
|
1612
1628
|
MPS: clamp_min_out_mps
|
1613
1629
|
tags: pointwise
|
1614
1630
|
|
@@ -1657,8 +1673,7 @@
|
|
1657
1673
|
|
1658
1674
|
- func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
|
1659
1675
|
dispatch:
|
1660
|
-
CPU, CUDA: complex_out
|
1661
|
-
MPS: complex_out_mps
|
1676
|
+
CPU, CUDA, MPS: complex_out
|
1662
1677
|
|
1663
1678
|
- func: polar(Tensor abs, Tensor angle) -> Tensor
|
1664
1679
|
variants: function
|
@@ -1667,8 +1682,7 @@
|
|
1667
1682
|
|
1668
1683
|
- func: polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)
|
1669
1684
|
dispatch:
|
1670
|
-
CPU, CUDA: polar_out
|
1671
|
-
MPS: polar_out_mps
|
1685
|
+
CPU, CUDA, MPS: polar_out
|
1672
1686
|
|
1673
1687
|
- func: constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor
|
1674
1688
|
variants: function
|
@@ -1780,7 +1794,7 @@
|
|
1780
1794
|
SparseCPU, SparseCUDA: copy_sparse_wrapper_
|
1781
1795
|
CompositeExplicitAutograd: copy_
|
1782
1796
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
|
1783
|
-
NestedTensorCPU, NestedTensorCUDA: copy_nested_
|
1797
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: copy_nested_
|
1784
1798
|
autogen: copy.out
|
1785
1799
|
|
1786
1800
|
- func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
|
@@ -1800,7 +1814,7 @@
|
|
1800
1814
|
variants: function, method
|
1801
1815
|
structured_delegate: cos.out
|
1802
1816
|
dispatch:
|
1803
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_cos
|
1817
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_cos
|
1804
1818
|
tags: [core, pointwise]
|
1805
1819
|
|
1806
1820
|
- func: cos_(Tensor(a!) self) -> Tensor(a!)
|
@@ -1814,8 +1828,7 @@
|
|
1814
1828
|
structured: True
|
1815
1829
|
structured_inherits: TensorIteratorBase
|
1816
1830
|
dispatch:
|
1817
|
-
CPU, CUDA: cos_out
|
1818
|
-
MPS: cos_out_mps
|
1831
|
+
CPU, CUDA, MPS, MTIA: cos_out
|
1819
1832
|
tags: pointwise
|
1820
1833
|
|
1821
1834
|
- func: cosh(Tensor self) -> Tensor
|
@@ -1835,8 +1848,7 @@
|
|
1835
1848
|
structured: True
|
1836
1849
|
structured_inherits: TensorIteratorBase
|
1837
1850
|
dispatch:
|
1838
|
-
CPU, CUDA: cosh_out
|
1839
|
-
MPS: cosh_out_mps
|
1851
|
+
CPU, CUDA, MPS: cosh_out
|
1840
1852
|
tags: pointwise
|
1841
1853
|
|
1842
1854
|
- func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
|
@@ -1950,6 +1962,7 @@
|
|
1950
1962
|
dispatch:
|
1951
1963
|
CPU: cummax_helper_cpu
|
1952
1964
|
CUDA: cummax_helper_cuda
|
1965
|
+
MPS: cummax_helper_mps
|
1953
1966
|
|
1954
1967
|
- func: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)
|
1955
1968
|
device_check: NoCheck # TensorIterator
|
@@ -1974,6 +1987,7 @@
|
|
1974
1987
|
dispatch:
|
1975
1988
|
CPU: cummin_helper_cpu
|
1976
1989
|
CUDA: cummin_helper_cuda
|
1990
|
+
MPS: cummin_helper_mps
|
1977
1991
|
|
1978
1992
|
- func: cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor
|
1979
1993
|
variants: function
|
@@ -2138,7 +2152,7 @@
|
|
2138
2152
|
dispatch:
|
2139
2153
|
SparseCPU, SparseCUDA: div_sparse
|
2140
2154
|
ZeroTensor: div_zerotensor
|
2141
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_div_Tensor
|
2155
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Tensor
|
2142
2156
|
tags: [core, pointwise]
|
2143
2157
|
|
2144
2158
|
- func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
@@ -2154,8 +2168,7 @@
|
|
2154
2168
|
structured: True
|
2155
2169
|
structured_inherits: TensorIteratorBase
|
2156
2170
|
dispatch:
|
2157
|
-
CPU, CUDA: div_out
|
2158
|
-
MPS: div_out_mps
|
2171
|
+
CPU, CUDA, MPS: div_out
|
2159
2172
|
SparseCPU, SparseCUDA: div_out_sparse_zerodim
|
2160
2173
|
tags: pointwise
|
2161
2174
|
|
@@ -2180,8 +2193,7 @@
|
|
2180
2193
|
structured: True
|
2181
2194
|
structured_inherits: TensorIteratorBase
|
2182
2195
|
dispatch:
|
2183
|
-
CPU, CUDA: div_out_mode
|
2184
|
-
MPS: div_out_mode_mps
|
2196
|
+
CPU, CUDA, MPS: div_out_mode
|
2185
2197
|
SparseCPU, SparseCUDA: div_out_sparse_zerodim
|
2186
2198
|
tags: pointwise
|
2187
2199
|
|
@@ -2191,7 +2203,7 @@
|
|
2191
2203
|
variants: function, method
|
2192
2204
|
dispatch:
|
2193
2205
|
CompositeExplicitAutograd: div
|
2194
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_div_Scalar
|
2206
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Scalar
|
2195
2207
|
tags: [core, pointwise]
|
2196
2208
|
|
2197
2209
|
- func: div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
@@ -2291,7 +2303,7 @@
|
|
2291
2303
|
- func: embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
|
2292
2304
|
dispatch:
|
2293
2305
|
CompositeExplicitAutograd: embedding_symint
|
2294
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_embedding
|
2306
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_embedding
|
2295
2307
|
autogen: embedding.out
|
2296
2308
|
tags: core
|
2297
2309
|
|
@@ -2497,7 +2509,7 @@
|
|
2497
2509
|
QuantizedCPU, QuantizedCUDA: empty_like_quantized
|
2498
2510
|
SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
|
2499
2511
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
|
2500
|
-
NestedTensorCPU, NestedTensorCUDA: empty_like_nested
|
2512
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: empty_like_nested
|
2501
2513
|
autogen: empty_like.out
|
2502
2514
|
|
2503
2515
|
- func: empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
@@ -2533,8 +2545,7 @@
|
|
2533
2545
|
structured: True
|
2534
2546
|
structured_inherits: TensorIteratorBase
|
2535
2547
|
dispatch:
|
2536
|
-
CPU, CUDA: erf_out
|
2537
|
-
MPS: erf_out_mps
|
2548
|
+
CPU, CUDA, MPS, MTIA: erf_out
|
2538
2549
|
SparseCPU, SparseCUDA: erf_sparse_out
|
2539
2550
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
|
2540
2551
|
tags: pointwise
|
@@ -2556,7 +2567,7 @@
|
|
2556
2567
|
structured: True
|
2557
2568
|
structured_inherits: TensorIteratorBase
|
2558
2569
|
dispatch:
|
2559
|
-
CPU, CUDA: erfc_out
|
2570
|
+
CPU, CUDA, MPS: erfc_out
|
2560
2571
|
tags: pointwise
|
2561
2572
|
|
2562
2573
|
- func: exp(Tensor self) -> Tensor
|
@@ -2576,8 +2587,7 @@
|
|
2576
2587
|
structured: True
|
2577
2588
|
structured_inherits: TensorIteratorBase
|
2578
2589
|
dispatch:
|
2579
|
-
CPU, CUDA: exp_out
|
2580
|
-
MPS: exp_out_mps
|
2590
|
+
CPU, CUDA, MPS, MTIA: exp_out
|
2581
2591
|
tags: pointwise
|
2582
2592
|
|
2583
2593
|
- func: exp2(Tensor self) -> Tensor
|
@@ -2594,8 +2604,7 @@
|
|
2594
2604
|
structured: True
|
2595
2605
|
structured_inherits: TensorIteratorBase
|
2596
2606
|
dispatch:
|
2597
|
-
CPU, CUDA: exp2_out
|
2598
|
-
MPS: exp2_out_mps
|
2607
|
+
CPU, CUDA, MPS: exp2_out
|
2599
2608
|
tags: pointwise
|
2600
2609
|
|
2601
2610
|
- func: expm1(Tensor self) -> Tensor
|
@@ -2621,8 +2630,7 @@
|
|
2621
2630
|
structured: True
|
2622
2631
|
structured_inherits: TensorIteratorBase
|
2623
2632
|
dispatch:
|
2624
|
-
CPU, CUDA: expm1_out
|
2625
|
-
MPS: expm1_out_mps
|
2633
|
+
CPU, CUDA, MPS: expm1_out
|
2626
2634
|
SparseCPU, SparseCUDA: expm1_sparse_out
|
2627
2635
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
|
2628
2636
|
tags: pointwise
|
@@ -2703,7 +2711,7 @@
|
|
2703
2711
|
QuantizedCPU, QuantizedCUDA: fill_quantized_
|
2704
2712
|
Meta: fill_meta_
|
2705
2713
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
|
2706
|
-
NestedTensorCPU, NestedTensorCUDA: fill_nested_
|
2714
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
|
2707
2715
|
autogen: fill.Scalar_out
|
2708
2716
|
|
2709
2717
|
- func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
|
@@ -2714,7 +2722,7 @@
|
|
2714
2722
|
MPS: fill_tensor_mps_
|
2715
2723
|
QuantizedCPU, QuantizedCUDA: fill_quantized_
|
2716
2724
|
Meta: fill_meta_
|
2717
|
-
NestedTensorCPU, NestedTensorCUDA: fill_nested_
|
2725
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
|
2718
2726
|
autogen: fill.Tensor_out
|
2719
2727
|
|
2720
2728
|
- func: floor(Tensor self) -> Tensor
|
@@ -2740,8 +2748,7 @@
|
|
2740
2748
|
structured: True
|
2741
2749
|
structured_inherits: TensorIteratorBase
|
2742
2750
|
dispatch:
|
2743
|
-
CPU, CUDA: floor_out
|
2744
|
-
MPS: floor_out_mps
|
2751
|
+
CPU, CUDA, MPS: floor_out
|
2745
2752
|
SparseCPU, SparseCUDA: floor_sparse_out
|
2746
2753
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
|
2747
2754
|
tags: pointwise
|
@@ -2750,23 +2757,20 @@
|
|
2750
2757
|
device_check: NoCheck # TensorIterator
|
2751
2758
|
variants: function, method
|
2752
2759
|
dispatch:
|
2753
|
-
CPU, CUDA: floor_divide
|
2754
|
-
MPS: floor_divide_mps
|
2760
|
+
CPU, CUDA, MPS: floor_divide
|
2755
2761
|
SparseCPU, SparseCUDA: floor_divide_sparse
|
2756
2762
|
|
2757
2763
|
- func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
2758
2764
|
device_check: NoCheck # TensorIterator
|
2759
2765
|
variants: method
|
2760
2766
|
dispatch:
|
2761
|
-
CPU, CUDA: floor_divide_
|
2762
|
-
MPS: floor_divide_mps_
|
2767
|
+
CPU, CUDA, MPS: floor_divide_
|
2763
2768
|
SparseCPU, SparseCUDA: floor_divide_sparse_
|
2764
2769
|
|
2765
2770
|
- func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
2766
2771
|
device_check: NoCheck # TensorIterator
|
2767
2772
|
dispatch:
|
2768
|
-
CPU, CUDA: floor_divide_out
|
2769
|
-
MPS: floor_divide_out_mps
|
2773
|
+
CPU, CUDA, MPS: floor_divide_out
|
2770
2774
|
SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim
|
2771
2775
|
|
2772
2776
|
- func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
|
@@ -3000,6 +3004,7 @@
|
|
3000
3004
|
CPU: _fft_r2c_mkl
|
3001
3005
|
CUDA: _fft_r2c_cufft
|
3002
3006
|
MPS: _fft_r2c_mps
|
3007
|
+
tags: core
|
3003
3008
|
|
3004
3009
|
- func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
|
3005
3010
|
variants: function
|
@@ -3100,6 +3105,7 @@
|
|
3100
3105
|
- dim -> int dim
|
3101
3106
|
dispatch:
|
3102
3107
|
CPU, CUDA: index_copy_out
|
3108
|
+
MPS: index_copy_out_mps
|
3103
3109
|
|
3104
3110
|
- func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
|
3105
3111
|
variants: method
|
@@ -3170,7 +3176,7 @@
|
|
3170
3176
|
variants: function
|
3171
3177
|
structured: True
|
3172
3178
|
dispatch:
|
3173
|
-
CPU, CUDA: isin_Tensor_Scalar_out
|
3179
|
+
CPU, CUDA, MPS: isin_Tensor_Scalar_out
|
3174
3180
|
|
3175
3181
|
- func: isin.Tensor_Scalar(Tensor elements, Scalar test_element, *, bool assume_unique=False, bool invert=False) -> Tensor
|
3176
3182
|
variants: function
|
@@ -3181,6 +3187,7 @@
|
|
3181
3187
|
structured: True
|
3182
3188
|
dispatch:
|
3183
3189
|
CPU, CUDA: isin_Scalar_Tensor_out
|
3190
|
+
MPS: isin_Scalar_Tensor_out_mps
|
3184
3191
|
|
3185
3192
|
- func: isin.Scalar_Tensor(Scalar element, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor
|
3186
3193
|
variants: function
|
@@ -3191,8 +3198,8 @@
|
|
3191
3198
|
device_check: NoCheck
|
3192
3199
|
device_guard: False
|
3193
3200
|
dispatch:
|
3194
|
-
CPU, CUDA, MPS: isnan
|
3195
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_isnan
|
3201
|
+
CPU, CUDA, MPS, MTIA: isnan
|
3202
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isnan
|
3196
3203
|
SparseCPU, SparseCUDA: isnan_sparse
|
3197
3204
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
|
3198
3205
|
autogen: isnan.out
|
@@ -3243,7 +3250,7 @@
|
|
3243
3250
|
device_check: NoCheck
|
3244
3251
|
device_guard: False
|
3245
3252
|
dispatch:
|
3246
|
-
NestedTensorCPU, NestedTensorCUDA: nested_is_same_size
|
3253
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_is_same_size
|
3247
3254
|
CompositeExplicitAutograd: is_same_size
|
3248
3255
|
|
3249
3256
|
- func: is_signed(Tensor self) -> bool
|
@@ -3265,20 +3272,20 @@
|
|
3265
3272
|
|
3266
3273
|
- func: kron.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
3267
3274
|
|
3268
|
-
- func: kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
|
3275
|
+
- func: kthvalue(Tensor self, SymInt k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
|
3269
3276
|
variants: function, method
|
3270
3277
|
dispatch:
|
3271
3278
|
CompositeExplicitAutograd: kthvalue
|
3272
3279
|
|
3273
|
-
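- func: kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)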
|
3280
|
+
- func: kthvalue.values(Tensor self, SymInt k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
|
3274
3281
|
dispatch:
|
3275
3282
|
CPU: kthvalue_out_cpu
|
3276
3283
|
CUDA: kthvalue_out_cuda
|
3277
3284
|
|
3278
|
-
- func: kthvalue.dimname(Tensor self, int k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
3285
|
+
- func: kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
3279
3286
|
variants: function, method
|
3280
3287
|
|
3281
|
-
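- func: kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)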
|
3288
|
+
- func: kthvalue.dimname_out(Tensor self, SymInt k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
|
3282
3289
|
|
3283
3290
|
- func: layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor
|
3284
3291
|
dispatch:
|
@@ -3290,7 +3297,7 @@
|
|
3290
3297
|
CUDA: layer_norm_cuda
|
3291
3298
|
MPS: layer_norm_mps
|
3292
3299
|
CompositeExplicitAutograd: math_native_layer_norm
|
3293
|
-
NestedTensorCPU, NestedTensorCUDA: nested_layer_norm
|
3300
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_layer_norm
|
3294
3301
|
autogen: native_layer_norm.out
|
3295
3302
|
tags: core
|
3296
3303
|
|
@@ -3299,7 +3306,7 @@
|
|
3299
3306
|
CPU: layer_norm_backward_cpu
|
3300
3307
|
CUDA: layer_norm_backward_cuda
|
3301
3308
|
MPS: layer_norm_backward_mps
|
3302
|
-
NestedTensorCPU, NestedTensorCUDA: layer_norm_backward_nested
|
3309
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: layer_norm_backward_nested
|
3303
3310
|
autogen: native_layer_norm_backward.out
|
3304
3311
|
tags: core
|
3305
3312
|
|
@@ -3307,6 +3314,10 @@
|
|
3307
3314
|
dispatch:
|
3308
3315
|
CompositeImplicitAutograd: rms_norm_symint
|
3309
3316
|
|
3317
|
+
- func: _fused_rms_norm(Tensor input, int normalized_shape_ndim, Tensor weight, float eps) -> Tensor
|
3318
|
+
dispatch:
|
3319
|
+
MPS: _fused_rms_norm_mps
|
3320
|
+
|
3310
3321
|
- func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
|
3311
3322
|
variants: function, method
|
3312
3323
|
dispatch:
|
@@ -3323,7 +3334,7 @@
|
|
3323
3334
|
|
3324
3335
|
- func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
|
3325
3336
|
dispatch:
|
3326
|
-
CPU, CUDA: nan_to_num_out
|
3337
|
+
CPU, CUDA, MTIA: nan_to_num_out
|
3327
3338
|
MPS: nan_to_num_out_mps
|
3328
3339
|
SparseCPU, SparseCUDA: nan_to_num_sparse_out
|
3329
3340
|
tags: pointwise
|
@@ -3332,12 +3343,12 @@
|
|
3332
3343
|
python_module: nn
|
3333
3344
|
dispatch:
|
3334
3345
|
CompositeImplicitAutograd: linear
|
3335
|
-
NestedTensorCPU, NestedTensorCUDA: nested_linear
|
3346
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_linear
|
3336
3347
|
MPS: _mps_linear
|
3337
3348
|
|
3338
3349
|
- func: linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
|
3339
3350
|
dispatch:
|
3340
|
-
NestedTensorCPU, NestedTensorCUDA: nested_linear_backward
|
3351
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_linear_backward
|
3341
3352
|
MPS: mps_linear_backward
|
3342
3353
|
autogen: linear_backward.out
|
3343
3354
|
|
@@ -3371,7 +3382,7 @@
|
|
3371
3382
|
dispatch:
|
3372
3383
|
CUDA: _cslt_compress
|
3373
3384
|
|
3374
|
-
- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, bool split_k_one_kernel=True) -> Tensor
|
3385
|
+
- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, int split_k_mode=-1) -> Tensor
|
3375
3386
|
dispatch:
|
3376
3387
|
CUDA: _cslt_sparse_mm
|
3377
3388
|
tags: needs_fixed_stride_order
|
@@ -3496,8 +3507,7 @@
|
|
3496
3507
|
structured: True
|
3497
3508
|
structured_inherits: TensorIteratorBase
|
3498
3509
|
dispatch:
|
3499
|
-
CPU, CUDA: log_out
|
3500
|
-
MPS: log_out_mps
|
3510
|
+
CPU, CUDA, MPS, MTIA: log_out
|
3501
3511
|
tags: pointwise
|
3502
3512
|
|
3503
3513
|
- func: log10(Tensor self) -> Tensor
|
@@ -3517,8 +3527,7 @@
|
|
3517
3527
|
structured: True
|
3518
3528
|
structured_inherits: TensorIteratorBase
|
3519
3529
|
dispatch:
|
3520
|
-
CPU, CUDA: log10_out
|
3521
|
-
MPS: log10_out_mps
|
3530
|
+
CPU, CUDA, MPS: log10_out
|
3522
3531
|
tags: pointwise
|
3523
3532
|
|
3524
3533
|
- func: log1p(Tensor self) -> Tensor
|
@@ -3544,8 +3553,7 @@
|
|
3544
3553
|
structured: True
|
3545
3554
|
structured_inherits: TensorIteratorBase
|
3546
3555
|
dispatch:
|
3547
|
-
CPU, CUDA: log1p_out
|
3548
|
-
MPS: log1p_out_mps
|
3556
|
+
CPU, CUDA, MPS: log1p_out
|
3549
3557
|
SparseCPU, SparseCUDA: log1p_sparse_out
|
3550
3558
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
|
3551
3559
|
tags: pointwise
|
@@ -3567,8 +3575,7 @@
|
|
3567
3575
|
structured: True
|
3568
3576
|
structured_inherits: TensorIteratorBase
|
3569
3577
|
dispatch:
|
3570
|
-
CPU, CUDA: log2_out
|
3571
|
-
MPS: log2_out_mps
|
3578
|
+
CPU, CUDA, MPS, MTIA: log2_out
|
3572
3579
|
tags: pointwise
|
3573
3580
|
|
3574
3581
|
- func: logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -3715,6 +3722,7 @@
|
|
3715
3722
|
dispatch:
|
3716
3723
|
CPU: log_softmax_cpu_out
|
3717
3724
|
CUDA: log_softmax_cuda_out
|
3725
|
+
MTIA: log_softmax_mtia_out
|
3718
3726
|
MPS: log_softmax_mps_out
|
3719
3727
|
|
3720
3728
|
- func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
|
@@ -3725,6 +3733,7 @@
|
|
3725
3733
|
dispatch:
|
3726
3734
|
CPU: log_softmax_backward_cpu_out
|
3727
3735
|
CUDA: log_softmax_backward_cuda_out
|
3736
|
+
MTIA: log_softmax_backward_mtia_out
|
3728
3737
|
MPS: log_softmax_backward_mps_out
|
3729
3738
|
|
3730
3739
|
- func: _logcumsumexp(Tensor self, int dim) -> Tensor
|
@@ -3776,17 +3785,17 @@
|
|
3776
3785
|
variants: function, method
|
3777
3786
|
dispatch:
|
3778
3787
|
CompositeImplicitAutograd: matmul
|
3779
|
-
NestedTensorCPU, NestedTensorCUDA: matmul_nested
|
3788
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_nested
|
3780
3789
|
|
3781
3790
|
- func: matmul_backward(Tensor grad, Tensor self, Tensor other, bool[2] mask) -> (Tensor, Tensor)
|
3782
3791
|
dispatch:
|
3783
|
-
NestedTensorCPU, NestedTensorCUDA: matmul_backward_nested
|
3792
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_backward_nested
|
3784
3793
|
autogen: matmul_backward.out
|
3785
3794
|
|
3786
3795
|
- func: matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
3787
3796
|
dispatch:
|
3788
3797
|
CompositeImplicitAutograd: matmul_out
|
3789
|
-
NestedTensorCPU, NestedTensorCUDA: matmul_out_nested
|
3798
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_out_nested
|
3790
3799
|
|
3791
3800
|
# Alias to linalg.matrix_power
|
3792
3801
|
- func: matrix_power(Tensor self, int n) -> Tensor
|
@@ -3848,7 +3857,7 @@
|
|
3848
3857
|
precomputed:
|
3849
3858
|
- dim -> int dim
|
3850
3859
|
dispatch:
|
3851
|
-
CPU, CUDA: max_out
|
3860
|
+
CPU, CUDA, MTIA: max_out
|
3852
3861
|
MPS: max_out_mps
|
3853
3862
|
|
3854
3863
|
- func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
@@ -3864,6 +3873,7 @@
|
|
3864
3873
|
device_guard: False
|
3865
3874
|
dispatch:
|
3866
3875
|
CompositeImplicitAutograd: value_selecting_reduction_backward_symint
|
3876
|
+
NestedTensorCPU, NestedTensorCUDA: value_selecting_reduction_backward_nested_symint
|
3867
3877
|
|
3868
3878
|
- func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
|
3869
3879
|
variants: function, method
|
@@ -4003,6 +4013,7 @@
|
|
4003
4013
|
dispatch:
|
4004
4014
|
CPU: nanmedian_cpu
|
4005
4015
|
CUDA: nanmedian_cuda
|
4016
|
+
MPS: nanmedian_mps
|
4006
4017
|
autogen: nanmedian.out
|
4007
4018
|
|
4008
4019
|
- func: nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
@@ -4014,6 +4025,7 @@
|
|
4014
4025
|
dispatch:
|
4015
4026
|
CPU: nanmedian_out_cpu
|
4016
4027
|
CUDA: nanmedian_out_cuda
|
4028
|
+
MPS: nanmedian_out_mps
|
4017
4029
|
|
4018
4030
|
- func: nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
4019
4031
|
variants: function, method
|
@@ -4034,7 +4046,7 @@
|
|
4034
4046
|
precomputed:
|
4035
4047
|
- dim -> int dim
|
4036
4048
|
dispatch:
|
4037
|
-
CPU, CUDA: min_out
|
4049
|
+
CPU, CUDA, MTIA: min_out
|
4038
4050
|
MPS: min_out_mps
|
4039
4051
|
|
4040
4052
|
- func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
|
@@ -4142,11 +4154,20 @@
|
|
4142
4154
|
dispatch:
|
4143
4155
|
CPU: mm_out_cpu
|
4144
4156
|
CUDA: mm_out_cuda
|
4157
|
+
MTIA: mm_out_mtia
|
4145
4158
|
MPS: mm_out_mps
|
4146
4159
|
XPU: mm_out_xpu
|
4147
4160
|
SparseCPU, SparseCUDA: _sparse_mm_out
|
4148
4161
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out
|
4149
4162
|
|
4163
|
+
- func: mm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
|
4164
|
+
dispatch:
|
4165
|
+
CUDA: _mm_dtype_cuda
|
4166
|
+
|
4167
|
+
- func: mm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
|
4168
|
+
dispatch:
|
4169
|
+
CUDA: _mm_dtype_out_cuda
|
4170
|
+
|
4150
4171
|
- func: _int_mm(Tensor self, Tensor mat2) -> Tensor
|
4151
4172
|
dispatch:
|
4152
4173
|
CPU: _int_mm_cpu
|
@@ -4167,6 +4188,10 @@
|
|
4167
4188
|
MPS: _weight_int4pack_mm_mps
|
4168
4189
|
CUDA: _weight_int4pack_mm_cuda
|
4169
4190
|
|
4191
|
+
- func: _weight_int4pack_mm_with_scales_and_zeros(Tensor self, Tensor mat2, int qGroupSize, Tensor qScale, Tensor qZeros) -> Tensor
|
4192
|
+
dispatch:
|
4193
|
+
XPU: _weight_int4pack_mm_xpu
|
4194
|
+
|
4170
4195
|
# Split int4 pack weight between cpu and other devices due to
|
4171
4196
|
# https://github.com/pytorch/ao/issues/1117#issuecomment-2451252756.
|
4172
4197
|
- func: _convert_weight_to_int4pack_for_cpu(Tensor self, int innerKTiles) -> Tensor
|
@@ -4177,6 +4202,14 @@
|
|
4177
4202
|
dispatch:
|
4178
4203
|
CPU: _weight_int4pack_mm_cpu
|
4179
4204
|
|
4205
|
+
- func: _dyn_quant_pack_4bit_weight(Tensor weights, Tensor scales_zeros, Tensor? bias, int block_size, int in_features, int out_features) -> Tensor
|
4206
|
+
dispatch:
|
4207
|
+
CPU: _dyn_quant_pack_4bit_weight_cpu
|
4208
|
+
|
4209
|
+
- func: _dyn_quant_matmul_4bit(Tensor inp, Tensor packed_weights, int block_size, int in_features, int out_features) -> Tensor
|
4210
|
+
dispatch:
|
4211
|
+
CPU: _dyn_quant_matmul_4bit_cpu
|
4212
|
+
|
4180
4213
|
- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
|
4181
4214
|
dispatch:
|
4182
4215
|
CPU: _weight_int8pack_mm_cpu
|
@@ -4217,7 +4250,7 @@
|
|
4217
4250
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
|
4218
4251
|
MkldnnCPU: mkldnn_mul
|
4219
4252
|
ZeroTensor: mul_zerotensor
|
4220
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
|
4253
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul_Tensor
|
4221
4254
|
tags: [core, pointwise]
|
4222
4255
|
|
4223
4256
|
- func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
@@ -4228,7 +4261,7 @@
|
|
4228
4261
|
SparseCPU, SparseCUDA: mul_sparse_
|
4229
4262
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
|
4230
4263
|
MkldnnCPU: mkldnn_mul_
|
4231
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
|
4264
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Tensor
|
4232
4265
|
tags: pointwise
|
4233
4266
|
|
4234
4267
|
- func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -4236,8 +4269,7 @@
|
|
4236
4269
|
structured: True
|
4237
4270
|
structured_inherits: TensorIteratorBase
|
4238
4271
|
dispatch:
|
4239
|
-
CPU, CUDA: mul_out
|
4240
|
-
MPS: mul_out_mps
|
4272
|
+
CPU, CUDA, MPS: mul_out
|
4241
4273
|
SparseCPU: mul_out_sparse_cpu
|
4242
4274
|
SparseCUDA: mul_out_sparse_cuda
|
4243
4275
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
|
@@ -4251,7 +4283,7 @@
|
|
4251
4283
|
dispatch:
|
4252
4284
|
CompositeExplicitAutograd: mul
|
4253
4285
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
|
4254
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
|
4286
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul_Scalar
|
4255
4287
|
tags: [core, pointwise]
|
4256
4288
|
|
4257
4289
|
- func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
@@ -4260,7 +4292,7 @@
|
|
4260
4292
|
dispatch:
|
4261
4293
|
CompositeExplicitAutograd: mul_
|
4262
4294
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
|
4263
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
|
4295
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Scalar
|
4264
4296
|
autogen: mul.Scalar_out
|
4265
4297
|
tags: pointwise
|
4266
4298
|
# multiply, alias for mul
|
@@ -4326,7 +4358,7 @@
|
|
4326
4358
|
device_guard: False
|
4327
4359
|
dispatch:
|
4328
4360
|
CompositeImplicitAutograd: narrow_symint
|
4329
|
-
NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint
|
4361
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: narrow_nested_symint
|
4330
4362
|
|
4331
4363
|
- func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
|
4332
4364
|
variants: function, method
|
@@ -4465,7 +4497,7 @@
|
|
4465
4497
|
# NB: Although this composite mutates on the inside, it is
|
4466
4498
|
# non-differentiable so NonFunctional doesn't apply
|
4467
4499
|
CompositeExplicitAutograd: ones_like
|
4468
|
-
NestedTensorCPU, NestedTensorCUDA: ones_like
|
4500
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: ones_like
|
4469
4501
|
autogen: ones_like.out
|
4470
4502
|
|
4471
4503
|
- func: pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor
|
@@ -4747,6 +4779,14 @@
|
|
4747
4779
|
CompositeExplicitAutograd: randint_like
|
4748
4780
|
autogen: randint_like.out
|
4749
4781
|
|
4782
|
+
- func: randint_like.Tensor(Tensor self, Tensor high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
|
4783
|
+
tags: nondeterministic_seeded
|
4784
|
+
dispatch:
|
4785
|
+
# NB: Although this composite mutates on the inside, it is
|
4786
|
+
# non-differentiable so NonFunctional doesn't apply
|
4787
|
+
CompositeExplicitAutograd: randint_like
|
4788
|
+
autogen: randint_like.Tensor_out
|
4789
|
+
|
4750
4790
|
- func: randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
|
4751
4791
|
tags: nondeterministic_seeded
|
4752
4792
|
dispatch:
|
@@ -4856,7 +4896,7 @@
|
|
4856
4896
|
structured: True
|
4857
4897
|
structured_inherits: TensorIteratorBase
|
4858
4898
|
dispatch:
|
4859
|
-
CPU, CUDA: reciprocal_out
|
4899
|
+
CPU, CUDA, MTIA: reciprocal_out
|
4860
4900
|
MPS: reciprocal_out_mps
|
4861
4901
|
tags: pointwise
|
4862
4902
|
|
@@ -4867,7 +4907,7 @@
|
|
4867
4907
|
dispatch:
|
4868
4908
|
SparseCPU, SparseCUDA: neg_sparse
|
4869
4909
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
|
4870
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
|
4910
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg
|
4871
4911
|
tags: [core, pointwise]
|
4872
4912
|
|
4873
4913
|
- func: neg_(Tensor(a!) self) -> Tensor(a!)
|
@@ -4877,7 +4917,7 @@
|
|
4877
4917
|
dispatch:
|
4878
4918
|
SparseCPU, SparseCUDA: neg_sparse_
|
4879
4919
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
|
4880
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
|
4920
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg_
|
4881
4921
|
tags: pointwise
|
4882
4922
|
|
4883
4923
|
- func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -4885,8 +4925,7 @@
|
|
4885
4925
|
structured: True
|
4886
4926
|
structured_inherits: TensorIteratorBase
|
4887
4927
|
dispatch:
|
4888
|
-
CPU, CUDA: neg_out
|
4889
|
-
MPS: neg_out_mps
|
4928
|
+
CPU, CUDA, MPS, MTIA: neg_out
|
4890
4929
|
SparseCPU, SparseCUDA: neg_out_sparse
|
4891
4930
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
|
4892
4931
|
tags: pointwise
|
@@ -4948,7 +4987,7 @@
|
|
4948
4987
|
device_check: NoCheck
|
4949
4988
|
device_guard: False
|
4950
4989
|
dispatch:
|
4951
|
-
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS: _reshape_alias
|
4990
|
+
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS, MTIA: _reshape_alias
|
4952
4991
|
# We don't need to support mkldnn since this is handled explicitly by the reshape operator.
|
4953
4992
|
|
4954
4993
|
- func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
|
@@ -4989,9 +5028,7 @@
|
|
4989
5028
|
structured: True
|
4990
5029
|
structured_inherits: TensorIteratorBase
|
4991
5030
|
dispatch:
|
4992
|
-
CPU: round_out
|
4993
|
-
CUDA: round_out
|
4994
|
-
MPS: round_out_mps
|
5031
|
+
CPU, CUDA, MPS: round_out
|
4995
5032
|
SparseCPU, SparseCUDA: round_sparse_out
|
4996
5033
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
|
4997
5034
|
tags: pointwise
|
@@ -5013,8 +5050,7 @@
|
|
5013
5050
|
structured: True
|
5014
5051
|
structured_inherits: TensorIteratorBase
|
5015
5052
|
dispatch:
|
5016
|
-
CPU: round_decimals_out
|
5017
|
-
CUDA: round_decimals_out
|
5053
|
+
CPU, CUDA, MPS: round_decimals_out
|
5018
5054
|
tags: pointwise
|
5019
5055
|
|
5020
5056
|
- func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
|
@@ -5029,12 +5065,12 @@
|
|
5029
5065
|
device_check: NoCheck # TensorIterator
|
5030
5066
|
variants: function, method
|
5031
5067
|
dispatch:
|
5032
|
-
CPU, CUDA: relu
|
5068
|
+
CPU, CUDA, MTIA: relu
|
5033
5069
|
MPS: relu_mps
|
5034
5070
|
MkldnnCPU: mkldnn_relu
|
5035
5071
|
QuantizedCPU: relu_quantized_cpu
|
5036
5072
|
QuantizedCUDA: relu_quantized_cuda
|
5037
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
|
5073
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu
|
5038
5074
|
SparseCPU, SparseCUDA: relu_sparse
|
5039
5075
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
|
5040
5076
|
tags: [core, pointwise]
|
@@ -5043,12 +5079,12 @@
|
|
5043
5079
|
device_check: NoCheck # TensorIterator
|
5044
5080
|
variants: function, method
|
5045
5081
|
dispatch:
|
5046
|
-
CPU, CUDA: relu_
|
5082
|
+
CPU, CUDA, MTIA: relu_
|
5047
5083
|
MPS: relu_mps_
|
5048
5084
|
MkldnnCPU: mkldnn_relu_
|
5049
5085
|
QuantizedCPU: relu_quantized_cpu_
|
5050
5086
|
QuantizedCUDA: relu_quantized_cuda_
|
5051
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
|
5087
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu_
|
5052
5088
|
SparseCPU, SparseCUDA: relu_sparse_
|
5053
5089
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
|
5054
5090
|
autogen: relu.out
|
@@ -5094,7 +5130,7 @@
|
|
5094
5130
|
python_module: nn
|
5095
5131
|
dispatch:
|
5096
5132
|
QuantizedCPU: gelu_quantized_cpu_
|
5097
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
|
5133
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_gelu_
|
5098
5134
|
|
5099
5135
|
- func: gelu(Tensor self, *, str approximate='none') -> Tensor
|
5100
5136
|
structured_delegate: gelu.out
|
@@ -5104,7 +5140,7 @@
|
|
5104
5140
|
MkldnnCPU: mkldnn_gelu
|
5105
5141
|
QuantizedCPU: gelu_quantized_cpu
|
5106
5142
|
QuantizedCUDA: gelu_quantized_cuda
|
5107
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu
|
5143
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_gelu
|
5108
5144
|
tags: [core, pointwise]
|
5109
5145
|
|
5110
5146
|
- func: gelu_backward.grad_input(Tensor grad_output, Tensor self, *, str approximate='none', Tensor(a!) grad_input) -> Tensor(a!)
|
@@ -5121,7 +5157,7 @@
|
|
5121
5157
|
python_module: nn
|
5122
5158
|
dispatch:
|
5123
5159
|
MkldnnCPU: mkldnn_gelu_backward
|
5124
|
-
NestedTensorCPU, NestedTensorCUDA: gelu_backwards_nested
|
5160
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: gelu_backwards_nested
|
5125
5161
|
tags: pointwise
|
5126
5162
|
|
5127
5163
|
- func: infinitely_differentiable_gelu_backward(Tensor grad, Tensor self) -> Tensor
|
@@ -5135,7 +5171,7 @@
|
|
5135
5171
|
structured_inherits: TensorIteratorBase
|
5136
5172
|
device_check: NoCheck # TensorIterator
|
5137
5173
|
dispatch:
|
5138
|
-
CPU, CUDA: hardshrink_out
|
5174
|
+
CPU, CUDA, MPS: hardshrink_out
|
5139
5175
|
|
5140
5176
|
- func: hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor
|
5141
5177
|
structured_delegate: hardshrink.out
|
@@ -5147,7 +5183,7 @@
|
|
5147
5183
|
structured: True
|
5148
5184
|
structured_inherits: TensorIteratorBase
|
5149
5185
|
dispatch:
|
5150
|
-
CPU, CUDA: hardshrink_backward_out
|
5186
|
+
CPU, CUDA, MPS: hardshrink_backward_out
|
5151
5187
|
|
5152
5188
|
- func: hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor
|
5153
5189
|
structured_delegate: hardshrink_backward.grad_input
|
@@ -5170,8 +5206,7 @@
|
|
5170
5206
|
structured: True
|
5171
5207
|
structured_inherits: TensorIteratorBase
|
5172
5208
|
dispatch:
|
5173
|
-
CPU, CUDA: rsqrt_out
|
5174
|
-
MPS: rsqrt_out_mps
|
5209
|
+
CPU, CUDA, MPS, MTIA: rsqrt_out
|
5175
5210
|
tags: pointwise
|
5176
5211
|
|
5177
5212
|
- func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)
|
@@ -5186,7 +5221,7 @@
|
|
5186
5221
|
dispatch:
|
5187
5222
|
CompositeExplicitAutograd: select_symint
|
5188
5223
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
|
5189
|
-
NestedTensorCPU, NestedTensorCUDA: select_nested
|
5224
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: select_nested
|
5190
5225
|
tags: core
|
5191
5226
|
|
5192
5227
|
- func: select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor
|
@@ -5202,7 +5237,7 @@
|
|
5202
5237
|
device_check: NoCheck
|
5203
5238
|
device_guard: False
|
5204
5239
|
dispatch:
|
5205
|
-
NestedTensorCPU, NestedTensorCUDA: _nested_select_backward_symint
|
5240
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_select_backward_symint
|
5206
5241
|
|
5207
5242
|
- func: selu(Tensor self) -> Tensor
|
5208
5243
|
device_check: NoCheck # TensorIterator
|
@@ -5227,14 +5262,14 @@
|
|
5227
5262
|
structured_delegate: silu.out
|
5228
5263
|
python_module: nn
|
5229
5264
|
dispatch:
|
5230
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
|
5265
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_silu
|
5231
5266
|
tags: pointwise
|
5232
5267
|
|
5233
5268
|
- func: silu_(Tensor(a!) self) -> Tensor(a!)
|
5234
5269
|
structured_delegate: silu.out
|
5235
5270
|
python_module: nn
|
5236
5271
|
dispatch:
|
5237
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
|
5272
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_silu_
|
5238
5273
|
tags: pointwise
|
5239
5274
|
|
5240
5275
|
- func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -5242,7 +5277,7 @@
|
|
5242
5277
|
structured_inherits: TensorIteratorBase
|
5243
5278
|
python_module: nn
|
5244
5279
|
dispatch:
|
5245
|
-
CPU, CUDA: silu_out
|
5280
|
+
CPU, CUDA, MTIA: silu_out
|
5246
5281
|
MPS: silu_out_mps
|
5247
5282
|
tags: pointwise
|
5248
5283
|
|
@@ -5260,7 +5295,7 @@
|
|
5260
5295
|
python_module: nn
|
5261
5296
|
dispatch:
|
5262
5297
|
CompositeImplicitAutograd: math_silu_backward
|
5263
|
-
NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
|
5298
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: silu_backward_nested
|
5264
5299
|
tags: pointwise
|
5265
5300
|
|
5266
5301
|
- func: mish(Tensor self) -> Tensor
|
@@ -5309,14 +5344,13 @@
|
|
5309
5344
|
structured: True
|
5310
5345
|
structured_inherits: TensorIteratorBase
|
5311
5346
|
dispatch:
|
5312
|
-
CPU, CUDA: sigmoid_out
|
5313
|
-
MPS: sigmoid_out_mps
|
5347
|
+
CPU, CUDA, MPS: sigmoid_out
|
5314
5348
|
tags: pointwise
|
5315
5349
|
|
5316
5350
|
- func: logit(Tensor self, float? eps=None) -> Tensor
|
5317
5351
|
variants: function, method
|
5318
5352
|
dispatch:
|
5319
|
-
CPU, CUDA: logit
|
5353
|
+
CPU, CUDA, MTIA: logit
|
5320
5354
|
MPS: logit_mps
|
5321
5355
|
tags: pointwise
|
5322
5356
|
|
@@ -5339,7 +5373,7 @@
|
|
5339
5373
|
dispatch:
|
5340
5374
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
|
5341
5375
|
SparseCPU, SparseCUDA: sin_sparse
|
5342
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sin
|
5376
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sin
|
5343
5377
|
tags: [core, pointwise]
|
5344
5378
|
|
5345
5379
|
- func: sin_(Tensor(a!) self) -> Tensor(a!)
|
@@ -5356,8 +5390,7 @@
|
|
5356
5390
|
structured: True
|
5357
5391
|
structured_inherits: TensorIteratorBase
|
5358
5392
|
dispatch:
|
5359
|
-
CPU, CUDA: sin_out
|
5360
|
-
MPS: sin_out_mps
|
5393
|
+
CPU, CUDA, MPS, MTIA: sin_out
|
5361
5394
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
|
5362
5395
|
SparseCPU, SparseCUDA: sin_sparse_out
|
5363
5396
|
tags: pointwise
|
@@ -5376,7 +5409,7 @@
|
|
5376
5409
|
structured: True
|
5377
5410
|
structured_inherits: TensorIteratorBase
|
5378
5411
|
dispatch:
|
5379
|
-
CPU, CUDA: sinc_out
|
5412
|
+
CPU, CUDA, MPS: sinc_out
|
5380
5413
|
tags: pointwise
|
5381
5414
|
|
5382
5415
|
- func: sinh(Tensor self) -> Tensor
|
@@ -5402,8 +5435,7 @@
|
|
5402
5435
|
structured: True
|
5403
5436
|
structured_inherits: TensorIteratorBase
|
5404
5437
|
dispatch:
|
5405
|
-
CPU, CUDA: sinh_out
|
5406
|
-
MPS: sinh_out_mps
|
5438
|
+
CPU, CUDA, MPS: sinh_out
|
5407
5439
|
SparseCPU, SparseCUDA: sinh_sparse_out
|
5408
5440
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out
|
5409
5441
|
|
@@ -5423,7 +5455,7 @@
|
|
5423
5455
|
variants: function, method
|
5424
5456
|
dispatch:
|
5425
5457
|
CompositeExplicitAutograd: detach
|
5426
|
-
NestedTensorCPU, NestedTensorCUDA: detach
|
5458
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: detach
|
5427
5459
|
|
5428
5460
|
# Like `detach()`, but modifies this `Variable` in-place. This method may
|
5429
5461
|
# only be called on non-view `Variable`s. You can use `is_view()` to check
|
@@ -5553,7 +5585,7 @@
|
|
5553
5585
|
structured_delegate: _softmax.out
|
5554
5586
|
dispatch:
|
5555
5587
|
MkldnnCPU: mkldnn_softmax
|
5556
|
-
NestedTensorCPU, NestedTensorCUDA: softmax_nested
|
5588
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: softmax_nested
|
5557
5589
|
tags: core
|
5558
5590
|
|
5559
5591
|
- func: _softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -5566,7 +5598,7 @@
|
|
5566
5598
|
- func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
|
5567
5599
|
structured_delegate: _softmax_backward_data.out
|
5568
5600
|
dispatch:
|
5569
|
-
NestedTensorCPU, NestedTensorCUDA: nested_softmax_backward
|
5601
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_softmax_backward
|
5570
5602
|
|
5571
5603
|
- func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
|
5572
5604
|
structured: True
|
@@ -5610,7 +5642,7 @@
|
|
5610
5642
|
device_guard: False
|
5611
5643
|
dispatch:
|
5612
5644
|
CompositeExplicitAutograd: split_with_sizes
|
5613
|
-
NestedTensorCPU, NestedTensorCUDA: split_with_sizes_nested
|
5645
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: split_with_sizes_nested
|
5614
5646
|
tags: core
|
5615
5647
|
|
5616
5648
|
- func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
|
@@ -5638,7 +5670,7 @@
|
|
5638
5670
|
dispatch:
|
5639
5671
|
CompositeExplicitAutograd: squeeze
|
5640
5672
|
QuantizedCPU, QuantizedCUDA: squeeze_quantized
|
5641
|
-
NestedTensorCPU, NestedTensorCUDA: squeeze_nested
|
5673
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_nested
|
5642
5674
|
|
5643
5675
|
- func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
|
5644
5676
|
variants: function, method
|
@@ -5647,7 +5679,7 @@
|
|
5647
5679
|
dispatch:
|
5648
5680
|
CompositeExplicitAutograd: squeeze
|
5649
5681
|
QuantizedCPU, QuantizedCUDA: squeeze_quantized
|
5650
|
-
NestedTensorCPU, NestedTensorCUDA: squeeze_dim_nested
|
5682
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_dim_nested
|
5651
5683
|
tags: core
|
5652
5684
|
|
5653
5685
|
- func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
|
@@ -5663,7 +5695,7 @@
|
|
5663
5695
|
dispatch:
|
5664
5696
|
CompositeExplicitAutograd: squeeze
|
5665
5697
|
QuantizedCPU, QuantizedCUDA: squeeze_quantized
|
5666
|
-
NestedTensorCPU, NestedTensorCUDA: squeeze_dim_nested
|
5698
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_dim_nested
|
5667
5699
|
tags: core
|
5668
5700
|
|
5669
5701
|
- func: squeeze_(Tensor(a!) self) -> Tensor(a!)
|
@@ -5747,11 +5779,11 @@
|
|
5747
5779
|
- func: dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)
|
5748
5780
|
|
5749
5781
|
# Overload without center & pad mode, needed for forward-compatibility
|
5750
|
-
- func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
|
5782
|
+
- func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor
|
5751
5783
|
variants: function, method
|
5752
5784
|
cpp_no_default_args: ['hop_length', 'win_length', 'window', 'normalized']
|
5753
5785
|
|
5754
|
-
- func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
|
5786
|
+
- func: stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor
|
5755
5787
|
variants: function, method
|
5756
5788
|
|
5757
5789
|
- func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor
|
@@ -5837,7 +5869,7 @@
|
|
5837
5869
|
structured_delegate: sqrt.out
|
5838
5870
|
variants: function, method
|
5839
5871
|
dispatch:
|
5840
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sqrt
|
5872
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sqrt
|
5841
5873
|
SparseCPU, SparseCUDA: sqrt_sparse
|
5842
5874
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
|
5843
5875
|
tags: [core, pointwise]
|
@@ -5856,8 +5888,7 @@
|
|
5856
5888
|
structured: True
|
5857
5889
|
structured_inherits: TensorIteratorBase
|
5858
5890
|
dispatch:
|
5859
|
-
CPU, CUDA: sqrt_out
|
5860
|
-
MPS: sqrt_out_mps
|
5891
|
+
CPU, CUDA, MPS, MTIA: sqrt_out
|
5861
5892
|
SparseCPU, SparseCUDA: sqrt_sparse_out
|
5862
5893
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
|
5863
5894
|
tags: pointwise
|
@@ -6014,8 +6045,7 @@
|
|
6014
6045
|
structured: True
|
6015
6046
|
structured_inherits: TensorIteratorBase
|
6016
6047
|
dispatch:
|
6017
|
-
CPU, CUDA: tan_out
|
6018
|
-
MPS: tan_out_mps
|
6048
|
+
CPU, CUDA, MPS: tan_out
|
6019
6049
|
SparseCPU, SparseCUDA: tan_sparse_out
|
6020
6050
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
|
6021
6051
|
tags: pointwise
|
@@ -6029,7 +6059,7 @@
|
|
6029
6059
|
MkldnnCPU: mkldnn_tanh
|
6030
6060
|
SparseCPU, SparseCUDA: tanh_sparse
|
6031
6061
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
|
6032
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
|
6062
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh
|
6033
6063
|
tags: [core, pointwise]
|
6034
6064
|
|
6035
6065
|
- func: tanh_(Tensor(a!) self) -> Tensor(a!)
|
@@ -6040,7 +6070,7 @@
|
|
6040
6070
|
MkldnnCPU: mkldnn_tanh_
|
6041
6071
|
SparseCPU, SparseCUDA: tanh_sparse_
|
6042
6072
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
|
6043
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
|
6073
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh_
|
6044
6074
|
tags: pointwise
|
6045
6075
|
|
6046
6076
|
- func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -6048,8 +6078,7 @@
|
|
6048
6078
|
structured: True
|
6049
6079
|
structured_inherits: TensorIteratorBase
|
6050
6080
|
dispatch:
|
6051
|
-
CPU, CUDA: tanh_out
|
6052
|
-
MPS: tanh_out_mps
|
6081
|
+
CPU, CUDA, MPS, MTIA: tanh_out
|
6053
6082
|
SparseCPU, SparseCUDA: tanh_sparse_out
|
6054
6083
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
|
6055
6084
|
tags: pointwise
|
@@ -6098,7 +6127,7 @@
|
|
6098
6127
|
MkldnnCPU: mkldnn_relu_backward
|
6099
6128
|
SparseCPU, SparseCUDA: threshold_backward_sparse
|
6100
6129
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
|
6101
|
-
NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
|
6130
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: threshold_backwards_nested
|
6102
6131
|
tags: pointwise
|
6103
6132
|
|
6104
6133
|
- func: tile(Tensor self, SymInt[] dims) -> Tensor
|
@@ -6112,7 +6141,7 @@
|
|
6112
6141
|
device_guard: False
|
6113
6142
|
dispatch:
|
6114
6143
|
CompositeExplicitAutograd: transpose
|
6115
|
-
NestedTensorCPU, NestedTensorCUDA: transpose_nested
|
6144
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: transpose_nested
|
6116
6145
|
|
6117
6146
|
- func: transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a)
|
6118
6147
|
variants: function, method
|
@@ -6209,13 +6238,13 @@
|
|
6209
6238
|
- func: _nested_tensor_size(Tensor self) -> Tensor
|
6210
6239
|
variants: method
|
6211
6240
|
dispatch:
|
6212
|
-
NestedTensorCPU, NestedTensorCUDA: _nested_tensor_size
|
6241
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_tensor_size
|
6213
6242
|
autogen: _nested_tensor_size.out
|
6214
6243
|
|
6215
6244
|
- func: _nested_tensor_strides(Tensor self) -> Tensor
|
6216
6245
|
variants: method
|
6217
6246
|
dispatch:
|
6218
|
-
NestedTensorCPU, NestedTensorCUDA: _nested_tensor_strides
|
6247
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_tensor_strides
|
6219
6248
|
autogen: _nested_tensor_strides.out
|
6220
6249
|
|
6221
6250
|
- func: _nested_tensor_storage_offsets(Tensor self) -> Tensor
|
@@ -6228,7 +6257,7 @@
|
|
6228
6257
|
# _nested_from_padded_and_nested_example is available for testing.
|
6229
6258
|
- func: _nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor
|
6230
6259
|
dispatch:
|
6231
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
|
6260
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
|
6232
6261
|
autogen: _nested_from_padded_and_nested_example.out
|
6233
6262
|
|
6234
6263
|
# The input arguments' types to this functions are temporary. When nested tensors switch to using SymInts for their metadata representation
|
@@ -6340,8 +6369,7 @@
|
|
6340
6369
|
structured_inherits: TensorIteratorBase
|
6341
6370
|
device_check: NoCheck # TensorIterator
|
6342
6371
|
dispatch:
|
6343
|
-
CPU, CUDA: trunc_out
|
6344
|
-
MPS: trunc_out_mps
|
6372
|
+
CPU, CUDA, MPS: trunc_out
|
6345
6373
|
SparseCPU, SparseCUDA: trunc_sparse_out
|
6346
6374
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
|
6347
6375
|
tags: pointwise
|
@@ -6420,7 +6448,7 @@
|
|
6420
6448
|
CompositeExplicitAutograd: unsqueeze
|
6421
6449
|
SparseCPU, SparseCUDA: unsqueeze_sparse
|
6422
6450
|
QuantizedCPU, QuantizedCUDA: unsqueeze_quantized
|
6423
|
-
NestedTensorCPU, NestedTensorCUDA: unsqueeze_nested
|
6451
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: unsqueeze_nested
|
6424
6452
|
tags: core
|
6425
6453
|
|
6426
6454
|
- func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
|
@@ -6514,15 +6542,15 @@
|
|
6514
6542
|
device_check: NoCheck # TensorIterator
|
6515
6543
|
variants: function, method
|
6516
6544
|
dispatch:
|
6517
|
-
CPU, CUDA, MPS: where
|
6518
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
|
6545
|
+
CPU, CUDA, MPS, MTIA: where
|
6546
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_where
|
6519
6547
|
tags: [core, pointwise]
|
6520
6548
|
|
6521
6549
|
- func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
6522
6550
|
device_check: NoCheck # TensorIterator
|
6523
6551
|
dispatch:
|
6524
|
-
CPU, CUDA, MPS: where_self_out
|
6525
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_where_out
|
6552
|
+
CPU, CUDA, MPS, MTIA: where_self_out
|
6553
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_where_out
|
6526
6554
|
|
6527
6555
|
- func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
|
6528
6556
|
variants: function
|
@@ -6857,7 +6885,7 @@
|
|
6857
6885
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
|
6858
6886
|
MkldnnCPU: mkldnn_clone
|
6859
6887
|
QuantizedCPU, QuantizedCUDA: quantized_clone
|
6860
|
-
NestedTensorCPU, NestedTensorCUDA: clone_nested
|
6888
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: clone_nested
|
6861
6889
|
autogen: clone.out
|
6862
6890
|
tags: [core, pointwise]
|
6863
6891
|
|
@@ -6891,7 +6919,7 @@
|
|
6891
6919
|
SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
|
6892
6920
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
|
6893
6921
|
MkldnnCPU: mkldnn_zero_
|
6894
|
-
NestedTensorCPU, NestedTensorCUDA: zero_nested_
|
6922
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: zero_nested_
|
6895
6923
|
autogen: zero, zero.out
|
6896
6924
|
|
6897
6925
|
- func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
|
@@ -6911,7 +6939,7 @@
|
|
6911
6939
|
dispatch:
|
6912
6940
|
SparseCPU, SparseCUDA: sub_sparse
|
6913
6941
|
ZeroTensor: sub_zerotensor
|
6914
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sub_Tensor
|
6942
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sub_Tensor
|
6915
6943
|
tags: [core, pointwise]
|
6916
6944
|
|
6917
6945
|
- func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
|
@@ -6958,7 +6986,7 @@
|
|
6958
6986
|
device_check: NoCheck # TensorIterator
|
6959
6987
|
variants: function
|
6960
6988
|
dispatch:
|
6961
|
-
CPU, CUDA: rsub
|
6989
|
+
CPU, CUDA, MPS: rsub
|
6962
6990
|
autogen: rsub.Tensor_out
|
6963
6991
|
|
6964
6992
|
- func: heaviside.out(Tensor self, Tensor values, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -7040,6 +7068,14 @@
|
|
7040
7068
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
|
7041
7069
|
tags: core
|
7042
7070
|
|
7071
|
+
- func: addmm.dtype(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
|
7072
|
+
dispatch:
|
7073
|
+
CUDA: _addmm_dtype_cuda
|
7074
|
+
|
7075
|
+
- func: addmm.dtype_out(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
|
7076
|
+
dispatch:
|
7077
|
+
CUDA: _addmm_dtype_out_cuda
|
7078
|
+
|
7043
7079
|
- func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
|
7044
7080
|
structured_delegate: addmm.out
|
7045
7081
|
variants: method
|
@@ -7063,13 +7099,26 @@
|
|
7063
7099
|
- func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
|
7064
7100
|
variants: function
|
7065
7101
|
dispatch:
|
7102
|
+
CPU: _scaled_mm_cpu
|
7066
7103
|
CUDA: _scaled_mm_cuda
|
7067
7104
|
|
7068
7105
|
- func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
|
7069
7106
|
variants: function
|
7070
7107
|
dispatch:
|
7108
|
+
CPU: _scaled_mm_out_cpu
|
7071
7109
|
CUDA: _scaled_mm_out_cuda
|
7072
7110
|
|
7111
|
+
|
7112
|
+
- func: _scaled_grouped_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? offs=None, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
|
7113
|
+
variants: function
|
7114
|
+
dispatch:
|
7115
|
+
CUDA: _scaled_grouped_mm_cuda
|
7116
|
+
|
7117
|
+
- func: _grouped_mm(Tensor self, Tensor mat2, Tensor? offs=None, Tensor? bias=None, ScalarType? out_dtype=None) -> Tensor
|
7118
|
+
variants: function
|
7119
|
+
dispatch:
|
7120
|
+
CUDA: _grouped_mm_cuda
|
7121
|
+
|
7073
7122
|
# NOTE [ Sparse: autograd and API ]
|
7074
7123
|
#
|
7075
7124
|
#
|
@@ -7224,13 +7273,13 @@
|
|
7224
7273
|
dispatch:
|
7225
7274
|
CompositeImplicitAutograd: _sparse_coo_tensor_unsafe_symint
|
7226
7275
|
|
7227
|
-
- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None) -> ()
|
7276
|
+
- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None, bool? check_pinning=None) -> ()
|
7228
7277
|
|
7229
|
-
- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()
|
7230
|
-
- func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
|
7231
|
-
- func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
|
7232
|
-
- func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
|
7233
|
-
- func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
|
7278
|
+
- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout, bool? check_pinning=None) -> ()
|
7279
|
+
- func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
|
7280
|
+
- func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
|
7281
|
+
- func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
|
7282
|
+
- func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
|
7234
7283
|
|
7235
7284
|
- func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
|
7236
7285
|
dispatch:
|
@@ -7388,7 +7437,7 @@
|
|
7388
7437
|
dispatch:
|
7389
7438
|
SparseCPU, SparseCUDA, SparseMeta: values_sparse
|
7390
7439
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
|
7391
|
-
NestedTensorCPU, NestedTensorCUDA: values_nested
|
7440
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: values_nested
|
7392
7441
|
CompositeExplicitAutograd: values_default
|
7393
7442
|
device_check: NoCheck
|
7394
7443
|
device_guard: False
|
@@ -7447,7 +7496,7 @@
|
|
7447
7496
|
variants: function, method
|
7448
7497
|
dispatch:
|
7449
7498
|
CompositeExplicitAutograd: unbind
|
7450
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
|
7499
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_unbind
|
7451
7500
|
|
7452
7501
|
- func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
|
7453
7502
|
variants: function, method
|
@@ -7735,7 +7784,7 @@
|
|
7735
7784
|
device_guard: False
|
7736
7785
|
dispatch:
|
7737
7786
|
CompositeExplicitAutograd: _to_copy
|
7738
|
-
NestedTensorCPU, NestedTensorCUDA: _to_copy_nested
|
7787
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _to_copy_nested
|
7739
7788
|
autogen: _to_copy.out
|
7740
7789
|
tags: core
|
7741
7790
|
|
@@ -8021,7 +8070,7 @@
|
|
8021
8070
|
variants: function, method
|
8022
8071
|
dispatch:
|
8023
8072
|
CompositeExplicitAutograd: masked_fill
|
8024
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_masked_fill
|
8073
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_masked_fill
|
8025
8074
|
tags: pointwise
|
8026
8075
|
|
8027
8076
|
- func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
|
@@ -8076,9 +8125,9 @@
|
|
8076
8125
|
device_check: NoCheck
|
8077
8126
|
device_guard: False
|
8078
8127
|
dispatch:
|
8079
|
-
ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS: view
|
8128
|
+
ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS, MTIA: view
|
8080
8129
|
MkldnnCPU: mkldnn_view
|
8081
|
-
NestedTensorCPU, NestedTensorCUDA: view_nested
|
8130
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: view_nested
|
8082
8131
|
tags: core
|
8083
8132
|
|
8084
8133
|
# Warning: If you want to change the name or overload name of this
|
@@ -8306,7 +8355,7 @@
|
|
8306
8355
|
structured_inherits: TensorIteratorBase
|
8307
8356
|
variants: function
|
8308
8357
|
dispatch:
|
8309
|
-
CPU, CUDA: bitwise_and_out
|
8358
|
+
CPU, CUDA, MTIA: bitwise_and_out
|
8310
8359
|
MPS: bitwise_and_out_mps
|
8311
8360
|
tags: pointwise
|
8312
8361
|
|
@@ -8373,7 +8422,7 @@
|
|
8373
8422
|
structured_inherits: TensorIteratorBase
|
8374
8423
|
variants: function
|
8375
8424
|
dispatch:
|
8376
|
-
CPU, CUDA: bitwise_or_out
|
8425
|
+
CPU, CUDA, MTIA: bitwise_or_out
|
8377
8426
|
MPS: bitwise_or_out_mps
|
8378
8427
|
tags: pointwise
|
8379
8428
|
|
@@ -8919,7 +8968,7 @@
|
|
8919
8968
|
variants: method, function
|
8920
8969
|
dispatch:
|
8921
8970
|
QuantizedCPU: eq_quantized_cpu
|
8922
|
-
NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
|
8971
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: eq_scalar_nested
|
8923
8972
|
tags: [core, pointwise]
|
8924
8973
|
|
8925
8974
|
- func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -8938,7 +8987,7 @@
|
|
8938
8987
|
variants: method, function
|
8939
8988
|
dispatch:
|
8940
8989
|
QuantizedCPU: eq_quantized_cpu
|
8941
|
-
NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
|
8990
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: eq_tensor_nested
|
8942
8991
|
tags: [core, pointwise]
|
8943
8992
|
|
8944
8993
|
- func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -8957,7 +9006,7 @@
|
|
8957
9006
|
variants: method, function
|
8958
9007
|
dispatch:
|
8959
9008
|
QuantizedCPU: ge_quantized_cpu
|
8960
|
-
NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
|
9009
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: ge_scalar_nested
|
8961
9010
|
tags: [core, pointwise]
|
8962
9011
|
|
8963
9012
|
- func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -9084,7 +9133,7 @@
|
|
9084
9133
|
variants: method, function
|
9085
9134
|
dispatch:
|
9086
9135
|
QuantizedCPU: gt_quantized_cpu
|
9087
|
-
NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
|
9136
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: gt_scalar_nested
|
9088
9137
|
tags: [core, pointwise]
|
9089
9138
|
|
9090
9139
|
- func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -9137,7 +9186,7 @@
|
|
9137
9186
|
structured_inherits: TensorIteratorBase
|
9138
9187
|
device_check: NoCheck # TensorIterator
|
9139
9188
|
dispatch:
|
9140
|
-
CPU, CUDA: lt_Scalar_out
|
9189
|
+
CPU, CUDA, MTIA: lt_Scalar_out
|
9141
9190
|
MPS: lt_scalar_out_mps
|
9142
9191
|
QuantizedCPU: lt_out_quantized_cpu
|
9143
9192
|
tags: pointwise
|
@@ -9155,7 +9204,7 @@
|
|
9155
9204
|
structured_inherits: TensorIteratorBase
|
9156
9205
|
device_check: NoCheck # TensorIterator
|
9157
9206
|
dispatch:
|
9158
|
-
CPU, CUDA: lt_Tensor_out
|
9207
|
+
CPU, CUDA, MTIA: lt_Tensor_out
|
9159
9208
|
MPS: lt_tensor_out_mps
|
9160
9209
|
QuantizedCPU: lt_out_quantized_cpu
|
9161
9210
|
tags: pointwise
|
@@ -9274,12 +9323,12 @@
|
|
9274
9323
|
MPS: nonzero_mps
|
9275
9324
|
tags: [dynamic_output_shape, core]
|
9276
9325
|
|
9277
|
-
- func: nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
|
9326
|
+
- func: nonzero_static.out(Tensor self, *, SymInt size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
|
9278
9327
|
dispatch:
|
9279
9328
|
CPU: nonzero_static_out_cpu
|
9280
9329
|
CUDA: nonzero_static_out_cuda
|
9281
9330
|
|
9282
|
-
- func: nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor
|
9331
|
+
- func: nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor
|
9283
9332
|
variants: method, function
|
9284
9333
|
dispatch:
|
9285
9334
|
CPU: nonzero_static_cpu
|
@@ -9427,12 +9476,12 @@
|
|
9427
9476
|
|
9428
9477
|
- func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
|
9429
9478
|
dispatch:
|
9430
|
-
CPU, CUDA: cholesky_out
|
9479
|
+
CPU, CUDA, MPS: cholesky_out
|
9431
9480
|
|
9432
9481
|
- func: cholesky(Tensor self, bool upper=False) -> Tensor
|
9433
9482
|
variants: method, function
|
9434
9483
|
dispatch:
|
9435
|
-
CPU, CUDA: cholesky
|
9484
|
+
CPU, CUDA, MPS: cholesky
|
9436
9485
|
|
9437
9486
|
- func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
|
9438
9487
|
dispatch:
|
@@ -9506,15 +9555,16 @@
|
|
9506
9555
|
structured: True
|
9507
9556
|
dispatch:
|
9508
9557
|
CPU, CUDA: lu_unpack_out
|
9558
|
+
MPS: lu_unpack_out_mps
|
9509
9559
|
|
9510
9560
|
# TODO: remove dispatch section when porting TH CUDA to ATen
|
9511
|
-
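- func: multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)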
|
9561
|
+
- func: multinomial.out(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
|
9512
9562
|
tags: nondeterministic_seeded
|
9513
9563
|
dispatch:
|
9514
9564
|
CPU, CUDA: multinomial_out
|
9515
9565
|
MPS: multinomial_out_mps
|
9516
9566
|
|
9517
|
-
- func: multinomial(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
|
9567
|
+
- func: multinomial(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
|
9518
9568
|
variants: method, function
|
9519
9569
|
dispatch:
|
9520
9570
|
CPU, CUDA: multinomial
|
@@ -9602,8 +9652,7 @@
|
|
9602
9652
|
structured: True
|
9603
9653
|
structured_inherits: TensorIteratorBase
|
9604
9654
|
dispatch:
|
9605
|
-
CPU, CUDA: erfinv_out
|
9606
|
-
MPS: erfinv_out_mps
|
9655
|
+
CPU, CUDA, MPS: erfinv_out
|
9607
9656
|
SparseCPU, SparseCUDA: erfinv_sparse_out
|
9608
9657
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
|
9609
9658
|
tags: pointwise
|
@@ -9716,8 +9765,7 @@
|
|
9716
9765
|
structured: True
|
9717
9766
|
structured_inherits: TensorIteratorBase
|
9718
9767
|
dispatch:
|
9719
|
-
CPU, CUDA: lerp_Scalar
|
9720
|
-
MPS: lerp_Scalar_mps
|
9768
|
+
CPU, CUDA, MPS: lerp_Scalar
|
9721
9769
|
tags: pointwise
|
9722
9770
|
|
9723
9771
|
- func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -9816,8 +9864,7 @@
|
|
9816
9864
|
structured: True
|
9817
9865
|
structured_inherits: TensorIteratorBase
|
9818
9866
|
dispatch:
|
9819
|
-
CPU, CUDA: fmod_out
|
9820
|
-
MPS: fmod_mps_out
|
9867
|
+
CPU, CUDA, MPS: fmod_out
|
9821
9868
|
tags: pointwise
|
9822
9869
|
|
9823
9870
|
- func: fmod.Tensor(Tensor self, Tensor other) -> Tensor
|
@@ -9923,8 +9970,7 @@
|
|
9923
9970
|
structured: True
|
9924
9971
|
structured_inherits: TensorIteratorBase
|
9925
9972
|
dispatch:
|
9926
|
-
CPU, CUDA: remainder_out
|
9927
|
-
MPS: remainder_out_mps
|
9973
|
+
CPU, CUDA, MPS, MTIA: remainder_out
|
9928
9974
|
tags: pointwise
|
9929
9975
|
|
9930
9976
|
- func: remainder.Tensor(Tensor self, Tensor other) -> Tensor
|
@@ -10008,7 +10054,7 @@
|
|
10008
10054
|
structured_inherits: TensorIteratorBase
|
10009
10055
|
device_check: NoCheck # TensorIterator
|
10010
10056
|
dispatch:
|
10011
|
-
CPU, CUDA: maximum_out
|
10057
|
+
CPU, CUDA, MTIA: maximum_out
|
10012
10058
|
MPS: maximum_out_mps
|
10013
10059
|
tags: pointwise
|
10014
10060
|
|
@@ -10040,7 +10086,7 @@
|
|
10040
10086
|
structured_inherits: TensorIteratorBase
|
10041
10087
|
device_check: NoCheck # TensorIterator
|
10042
10088
|
dispatch:
|
10043
|
-
CPU, CUDA: minimum_out
|
10089
|
+
CPU, CUDA, MTIA: minimum_out
|
10044
10090
|
MPS: minimum_out_mps
|
10045
10091
|
tags: pointwise
|
10046
10092
|
|
@@ -10192,7 +10238,7 @@
|
|
10192
10238
|
device_check: NoCheck
|
10193
10239
|
device_guard: False
|
10194
10240
|
dispatch:
|
10195
|
-
CPU, CUDA, Meta, MPS: unfold
|
10241
|
+
CPU, CUDA, Meta, MPS, MTIA: unfold
|
10196
10242
|
QuantizedCPU, QuantizedCUDA: unfold
|
10197
10243
|
|
10198
10244
|
- func: unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor
|
@@ -10305,7 +10351,7 @@
|
|
10305
10351
|
MPS: normal_mps_
|
10306
10352
|
Meta: normal_meta_
|
10307
10353
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
|
10308
|
-
NestedTensorCPU, NestedTensorCUDA: normal_nested_
|
10354
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: normal_nested_
|
10309
10355
|
autogen: normal.out
|
10310
10356
|
|
10311
10357
|
# Only used by the functionalization pass.
|
@@ -10373,7 +10419,7 @@
|
|
10373
10419
|
variants: method, function
|
10374
10420
|
dispatch:
|
10375
10421
|
CompositeExplicitAutograd: alias
|
10376
|
-
NestedTensorCPU, NestedTensorCUDA: alias_nested
|
10422
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: alias_nested
|
10377
10423
|
tags: core
|
10378
10424
|
|
10379
10425
|
- func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
|
@@ -10381,6 +10427,7 @@
|
|
10381
10427
|
dispatch:
|
10382
10428
|
CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
|
10383
10429
|
CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
|
10430
|
+
MPS: _amp_foreach_non_finite_check_and_unscale_mps_
|
10384
10431
|
autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out
|
10385
10432
|
|
10386
10433
|
- func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
|
@@ -10388,6 +10435,7 @@
|
|
10388
10435
|
dispatch:
|
10389
10436
|
CUDA: _amp_update_scale_cuda_
|
10390
10437
|
CPU: _amp_update_scale_cpu_
|
10438
|
+
MPS: _amp_update_scale_mps_
|
10391
10439
|
autogen: _amp_update_scale, _amp_update_scale.out
|
10392
10440
|
|
10393
10441
|
#- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
|
@@ -11790,7 +11838,7 @@
|
|
11790
11838
|
structured_delegate: elu.out
|
11791
11839
|
device_check: NoCheck # TensorIterator
|
11792
11840
|
python_module: nn
|
11793
|
-
tags: pointwise
|
11841
|
+
tags: [core, pointwise]
|
11794
11842
|
|
11795
11843
|
- func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)
|
11796
11844
|
structured: True
|
@@ -11854,8 +11902,7 @@
|
|
11854
11902
|
device_check: NoCheck # TensorIterator
|
11855
11903
|
python_module: nn
|
11856
11904
|
dispatch:
|
11857
|
-
CPU, CUDA: hardsigmoid_out
|
11858
|
-
MPS: hardsigmoid_out_mps
|
11905
|
+
CPU, CUDA, MPS: hardsigmoid_out
|
11859
11906
|
QuantizedCPU: hardsigmoid_out_quantized_cpu
|
11860
11907
|
|
11861
11908
|
- func: hardsigmoid(Tensor self) -> Tensor
|
@@ -11876,8 +11923,7 @@
|
|
11876
11923
|
structured_inherits: TensorIteratorBase
|
11877
11924
|
python_module: nn
|
11878
11925
|
dispatch:
|
11879
|
-
CPU, CUDA: hardsigmoid_backward_out
|
11880
|
-
MPS: hardsigmoid_backward_out_mps
|
11926
|
+
CPU, CUDA, MPS: hardsigmoid_backward_out
|
11881
11927
|
|
11882
11928
|
- func: hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor
|
11883
11929
|
structured_delegate: hardsigmoid_backward.grad_input
|
@@ -11921,28 +11967,24 @@
|
|
11921
11967
|
device_check: NoCheck # TensorIterator
|
11922
11968
|
python_module: nn
|
11923
11969
|
dispatch:
|
11924
|
-
CPU, CUDA: hardswish_out
|
11925
|
-
MPS: hardswish_out_mps
|
11970
|
+
CPU, CUDA, MPS: hardswish_out
|
11926
11971
|
|
11927
11972
|
- func: hardswish(Tensor self) -> Tensor
|
11928
11973
|
device_check: NoCheck # TensorIterator
|
11929
11974
|
python_module: nn
|
11930
11975
|
dispatch:
|
11931
|
-
CPU, CUDA: hardswish
|
11932
|
-
MPS: hardswish_mps
|
11976
|
+
CPU, CUDA, MPS: hardswish
|
11933
11977
|
|
11934
11978
|
- func: hardswish_(Tensor(a!) self) -> Tensor(a!)
|
11935
11979
|
device_check: NoCheck # TensorIterator
|
11936
11980
|
python_module: nn
|
11937
11981
|
dispatch:
|
11938
|
-
CPU, CUDA: hardswish_
|
11939
|
-
MPS: hardswish_mps_
|
11982
|
+
CPU, CUDA, MPS: hardswish_
|
11940
11983
|
|
11941
11984
|
- func: hardswish_backward(Tensor grad_output, Tensor self) -> Tensor
|
11942
11985
|
python_module: nn
|
11943
11986
|
dispatch:
|
11944
|
-
CPU, CUDA: hardswish_backward
|
11945
|
-
MPS: hardswish_backward_mps
|
11987
|
+
CPU, CUDA, MPS: hardswish_backward
|
11946
11988
|
autogen: hardswish_backward.out
|
11947
11989
|
|
11948
11990
|
- func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -11951,8 +11993,7 @@
|
|
11951
11993
|
device_check: NoCheck # TensorIterator
|
11952
11994
|
python_module: nn
|
11953
11995
|
dispatch:
|
11954
|
-
CPU, CUDA: leaky_relu_out
|
11955
|
-
MPS: leaky_relu_out_mps
|
11996
|
+
CPU, CUDA, MPS: leaky_relu_out
|
11956
11997
|
QuantizedCPU: leaky_relu_out_quantized_cpu
|
11957
11998
|
|
11958
11999
|
- func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor
|
@@ -11968,8 +12009,7 @@
|
|
11968
12009
|
structured_inherits: TensorIteratorBase
|
11969
12010
|
python_module: nn
|
11970
12011
|
dispatch:
|
11971
|
-
CPU, CUDA: leaky_relu_backward_out
|
11972
|
-
MPS: leaky_relu_backward_out_mps
|
12012
|
+
CPU, CUDA, MPS: leaky_relu_backward_out
|
11973
12013
|
|
11974
12014
|
- func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
|
11975
12015
|
structured_delegate: leaky_relu_backward.grad_input
|
@@ -12081,8 +12121,7 @@
|
|
12081
12121
|
device_check: NoCheck # TensorIterator
|
12082
12122
|
python_module: nn
|
12083
12123
|
dispatch:
|
12084
|
-
CPU, CUDA: softshrink_out
|
12085
|
-
MPS: softshrink_out_mps
|
12124
|
+
CPU, CUDA, MPS: softshrink_out
|
12086
12125
|
|
12087
12126
|
- func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
|
12088
12127
|
structured_delegate: softshrink.out
|
@@ -12095,8 +12134,7 @@
|
|
12095
12134
|
structured_inherits: TensorIteratorBase
|
12096
12135
|
python_module: nn
|
12097
12136
|
dispatch:
|
12098
|
-
CPU, CUDA: softshrink_backward_out
|
12099
|
-
MPS: softshrink_backward_out_mps
|
12137
|
+
CPU, CUDA, MPS: softshrink_backward_out
|
12100
12138
|
|
12101
12139
|
- func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
|
12102
12140
|
structured_delegate: softshrink_backward.grad_input
|
@@ -12711,6 +12749,7 @@
|
|
12711
12749
|
dispatch:
|
12712
12750
|
CPU: _upsample_bilinear2d_aa_out_cpu
|
12713
12751
|
CUDA: _upsample_bilinear2d_aa_out_cuda
|
12752
|
+
MPS: _upsample_bilinear2d_aa_out_mps
|
12714
12753
|
|
12715
12754
|
- func: _upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
|
12716
12755
|
python_module: nn
|
@@ -12757,6 +12796,7 @@
|
|
12757
12796
|
dispatch:
|
12758
12797
|
CPU: _upsample_bicubic2d_aa_out_cpu
|
12759
12798
|
CUDA: _upsample_bicubic2d_aa_out_cuda
|
12799
|
+
MPS: _upsample_bicubic2d_aa_out_mps
|
12760
12800
|
|
12761
12801
|
- func: _upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
|
12762
12802
|
python_module: nn
|
@@ -12779,6 +12819,7 @@
|
|
12779
12819
|
dispatch:
|
12780
12820
|
CPU: upsample_trilinear3d_out_cpu
|
12781
12821
|
CUDA: upsample_trilinear3d_out_cuda
|
12822
|
+
MPS: upsample_trilinear3d_out_mps
|
12782
12823
|
|
12783
12824
|
- func: upsample_trilinear3d(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
|
12784
12825
|
python_module: nn
|
@@ -12790,6 +12831,7 @@
|
|
12790
12831
|
dispatch:
|
12791
12832
|
CPU: upsample_trilinear3d_backward_out_cpu
|
12792
12833
|
CUDA: upsample_trilinear3d_backward_out_cuda
|
12834
|
+
MPS: upsample_trilinear3d_backward_out_mps
|
12793
12835
|
|
12794
12836
|
- func: upsample_trilinear3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
|
12795
12837
|
python_module: nn
|
@@ -12901,6 +12943,7 @@
|
|
12901
12943
|
dispatch:
|
12902
12944
|
CPU: upsample_nearest3d_out_cpu
|
12903
12945
|
CUDA: upsample_nearest3d_out_cuda
|
12946
|
+
MPS: upsample_nearest3d_out_mps
|
12904
12947
|
|
12905
12948
|
- func: _upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
|
12906
12949
|
python_module: nn
|
@@ -12908,6 +12951,7 @@
|
|
12908
12951
|
dispatch:
|
12909
12952
|
CPU: _upsample_nearest_exact3d_out_cpu
|
12910
12953
|
CUDA: _upsample_nearest_exact3d_out_cuda
|
12954
|
+
MPS: _upsample_nearest_exact3d_out_mps
|
12911
12955
|
|
12912
12956
|
- func: upsample_nearest3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
|
12913
12957
|
python_module: nn
|
@@ -12927,6 +12971,7 @@
|
|
12927
12971
|
dispatch:
|
12928
12972
|
CPU: upsample_nearest3d_backward_out_cpu
|
12929
12973
|
CUDA: upsample_nearest3d_backward_out_cuda
|
12974
|
+
MPS: upsample_nearest3d_backward_out_mps
|
12930
12975
|
|
12931
12976
|
- func: _upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
|
12932
12977
|
python_module: nn
|
@@ -12934,6 +12979,7 @@
|
|
12934
12979
|
dispatch:
|
12935
12980
|
CPU: _upsample_nearest_exact3d_backward_out_cpu
|
12936
12981
|
CUDA: _upsample_nearest_exact3d_backward_out_cuda
|
12982
|
+
MPS: _upsample_nearest_exact3d_backward_out_mps
|
12937
12983
|
|
12938
12984
|
- func: upsample_nearest3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
|
12939
12985
|
python_module: nn
|
@@ -12976,7 +13022,7 @@
|
|
12976
13022
|
structured: True
|
12977
13023
|
structured_inherits: TensorIteratorBase
|
12978
13024
|
dispatch:
|
12979
|
-
CPU, CUDA: tanh_backward_out
|
13025
|
+
CPU, CUDA, MTIA: tanh_backward_out
|
12980
13026
|
MPS: tanh_backward_out_mps
|
12981
13027
|
tags: pointwise
|
12982
13028
|
|
@@ -13058,7 +13104,6 @@
|
|
13058
13104
|
autogen: _slow_conv2d_backward.output_mask_out
|
13059
13105
|
|
13060
13106
|
- func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
|
13061
|
-
use_const_ref_for_mutable_tensors: True
|
13062
13107
|
python_module: nn
|
13063
13108
|
dispatch:
|
13064
13109
|
CUDA: conv_depthwise2d_cuda_out
|
@@ -13109,12 +13154,14 @@
|
|
13109
13154
|
dispatch:
|
13110
13155
|
CPU: col2im_out_cpu
|
13111
13156
|
CUDA: col2im_out_cuda
|
13157
|
+
MPS: col2im_out_mps
|
13112
13158
|
|
13113
13159
|
- func: col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
|
13114
13160
|
python_module: nn
|
13115
13161
|
dispatch:
|
13116
13162
|
CPU: col2im_cpu
|
13117
13163
|
CUDA: col2im_cuda
|
13164
|
+
MPS: col2im_mps
|
13118
13165
|
tags: core
|
13119
13166
|
|
13120
13167
|
- func: column_stack(Tensor[] tensors) -> Tensor
|
@@ -13147,7 +13194,7 @@
|
|
13147
13194
|
device_guard: False
|
13148
13195
|
dispatch:
|
13149
13196
|
CompositeExplicitAutograd: isinf
|
13150
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_isinf
|
13197
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isinf
|
13151
13198
|
SparseCPU, SparseCUDA: isinf_sparse
|
13152
13199
|
SparseMeta: isinf_sparse_meta
|
13153
13200
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
|
@@ -13163,7 +13210,7 @@
|
|
13163
13210
|
variants: function, method
|
13164
13211
|
structured_delegate: isposinf.out
|
13165
13212
|
dispatch:
|
13166
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_isposinf
|
13213
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isposinf
|
13167
13214
|
SparseCPU, SparseCUDA: isposinf_sparse
|
13168
13215
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
|
13169
13216
|
tags: pointwise
|
@@ -13181,7 +13228,7 @@
|
|
13181
13228
|
variants: function, method
|
13182
13229
|
structured_delegate: isneginf.out
|
13183
13230
|
dispatch:
|
13184
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_isneginf
|
13231
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isneginf
|
13185
13232
|
SparseCPU, SparseCUDA: isneginf_sparse
|
13186
13233
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
|
13187
13234
|
tags: pointwise
|
@@ -13225,7 +13272,7 @@
|
|
13225
13272
|
python_module: special
|
13226
13273
|
variants: function
|
13227
13274
|
dispatch:
|
13228
|
-
CPU, CUDA: special_entr_out
|
13275
|
+
CPU, CUDA, MPS: special_entr_out
|
13229
13276
|
tags: pointwise
|
13230
13277
|
|
13231
13278
|
- func: special_ndtri(Tensor self) -> Tensor
|
@@ -13372,7 +13419,7 @@
|
|
13372
13419
|
python_module: special
|
13373
13420
|
variants: function
|
13374
13421
|
dispatch:
|
13375
|
-
CPU, CUDA: special_xlog1py_out
|
13422
|
+
CPU, CUDA, MPS: special_xlog1py_out
|
13376
13423
|
tags: pointwise
|
13377
13424
|
|
13378
13425
|
- func: special_xlog1py.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -13451,7 +13498,7 @@
|
|
13451
13498
|
python_module: special
|
13452
13499
|
variants: function
|
13453
13500
|
dispatch:
|
13454
|
-
CPU, CUDA: special_zeta_out
|
13501
|
+
CPU, CUDA, MPS: special_zeta_out
|
13455
13502
|
tags: pointwise
|
13456
13503
|
|
13457
13504
|
- func: special_zeta.self_scalar_out(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -13489,7 +13536,7 @@
|
|
13489
13536
|
structured: True
|
13490
13537
|
structured_inherits: TensorIteratorBase
|
13491
13538
|
dispatch:
|
13492
|
-
CPU, CUDA: special_i0e_out
|
13539
|
+
CPU, CUDA, MPS: special_i0e_out
|
13493
13540
|
tags: pointwise
|
13494
13541
|
|
13495
13542
|
- func: special_i1(Tensor self) -> Tensor
|
@@ -13517,7 +13564,7 @@
|
|
13517
13564
|
structured: True
|
13518
13565
|
structured_inherits: TensorIteratorBase
|
13519
13566
|
dispatch:
|
13520
|
-
CPU, CUDA: special_i1e_out
|
13567
|
+
CPU, CUDA, MPS: special_i1e_out
|
13521
13568
|
tags: pointwise
|
13522
13569
|
|
13523
13570
|
- func: special_logit(Tensor self, float? eps=None) -> Tensor
|
@@ -13744,7 +13791,6 @@
|
|
13744
13791
|
CompositeImplicitAutograd: fft_hfft2_symint
|
13745
13792
|
|
13746
13793
|
- func: fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
13747
|
-
use_const_ref_for_mutable_tensors: True
|
13748
13794
|
python_module: fft
|
13749
13795
|
variants: function
|
13750
13796
|
dispatch:
|
@@ -13758,7 +13804,6 @@
|
|
13758
13804
|
CompositeImplicitAutograd: fft_ihfft2_symint
|
13759
13805
|
|
13760
13806
|
- func: fft_ihfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
13761
|
-
use_const_ref_for_mutable_tensors: True
|
13762
13807
|
python_module: fft
|
13763
13808
|
variants: function
|
13764
13809
|
dispatch:
|
@@ -13820,7 +13865,6 @@
|
|
13820
13865
|
CompositeImplicitAutograd: fft_hfftn_symint
|
13821
13866
|
|
13822
13867
|
- func: fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
13823
|
-
use_const_ref_for_mutable_tensors: True
|
13824
13868
|
python_module: fft
|
13825
13869
|
variants: function
|
13826
13870
|
dispatch:
|
@@ -13834,7 +13878,6 @@
|
|
13834
13878
|
CompositeImplicitAutograd: fft_ihfftn_symint
|
13835
13879
|
|
13836
13880
|
- func: fft_ihfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
13837
|
-
use_const_ref_for_mutable_tensors: True
|
13838
13881
|
python_module: fft
|
13839
13882
|
variants: function
|
13840
13883
|
dispatch:
|
@@ -13890,7 +13933,7 @@
|
|
13890
13933
|
python_module: linalg
|
13891
13934
|
structured: True
|
13892
13935
|
dispatch:
|
13893
|
-
CPU, CUDA: linalg_cholesky_ex_out
|
13936
|
+
CPU, CUDA, MPS: linalg_cholesky_ex_out
|
13894
13937
|
|
13895
13938
|
- func: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
|
13896
13939
|
python_module: linalg
|
@@ -13937,6 +13980,7 @@
|
|
13937
13980
|
structured: True
|
13938
13981
|
dispatch:
|
13939
13982
|
CPU, CUDA: linalg_lu_factor_ex_out
|
13983
|
+
MPS: linalg_lu_factor_ex_out_mps
|
13940
13984
|
|
13941
13985
|
# linalg.lu
|
13942
13986
|
- func: linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)
|
@@ -13971,7 +14015,7 @@
|
|
13971
14015
|
- func: _linalg_det.result(Tensor A, *, Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots) -> (Tensor(a!) result, Tensor(b!) LU, Tensor(c!) pivots)
|
13972
14016
|
structured: True
|
13973
14017
|
dispatch:
|
13974
|
-
CPU, CUDA: _linalg_det_out
|
14018
|
+
CPU, CUDA, MPS: _linalg_det_out
|
13975
14019
|
|
13976
14020
|
- func: linalg_det(Tensor A) -> Tensor
|
13977
14021
|
python_module: linalg
|
@@ -14058,7 +14102,7 @@
|
|
14058
14102
|
- func: _linalg_slogdet.sign(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots) -> (Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots)
|
14059
14103
|
structured: True
|
14060
14104
|
dispatch:
|
14061
|
-
CPU, CUDA: _linalg_slogdet_out
|
14105
|
+
CPU, CUDA, MPS: _linalg_slogdet_out
|
14062
14106
|
|
14063
14107
|
- func: linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet)
|
14064
14108
|
python_module: linalg
|
@@ -14300,6 +14344,7 @@
|
|
14300
14344
|
structured: True
|
14301
14345
|
dispatch:
|
14302
14346
|
CPU, CUDA: _linalg_solve_ex_out
|
14347
|
+
MPS: _linalg_solve_ex_out_mps
|
14303
14348
|
|
14304
14349
|
- func: linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor info)
|
14305
14350
|
python_module: linalg
|
@@ -14458,13 +14503,13 @@
|
|
14458
14503
|
dispatch:
|
14459
14504
|
# the NestedTensor keys are necessary because NestedTensor has been removed
|
14460
14505
|
# from the CompositeExplicitAutograd keyset see Note [NestedTensor Not Included in Backend Keys]
|
14461
|
-
CompositeExplicitAutograd, NestedTensorCPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_fullcoverage
|
14506
|
+
CompositeExplicitAutograd, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_fullcoverage
|
14462
14507
|
autogen: _test_autograd_multiple_dispatch.fullcoverage_out
|
14463
14508
|
|
14464
14509
|
# Note: this function is only for testing.
|
14465
14510
|
- func: _test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor
|
14466
14511
|
dispatch:
|
14467
|
-
CompositeImplicitAutograd, NestedTensorCPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_ntonly
|
14512
|
+
CompositeImplicitAutograd, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_ntonly
|
14468
14513
|
|
14469
14514
|
# Note: this function is only for testing.
|
14470
14515
|
- func: _test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a)
|
@@ -14809,13 +14854,13 @@
|
|
14809
14854
|
- func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
|
14810
14855
|
dispatch:
|
14811
14856
|
CompositeExplicitAutograd: _safe_softmax
|
14812
|
-
NestedTensorCPU, NestedTensorCUDA: _safe_softmax
|
14857
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _safe_softmax
|
14813
14858
|
|
14814
14859
|
# Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
|
14815
14860
|
- func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
|
14816
14861
|
variants: function
|
14817
14862
|
dispatch:
|
14818
|
-
CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_encoder_layer_forward
|
14863
|
+
CPU, CUDA, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: transformer_encoder_layer_forward
|
14819
14864
|
autogen: _transformer_encoder_layer_fwd.out
|
14820
14865
|
|
14821
14866
|
- func: _native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None) -> (Tensor, Tensor)
|
@@ -14837,6 +14882,7 @@
|
|
14837
14882
|
Meta: _fused_sdp_choice_meta
|
14838
14883
|
CPU, NestedTensorCPU: _fused_sdp_choice_cpp
|
14839
14884
|
CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
|
14885
|
+
XPU: _fused_sdp_choice_xpu
|
14840
14886
|
tags: nondeterministic_seeded
|
14841
14887
|
|
14842
14888
|
- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
|
@@ -14848,7 +14894,7 @@
|
|
14848
14894
|
MPS: _scaled_dot_product_attention_math_mps
|
14849
14895
|
tags: nondeterministic_seeded
|
14850
14896
|
|
14851
|
-
- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor
|
14897
|
+
- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
|
14852
14898
|
dispatch:
|
14853
14899
|
CUDA: _scaled_dot_product_flash_attention_cuda
|
14854
14900
|
NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
|
@@ -14862,6 +14908,7 @@
|
|
14862
14908
|
- func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
|
14863
14909
|
dispatch:
|
14864
14910
|
CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
|
14911
|
+
XPU: _scaled_dot_product_fused_attention_overrideable_xpu
|
14865
14912
|
tags: nondeterministic_seeded
|
14866
14913
|
|
14867
14914
|
- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
|
@@ -14898,6 +14945,7 @@
|
|
14898
14945
|
- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
|
14899
14946
|
dispatch:
|
14900
14947
|
CUDA: _scaled_dot_product_cudnn_attention_cuda
|
14948
|
+
NestedTensorCUDA: _scaled_dot_product_cudnn_attention_nestedtensor_cuda
|
14901
14949
|
tags: nondeterministic_seeded
|
14902
14950
|
|
14903
14951
|
- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
|
@@ -14905,13 +14953,13 @@
|
|
14905
14953
|
CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
|
14906
14954
|
tags: nondeterministic_seeded
|
14907
14955
|
|
14908
|
-
- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor
|
14956
|
+
- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor rng_state, Tensor unused, Tensor debug_attn_mask)
|
14909
14957
|
variants: function
|
14910
14958
|
dispatch:
|
14911
14959
|
CUDA: _flash_attention_forward
|
14912
14960
|
tags: nondeterministic_seeded
|
14913
14961
|
|
14914
|
-
- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor
|
14962
|
+
- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor rng_state, Tensor unused, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
|
14915
14963
|
device_check: NoCheck
|
14916
14964
|
variants: function
|
14917
14965
|
dispatch:
|
@@ -14930,6 +14978,11 @@
|
|
14930
14978
|
dispatch:
|
14931
14979
|
CUDA: _efficient_attention_backward
|
14932
14980
|
|
14981
|
+
- func: _cudnn_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
|
14982
|
+
dispatch:
|
14983
|
+
CUDA: _cudnn_attention_forward
|
14984
|
+
tags: nondeterministic_seeded
|
14985
|
+
|
14933
14986
|
- func: _triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor
|
14934
14987
|
variants: function
|
14935
14988
|
dispatch:
|
@@ -14972,7 +15025,7 @@
|
|
14972
15025
|
|
14973
15026
|
- func: special_bessel_j0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
14974
15027
|
dispatch:
|
14975
|
-
CPU, CUDA: special_bessel_j0_out
|
15028
|
+
CPU, CUDA, MPS: special_bessel_j0_out
|
14976
15029
|
python_module: special
|
14977
15030
|
structured_inherits: TensorIteratorBase
|
14978
15031
|
structured: True
|
@@ -14987,7 +15040,7 @@
|
|
14987
15040
|
|
14988
15041
|
- func: special_bessel_j1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
14989
15042
|
dispatch:
|
14990
|
-
CPU, CUDA: special_bessel_j1_out
|
15043
|
+
CPU, CUDA, MPS: special_bessel_j1_out
|
14991
15044
|
python_module: special
|
14992
15045
|
structured_inherits: TensorIteratorBase
|
14993
15046
|
structured: True
|
@@ -15002,7 +15055,7 @@
|
|
15002
15055
|
|
15003
15056
|
- func: special_bessel_y0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
15004
15057
|
dispatch:
|
15005
|
-
CPU, CUDA: special_bessel_y0_out
|
15058
|
+
CPU, CUDA, MPS: special_bessel_y0_out
|
15006
15059
|
python_module: special
|
15007
15060
|
structured_inherits: TensorIteratorBase
|
15008
15061
|
structured: True
|
@@ -15017,7 +15070,7 @@
|
|
15017
15070
|
|
15018
15071
|
- func: special_bessel_y1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
15019
15072
|
dispatch:
|
15020
|
-
CPU, CUDA: special_bessel_y1_out
|
15073
|
+
CPU, CUDA, MPS: special_bessel_y1_out
|
15021
15074
|
python_module: special
|
15022
15075
|
structured_inherits: TensorIteratorBase
|
15023
15076
|
structured: True
|
@@ -15050,7 +15103,7 @@
|
|
15050
15103
|
- func: special_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15051
15104
|
device_check: NoCheck
|
15052
15105
|
dispatch:
|
15053
|
-
CPU, CUDA: special_chebyshev_polynomial_t_out
|
15106
|
+
CPU, CUDA, MPS: special_chebyshev_polynomial_t_out
|
15054
15107
|
python_module: special
|
15055
15108
|
structured_inherits: TensorIteratorBase
|
15056
15109
|
structured: True
|
@@ -15099,7 +15152,7 @@
|
|
15099
15152
|
- func: special_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15100
15153
|
device_check: NoCheck
|
15101
15154
|
dispatch:
|
15102
|
-
CPU, CUDA: special_chebyshev_polynomial_u_out
|
15155
|
+
CPU, CUDA, MPS: special_chebyshev_polynomial_u_out
|
15103
15156
|
python_module: special
|
15104
15157
|
structured_inherits: TensorIteratorBase
|
15105
15158
|
structured: True
|
@@ -15148,7 +15201,7 @@
|
|
15148
15201
|
- func: special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15149
15202
|
device_check: NoCheck
|
15150
15203
|
dispatch:
|
15151
|
-
CPU, CUDA: special_chebyshev_polynomial_v_out
|
15204
|
+
CPU, CUDA, MPS: special_chebyshev_polynomial_v_out
|
15152
15205
|
python_module: special
|
15153
15206
|
structured_inherits: TensorIteratorBase
|
15154
15207
|
structured: True
|
@@ -15197,7 +15250,7 @@
|
|
15197
15250
|
- func: special_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15198
15251
|
device_check: NoCheck
|
15199
15252
|
dispatch:
|
15200
|
-
CPU, CUDA: special_chebyshev_polynomial_w_out
|
15253
|
+
CPU, CUDA, MPS: special_chebyshev_polynomial_w_out
|
15201
15254
|
python_module: special
|
15202
15255
|
structured_inherits: TensorIteratorBase
|
15203
15256
|
structured: True
|
@@ -15246,7 +15299,7 @@
|
|
15246
15299
|
- func: special_hermite_polynomial_h.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15247
15300
|
device_check: NoCheck
|
15248
15301
|
dispatch:
|
15249
|
-
CPU, CUDA: special_hermite_polynomial_h_out
|
15302
|
+
CPU, CUDA, MPS: special_hermite_polynomial_h_out
|
15250
15303
|
python_module: special
|
15251
15304
|
structured_inherits: TensorIteratorBase
|
15252
15305
|
structured: True
|
@@ -15295,7 +15348,7 @@
|
|
15295
15348
|
- func: special_hermite_polynomial_he.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
|
15296
15349
|
device_check: NoCheck
|
15297
15350
|
dispatch:
|
15298
|
-
CPU, CUDA: special_hermite_polynomial_he_out
|
15351
|
+
CPU, CUDA, MPS: special_hermite_polynomial_he_out
|
15299
15352
|
python_module: special
|
15300
15353
|
structured_inherits: TensorIteratorBase
|
15301
15354
|
structured: True
|
@@ -15424,7 +15477,7 @@
|
|
15424
15477
|
|
15425
15478
|
- func: special_modified_bessel_i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
15426
15479
|
dispatch:
|
15427
|
-
CPU, CUDA: special_modified_bessel_i0_out
|
15480
|
+
CPU, CUDA, MPS: special_modified_bessel_i0_out
|
15428
15481
|
python_module: special
|
15429
15482
|
structured_inherits: TensorIteratorBase
|
15430
15483
|
structured: True
|
@@ -15439,7 +15492,7 @@
|
|
15439
15492
|
|
15440
15493
|
- func: special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
15441
15494
|
dispatch:
|
15442
|
-
CPU, CUDA: special_modified_bessel_i1_out
|
15495
|
+
CPU, CUDA, MPS: special_modified_bessel_i1_out
|
15443
15496
|
python_module: special
|
15444
15497
|
structured_inherits: TensorIteratorBase
|
15445
15498
|
structured: True
|
@@ -15454,7 +15507,7 @@
|
|
15454
15507
|
|
15455
15508
|
- func: special_modified_bessel_k0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
15456
15509
|
dispatch:
|
15457
|
-
CPU, CUDA: special_modified_bessel_k0_out
|
15510
|
+
CPU, CUDA, MPS: special_modified_bessel_k0_out
|
15458
15511
|
python_module: special
|
15459
15512
|
structured_inherits: TensorIteratorBase
|
15460
15513
|
structured: True
|
@@ -15469,7 +15522,7 @@
|
|
15469
15522
|
|
15470
15523
|
- func: special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
15471
15524
|
dispatch:
|
15472
|
-
CPU, CUDA: special_modified_bessel_k1_out
|
15525
|
+
CPU, CUDA, MPS: special_modified_bessel_k1_out
|
15473
15526
|
python_module: special
|
15474
15527
|
structured_inherits: TensorIteratorBase
|
15475
15528
|
structured: True
|
@@ -15484,7 +15537,7 @@
|
|
15484
15537
|
|
15485
15538
|
- func: special_scaled_modified_bessel_k0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
|
15486
15539
|
dispatch:
|
15487
|
-
CPU, CUDA: special_scaled_modified_bessel_k0_out
|
15540
|
+
CPU, CUDA, MPS: special_scaled_modified_bessel_k0_out
|
15488
15541
|
python_module: special
|
15489
15542
|
structured_inherits: TensorIteratorBase
|
15490
15543
|
structured: True
|
@@ -15499,7 +15552,7 @@
|
|
15499
15552
|
|
15500
15553
|
- func: special_scaled_modified_bessel_k1.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
|
15501
15554
|
dispatch:
|
15502
|
-
CPU, CUDA: special_scaled_modified_bessel_k1_out
|
15555
|
+
CPU, CUDA, MPS: special_scaled_modified_bessel_k1_out
|
15503
15556
|
python_module: special
|
15504
15557
|
structured_inherits: TensorIteratorBase
|
15505
15558
|
structured: True
|
@@ -15710,7 +15763,7 @@
|
|
15710
15763
|
|
15711
15764
|
- func: special_spherical_bessel_j0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
|
15712
15765
|
dispatch:
|
15713
|
-
CPU, CUDA: special_spherical_bessel_j0_out
|
15766
|
+
CPU, CUDA, MPS: special_spherical_bessel_j0_out
|
15714
15767
|
python_module: special
|
15715
15768
|
structured_inherits: TensorIteratorBase
|
15716
15769
|
structured: True
|
@@ -15790,6 +15843,13 @@
|
|
15790
15843
|
CPU: _fused_adagrad_kernel_cpu_
|
15791
15844
|
autogen: _fused_adagrad, _fused_adagrad.out
|
15792
15845
|
|
15846
|
+
- func: _fused_adagrad_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor[] state_steps, *, Tensor lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
|
15847
|
+
device_check: NoCheck
|
15848
|
+
variants: function
|
15849
|
+
dispatch:
|
15850
|
+
CPU: _fused_adagrad_kernel_cpu_
|
15851
|
+
autogen: _fused_adagrad.tensor_lr, _fused_adagrad.tensor_lr_out
|
15852
|
+
|
15793
15853
|
# This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
|
15794
15854
|
- func: _propagate_xla_data(Tensor input, Tensor output) -> ()
|
15795
15855
|
variants: function
|