torch-rb 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +12 -10
- data/codegen/native_functions.yaml +286 -244
- data/ext/torch/device.cpp +3 -0
- data/ext/torch/ext.cpp +1 -2
- data/ext/torch/ivalue.cpp +2 -0
- data/ext/torch/nn.cpp +3 -1
- data/ext/torch/ruby_arg_parser.cpp +7 -3
- data/ext/torch/ruby_arg_parser.h +5 -2
- data/ext/torch/templates.h +18 -36
- data/ext/torch/tensor.cpp +11 -8
- data/ext/torch/torch.cpp +6 -3
- data/ext/torch/utils.h +3 -1
- data/lib/torch/nn/conv1d.rb +11 -3
- data/lib/torch/nn/conv2d.rb +11 -3
- data/lib/torch/nn/conv3d.rb +11 -3
- data/lib/torch/nn/convnd.rb +1 -1
- data/lib/torch/nn/embedding.rb +10 -3
- data/lib/torch/nn/embedding_bag.rb +10 -3
- data/lib/torch/nn/functional.rb +20 -6
- data/lib/torch/nn/functional_attention.rb +30 -15
- data/lib/torch/nn/multihead_attention.rb +17 -7
- data/lib/torch/nn/rnn_base.rb +10 -3
- data/lib/torch/nn/transformer.rb +19 -10
- data/lib/torch/nn/transformer_decoder_layer.rb +7 -4
- data/lib/torch/nn/transformer_encoder_layer.rb +7 -4
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +1 -1
- metadata +3 -3
data/codegen/native_functions.yaml

@@ -288,13 +288,13 @@
   dispatch:
     CPU: native_dropout_cpu
     CUDA: native_dropout_cuda
-    NestedTensorCPU, NestedTensorCUDA: native_dropout_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_nested
   tags: [nondeterministic_seeded, core]
   autogen: native_dropout.out

 - func: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
   dispatch:
-    CPU, NestedTensorCPU, NestedTensorCUDA: native_dropout_backward
+    CPU, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: native_dropout_backward
     CUDA: native_dropout_backward_cuda
   autogen: native_dropout_backward.out
   tags: pointwise
@@ -342,7 +342,7 @@
     CompositeExplicitAutograd: abs
     SparseCPU, SparseCUDA: abs_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs
   tags: [core, pointwise]

 - func: abs_(Tensor(a!) self) -> Tensor(a!)
@@ -352,13 +352,12 @@
     CompositeExplicitAutograd: abs_
     SparseCPU, SparseCUDA: abs_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_abs_

 - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: abs_out
-    MPS: abs_out_mps
+    CPU, CUDA, MPS: abs_out
     SparseCPU, SparseCUDA: abs_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
   tags: pointwise
@@ -431,7 +430,7 @@
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn
   tags: pointwise

 - func: sgn_(Tensor(a!) self) -> Tensor(a!)
@@ -440,7 +439,7 @@
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sgn_
   tags: pointwise

 - func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -527,8 +526,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: acos_out
-    MPS: acos_out_mps
+    CPU, CUDA, MPS: acos_out
   tags: pointwise

 # arccos, alias of acos
@@ -560,7 +558,7 @@
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
     MkldnnCPU: mkldnn_add
     ZeroTensor: add_zerotensor
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add_Tensor
   tags: [core, pointwise]

 - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
@@ -571,7 +569,7 @@
     SparseCPU, SparseCUDA, SparseMeta: add_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
     MkldnnCPU: mkldnn_add_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_add__Tensor
   tags: pointwise

 - func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -703,7 +701,7 @@
   structured_delegate: all.out
   variants: function, method
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_all


 - func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
@@ -942,7 +940,7 @@
 - func: as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)
   variants: function, method
   dispatch:
-    ZeroTensor, CPU, CUDA: as_strided_tensorimpl
+    ZeroTensor, CPU, CUDA, MTIA: as_strided_tensorimpl
     Meta: as_strided_tensorimpl_meta_symint
     MPS: as_strided_tensorimpl_mps
     QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
@@ -982,8 +980,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: asin_out
-    MPS: asin_out_mps
+    CPU, CUDA, MPS: asin_out
     SparseCPU, SparseCUDA: asin_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
   tags: pointwise
@@ -1020,8 +1017,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: atan_out
-    MPS: atan_out_mps
+    CPU, CUDA, MPS: atan_out
     SparseCPU, SparseCUDA: atan_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
   tags: pointwise
@@ -1073,6 +1069,16 @@
     XPU: baddbmm_out_xpu
     SparseCsrCUDA: baddbmm_out_sparse_csr_cuda

+- func: baddbmm.dtype(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _baddbmm_dtype_cuda
+
+- func: baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CUDA: _baddbmm_out_dtype_cuda
+
 - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
     CompositeExplicitAutograd: bartlett_window
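The baddbmm.dtype pair above belongs to a family of new out_dtype matmul overloads in this release (bmm.dtype, mm.dtype, and addmm.dtype appear further down), which let a CUDA matmul emit a result wider than its half-precision inputs. A minimal Ruby sketch of how this could look through torch-rb once the generated bindings expose the new schemas; the trailing ScalarType argument is an assumption read off the schema, not a confirmed torch-rb API:

require "torch"

# Hypothetical sketch: fp16 inputs, fp32 result via the new mm.dtype
# overload (dispatches to _mm_dtype_cuda; CUDA-only per this diff).
a = Torch.randn([4, 8], dtype: :float16, device: "cuda")
b = Torch.randn([8, 2], dtype: :float16, device: "cuda")

c = Torch.mm(a, b, :float32) # out_dtype as the extra positional argument
puts c.dtype # => float32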
@@ -1185,7 +1191,7 @@
     CompositeExplicitAutograd: binary_cross_entropy_with_logits
   autogen: binary_cross_entropy_with_logits.out

-- func: bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor
+- func: bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor
   variants: function, method
   dispatch:
     CPU: _bincount_cpu
@@ -1211,8 +1217,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: bitwise_not_out
-    MPS: bitwise_not_out_mps
+    CPU, CUDA, MPS, MTIA: bitwise_not_out
   tags: pointwise

 - func: copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -1262,7 +1267,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: logical_not
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_logical_not
   tags: [core, pointwise]

 - func: logical_not_(Tensor(a!) self) -> Tensor(a!)
@@ -1270,7 +1275,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: logical_not_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_logical_not_
   tags: pointwise

 - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1318,7 +1323,7 @@
 - func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: logical_and_out
+    CPU, CUDA, MTIA: logical_and_out
     MPS: logical_and_out_mps
   tags: pointwise

@@ -1339,7 +1344,7 @@
 - func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: logical_or_out
+    CPU, CUDA, MTIA: logical_or_out
     MPS: logical_or_out_mps
   tags: pointwise

@@ -1375,6 +1380,16 @@
     SparseCUDA: bmm_out_sparse_cuda
     SparseCsrCUDA: bmm_out_sparse_csr_cuda

+- func: bmm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _bmm_dtype_cuda
+
+- func: bmm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CUDA: _bmm_out_dtype_cuda
+
 - func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck
   device_guard: False
@@ -1394,7 +1409,7 @@
   dispatch:
     SparseCPU, SparseCUDA: cat_sparse
     QuantizedCPU: cat_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: cat_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: cat_nested
   tags: core

 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -1482,7 +1497,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: chunk
-    NestedTensorCPU, NestedTensorCUDA: chunk_nested_tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: chunk_nested_tensor

 - func: tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[]
   variants: function, method
@@ -1529,7 +1544,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: clamp_out
+    CPU, CUDA, MTIA: clamp_out
     MPS: clamp_out_mps
   tags: pointwise

@@ -1569,7 +1584,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: clamp_max_out
+    CPU, CUDA, MTIA: clamp_max_out
     MPS: clamp_max_out_mps
   tags: pointwise

@@ -1609,7 +1624,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: clamp_min_out
+    CPU, CUDA, MTIA: clamp_min_out
     MPS: clamp_min_out_mps
   tags: pointwise

@@ -1658,8 +1673,7 @@

 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: complex_out
-    MPS: complex_out_mps
+    CPU, CUDA, MPS: complex_out

 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1668,8 +1682,7 @@

 - func: polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: polar_out
-    MPS: polar_out_mps
+    CPU, CUDA, MPS: polar_out

 - func: constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor
   variants: function
@@ -1781,7 +1794,7 @@
     SparseCPU, SparseCUDA: copy_sparse_wrapper_
     CompositeExplicitAutograd: copy_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
-    NestedTensorCPU, NestedTensorCUDA: copy_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: copy_nested_
   autogen: copy.out

 - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
@@ -1801,7 +1814,7 @@
   variants: function, method
   structured_delegate: cos.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_cos
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_cos
   tags: [core, pointwise]

 - func: cos_(Tensor(a!) self) -> Tensor(a!)
@@ -1815,8 +1828,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: cos_out
-    MPS: cos_out_mps
+    CPU, CUDA, MPS, MTIA: cos_out
   tags: pointwise

 - func: cosh(Tensor self) -> Tensor
@@ -1836,8 +1848,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: cosh_out
-    MPS: cosh_out_mps
+    CPU, CUDA, MPS: cosh_out
   tags: pointwise

 - func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
@@ -1951,6 +1962,7 @@
   dispatch:
     CPU: cummax_helper_cpu
     CUDA: cummax_helper_cuda
+    MPS: cummax_helper_mps

 - func: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
@@ -1975,6 +1987,7 @@
   dispatch:
     CPU: cummin_helper_cpu
     CUDA: cummin_helper_cuda
+    MPS: cummin_helper_mps

 - func: cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor
   variants: function
@@ -2139,7 +2152,7 @@
   dispatch:
     SparseCPU, SparseCUDA: div_sparse
     ZeroTensor: div_zerotensor
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_div_Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Tensor
   tags: [core, pointwise]

 - func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -2155,8 +2168,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: div_out
-    MPS: div_out_mps
+    CPU, CUDA, MPS: div_out
     SparseCPU, SparseCUDA: div_out_sparse_zerodim
   tags: pointwise

@@ -2181,8 +2193,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: div_out_mode
-    MPS: div_out_mode_mps
+    CPU, CUDA, MPS: div_out_mode
     SparseCPU, SparseCUDA: div_out_sparse_zerodim
   tags: pointwise

@@ -2192,7 +2203,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: div
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_div_Scalar
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Scalar
   tags: [core, pointwise]

 - func: div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -2292,7 +2303,7 @@
 - func: embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: embedding_symint
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_embedding
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_embedding
   autogen: embedding.out
   tags: core

@@ -2498,7 +2509,7 @@
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: empty_like_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out

 - func: empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -2534,8 +2545,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: erf_out
-    MPS: erf_out_mps
+    CPU, CUDA, MPS, MTIA: erf_out
     SparseCPU, SparseCUDA: erf_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
   tags: pointwise
@@ -2557,7 +2567,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: erfc_out
+    CPU, CUDA, MPS: erfc_out
   tags: pointwise

 - func: exp(Tensor self) -> Tensor
@@ -2577,7 +2587,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA, MPS: exp_out
+    CPU, CUDA, MPS, MTIA: exp_out
   tags: pointwise

 - func: exp2(Tensor self) -> Tensor
@@ -2594,8 +2604,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: exp2_out
-    MPS: exp2_out_mps
+    CPU, CUDA, MPS: exp2_out
   tags: pointwise

 - func: expm1(Tensor self) -> Tensor
@@ -2621,8 +2630,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: expm1_out
-    MPS: expm1_out_mps
+    CPU, CUDA, MPS: expm1_out
     SparseCPU, SparseCUDA: expm1_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
   tags: pointwise
@@ -2703,7 +2711,7 @@
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: fill_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
   autogen: fill.Scalar_out

 - func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
@@ -2714,7 +2722,7 @@
     MPS: fill_tensor_mps_
     QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_
-    NestedTensorCPU, NestedTensorCUDA: fill_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: fill_nested_
   autogen: fill.Tensor_out

 - func: floor(Tensor self) -> Tensor
@@ -2749,23 +2757,20 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: floor_divide
-    MPS: floor_divide_mps
+    CPU, CUDA, MPS: floor_divide
     SparseCPU, SparseCUDA: floor_divide_sparse

 - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   dispatch:
-    CPU, CUDA: floor_divide_
-    MPS: floor_divide_mps_
+    CPU, CUDA, MPS: floor_divide_
     SparseCPU, SparseCUDA: floor_divide_sparse_

 - func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: floor_divide_out
-    MPS: floor_divide_out_mps
+    CPU, CUDA, MPS: floor_divide_out
     SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim

 - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
@@ -3100,6 +3105,7 @@
   - dim -> int dim
   dispatch:
     CPU, CUDA: index_copy_out
+    MPS: index_copy_out_mps

 - func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
   variants: method
@@ -3170,7 +3176,7 @@
   variants: function
   structured: True
   dispatch:
-    CPU, CUDA: isin_Tensor_Scalar_out
+    CPU, CUDA, MPS: isin_Tensor_Scalar_out

 - func: isin.Tensor_Scalar(Tensor elements, Scalar test_element, *, bool assume_unique=False, bool invert=False) -> Tensor
   variants: function
@@ -3181,6 +3187,7 @@
   structured: True
   dispatch:
     CPU, CUDA: isin_Scalar_Tensor_out
+    MPS: isin_Scalar_Tensor_out_mps

 - func: isin.Scalar_Tensor(Scalar element, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor
   variants: function
@@ -3191,8 +3198,8 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA, MPS: isnan
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isnan
+    CPU, CUDA, MPS, MTIA: isnan
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isnan
     SparseCPU, SparseCUDA: isnan_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
   autogen: isnan.out
@@ -3243,7 +3250,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: nested_is_same_size
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_is_same_size
     CompositeExplicitAutograd: is_same_size

 - func: is_signed(Tensor self) -> bool
@@ -3265,20 +3272,20 @@

 - func: kron.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

-- func: kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
+- func: kthvalue(Tensor self, SymInt k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: kthvalue

-- func: kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
+- func: kthvalue.values(Tensor self, SymInt k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   dispatch:
     CPU: kthvalue_out_cpu
     CUDA: kthvalue_out_cuda

-- func: kthvalue.dimname(Tensor self, int k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+- func: kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method

-- func: kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
+- func: kthvalue.dimname_out(Tensor self, SymInt k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)

 - func: layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor
   dispatch:
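The kthvalue change above only widens k from int to SymInt, which matters for symbolic-shape tracing in the codegen; the call itself behaves the same. For reference, a small usage sketch through torch-rb's generated binding (the two-value destructuring is an assumption based on how torch-rb usually returns multi-output ops):

require "torch"

x = Torch.tensor([3.0, 1.0, 2.0])

# kthvalue returns the k-th smallest element along a dim, plus its index.
values, indices = Torch.kthvalue(x, 2)
# values  => tensor(2.)  (the 2nd smallest element)
# indices => tensor(2)   (its position in x)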
@@ -3290,7 +3297,7 @@
     CUDA: layer_norm_cuda
     MPS: layer_norm_mps
     CompositeExplicitAutograd: math_native_layer_norm
-    NestedTensorCPU, NestedTensorCUDA: nested_layer_norm
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_layer_norm
   autogen: native_layer_norm.out
   tags: core

@@ -3299,7 +3306,7 @@
     CPU: layer_norm_backward_cpu
     CUDA: layer_norm_backward_cuda
     MPS: layer_norm_backward_mps
-    NestedTensorCPU, NestedTensorCUDA: layer_norm_backward_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: layer_norm_backward_nested
   autogen: native_layer_norm_backward.out
   tags: core

@@ -3307,6 +3314,10 @@
   dispatch:
     CompositeImplicitAutograd: rms_norm_symint

+- func: _fused_rms_norm(Tensor input, int normalized_shape_ndim, Tensor weight, float eps) -> Tensor
+  dispatch:
+    MPS: _fused_rms_norm_mps
+
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
   dispatch:
@@ -3323,7 +3334,7 @@

 - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: nan_to_num_out
+    CPU, CUDA, MTIA: nan_to_num_out
     MPS: nan_to_num_out_mps
     SparseCPU, SparseCUDA: nan_to_num_sparse_out
   tags: pointwise
@@ -3332,12 +3343,12 @@
   python_module: nn
   dispatch:
     CompositeImplicitAutograd: linear
-    NestedTensorCPU, NestedTensorCUDA: nested_linear
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_linear
     MPS: _mps_linear

 - func: linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: nested_linear_backward
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_linear_backward
     MPS: mps_linear_backward
   autogen: linear_backward.out

@@ -3371,7 +3382,7 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, bool split_k_one_kernel=True) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0, int split_k=1, int split_k_mode=-1) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm
   tags: needs_fixed_stride_order
@@ -3496,8 +3507,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: log_out
-    MPS: log_out_mps
+    CPU, CUDA, MPS, MTIA: log_out
   tags: pointwise

 - func: log10(Tensor self) -> Tensor
@@ -3517,8 +3527,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: log10_out
-    MPS: log10_out_mps
+    CPU, CUDA, MPS: log10_out
   tags: pointwise

 - func: log1p(Tensor self) -> Tensor
@@ -3544,8 +3553,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: log1p_out
-    MPS: log1p_out_mps
+    CPU, CUDA, MPS: log1p_out
     SparseCPU, SparseCUDA: log1p_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
   tags: pointwise
@@ -3567,8 +3575,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: log2_out
-    MPS: log2_out_mps
+    CPU, CUDA, MPS, MTIA: log2_out
   tags: pointwise

 - func: logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -3715,6 +3722,7 @@
   dispatch:
     CPU: log_softmax_cpu_out
     CUDA: log_softmax_cuda_out
+    MTIA: log_softmax_mtia_out
     MPS: log_softmax_mps_out

 - func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
@@ -3725,6 +3733,7 @@
   dispatch:
     CPU: log_softmax_backward_cpu_out
     CUDA: log_softmax_backward_cuda_out
+    MTIA: log_softmax_backward_mtia_out
     MPS: log_softmax_backward_mps_out

 - func: _logcumsumexp(Tensor self, int dim) -> Tensor
@@ -3776,17 +3785,17 @@
   variants: function, method
   dispatch:
     CompositeImplicitAutograd: matmul
-    NestedTensorCPU, NestedTensorCUDA: matmul_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_nested

 - func: matmul_backward(Tensor grad, Tensor self, Tensor other, bool[2] mask) -> (Tensor, Tensor)
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: matmul_backward_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_backward_nested
   autogen: matmul_backward.out

 - func: matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeImplicitAutograd: matmul_out
-    NestedTensorCPU, NestedTensorCUDA: matmul_out_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: matmul_out_nested

 # Alias to linalg.matrix_power
 - func: matrix_power(Tensor self, int n) -> Tensor
@@ -3848,7 +3857,7 @@
   precomputed:
   - dim -> int dim
   dispatch:
-    CPU, CUDA: max_out
+    CPU, CUDA, MTIA: max_out
     MPS: max_out_mps

 - func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
@@ -4004,6 +4013,7 @@
   dispatch:
     CPU: nanmedian_cpu
     CUDA: nanmedian_cuda
+    MPS: nanmedian_mps
   autogen: nanmedian.out

 - func: nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
@@ -4015,6 +4025,7 @@
   dispatch:
     CPU: nanmedian_out_cpu
     CUDA: nanmedian_out_cuda
+    MPS: nanmedian_out_mps

 - func: nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method
@@ -4035,7 +4046,7 @@
   precomputed:
   - dim -> int dim
   dispatch:
-    CPU, CUDA: min_out
+    CPU, CUDA, MTIA: min_out
     MPS: min_out_mps

 - func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
@@ -4143,11 +4154,20 @@
   dispatch:
     CPU: mm_out_cpu
     CUDA: mm_out_cuda
+    MTIA: mm_out_mtia
     MPS: mm_out_mps
     XPU: mm_out_xpu
     SparseCPU, SparseCUDA: _sparse_mm_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out

+- func: mm.dtype(Tensor self, Tensor mat2, ScalarType out_dtype) -> Tensor
+  dispatch:
+    CUDA: _mm_dtype_cuda
+
+- func: mm.dtype_out(Tensor self, Tensor mat2, ScalarType out_dtype, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: _mm_dtype_out_cuda
+
 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
     CPU: _int_mm_cpu
@@ -4168,6 +4188,10 @@
     MPS: _weight_int4pack_mm_mps
     CUDA: _weight_int4pack_mm_cuda

+- func: _weight_int4pack_mm_with_scales_and_zeros(Tensor self, Tensor mat2, int qGroupSize, Tensor qScale, Tensor qZeros) -> Tensor
+  dispatch:
+    XPU: _weight_int4pack_mm_xpu
+
 # Split int4 pack weight between cpu and other devices due to
 # https://github.com/pytorch/ao/issues/1117#issuecomment-2451252756.
 - func: _convert_weight_to_int4pack_for_cpu(Tensor self, int innerKTiles) -> Tensor
@@ -4226,7 +4250,7 @@
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
     MkldnnCPU: mkldnn_mul
     ZeroTensor: mul_zerotensor
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul_Tensor
   tags: [core, pointwise]

 - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -4237,7 +4261,7 @@
     SparseCPU, SparseCUDA: mul_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
     MkldnnCPU: mkldnn_mul_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Tensor
   tags: pointwise

 - func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -4245,8 +4269,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: mul_out
-    MPS: mul_out_mps
+    CPU, CUDA, MPS: mul_out
     SparseCPU: mul_out_sparse_cpu
     SparseCUDA: mul_out_sparse_cuda
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
@@ -4260,7 +4283,7 @@
   dispatch:
     CompositeExplicitAutograd: mul
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul_Scalar
   tags: [core, pointwise]

 - func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
@@ -4269,7 +4292,7 @@
   dispatch:
     CompositeExplicitAutograd: mul_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Scalar
   autogen: mul.Scalar_out
   tags: pointwise
 # multiply, alias for mul
@@ -4335,7 +4358,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: narrow_symint
-    NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: narrow_nested_symint

 - func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
   variants: function, method
@@ -4474,7 +4497,7 @@
     # NB: Although this composite mutates on the inside, it is
     # non-differentiable so NonFunctional doesn't apply
     CompositeExplicitAutograd: ones_like
-    NestedTensorCPU, NestedTensorCUDA: ones_like
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: ones_like
   autogen: ones_like.out

 - func: pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor
@@ -4756,6 +4779,14 @@
     CompositeExplicitAutograd: randint_like
   autogen: randint_like.out

+- func: randint_like.Tensor(Tensor self, Tensor high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+  tags: nondeterministic_seeded
+  dispatch:
+    # NB: Although this composite mutates on the inside, it is
+    # non-differentiable so NonFunctional doesn't apply
+    CompositeExplicitAutograd: randint_like
+  autogen: randint_like.Tensor_out
+
 - func: randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
   tags: nondeterministic_seeded
   dispatch:
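The new randint_like.Tensor overload above takes the exclusive upper bound high as a tensor rather than a plain integer. A sketch of how that might surface in Ruby, assuming torch-rb's generated Torch.randint_like picks up the tensor-high variant (not confirmed by this diff):

require "torch"

# Hypothetical sketch: a 0-dim tensor as the exclusive upper bound,
# exercising the new randint_like.Tensor overload.
x    = Torch.zeros([3], dtype: :int64)
high = Torch.tensor(10)

r = Torch.randint_like(x, high) # integer values in [0, 10)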
@@ -4865,7 +4896,7 @@
|
|
4865
4896
|
structured: True
|
4866
4897
|
structured_inherits: TensorIteratorBase
|
4867
4898
|
dispatch:
|
4868
|
-
CPU, CUDA: reciprocal_out
|
4899
|
+
CPU, CUDA, MTIA: reciprocal_out
|
4869
4900
|
MPS: reciprocal_out_mps
|
4870
4901
|
tags: pointwise
|
4871
4902
|
|
@@ -4876,7 +4907,7 @@
|
|
4876
4907
|
dispatch:
|
4877
4908
|
SparseCPU, SparseCUDA: neg_sparse
|
4878
4909
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
|
4879
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
|
4910
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg
|
4880
4911
|
tags: [core, pointwise]
|
4881
4912
|
|
4882
4913
|
- func: neg_(Tensor(a!) self) -> Tensor(a!)
|
@@ -4886,7 +4917,7 @@
|
|
4886
4917
|
dispatch:
|
4887
4918
|
SparseCPU, SparseCUDA: neg_sparse_
|
4888
4919
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
|
4889
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
|
4920
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_neg_
|
4890
4921
|
tags: pointwise
|
4891
4922
|
|
4892
4923
|
- func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -4894,8 +4925,7 @@
|
|
4894
4925
|
structured: True
|
4895
4926
|
structured_inherits: TensorIteratorBase
|
4896
4927
|
dispatch:
|
4897
|
-
CPU, CUDA: neg_out
|
4898
|
-
MPS: neg_out_mps
|
4928
|
+
CPU, CUDA, MPS, MTIA: neg_out
|
4899
4929
|
SparseCPU, SparseCUDA: neg_out_sparse
|
4900
4930
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
|
4901
4931
|
tags: pointwise
|
@@ -4957,7 +4987,7 @@
|
|
4957
4987
|
device_check: NoCheck
|
4958
4988
|
device_guard: False
|
4959
4989
|
dispatch:
|
4960
|
-
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS: _reshape_alias
|
4990
|
+
CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor, MPS, MTIA: _reshape_alias
|
4961
4991
|
# We don't need to support mkldnn since this is handled explicitly by the reshape operator.
|
4962
4992
|
|
4963
4993
|
- func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
|
@@ -5035,12 +5065,12 @@
|
|
5035
5065
|
device_check: NoCheck # TensorIterator
|
5036
5066
|
variants: function, method
|
5037
5067
|
dispatch:
|
5038
|
-
CPU, CUDA: relu
|
5068
|
+
CPU, CUDA, MTIA: relu
|
5039
5069
|
MPS: relu_mps
|
5040
5070
|
MkldnnCPU: mkldnn_relu
|
5041
5071
|
QuantizedCPU: relu_quantized_cpu
|
5042
5072
|
QuantizedCUDA: relu_quantized_cuda
|
5043
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
|
5073
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu
|
5044
5074
|
SparseCPU, SparseCUDA: relu_sparse
|
5045
5075
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
|
5046
5076
|
tags: [core, pointwise]
|
@@ -5049,12 +5079,12 @@
|
|
5049
5079
|
device_check: NoCheck # TensorIterator
|
5050
5080
|
variants: function, method
|
5051
5081
|
dispatch:
|
5052
|
-
CPU, CUDA: relu_
|
5082
|
+
CPU, CUDA, MTIA: relu_
|
5053
5083
|
MPS: relu_mps_
|
5054
5084
|
MkldnnCPU: mkldnn_relu_
|
5055
5085
|
QuantizedCPU: relu_quantized_cpu_
|
5056
5086
|
QuantizedCUDA: relu_quantized_cuda_
|
5057
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
|
5087
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_relu_
|
5058
5088
|
SparseCPU, SparseCUDA: relu_sparse_
|
5059
5089
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
|
5060
5090
|
autogen: relu.out
|
@@ -5100,7 +5130,7 @@
|
|
5100
5130
|
python_module: nn
|
5101
5131
|
dispatch:
|
5102
5132
|
QuantizedCPU: gelu_quantized_cpu_
|
5103
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_
|
5133
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_gelu_
|
5104
5134
|
|
5105
5135
|
- func: gelu(Tensor self, *, str approximate='none') -> Tensor
|
5106
5136
|
structured_delegate: gelu.out
|
@@ -5110,7 +5140,7 @@
|
|
5110
5140
|
MkldnnCPU: mkldnn_gelu
|
5111
5141
|
QuantizedCPU: gelu_quantized_cpu
|
5112
5142
|
QuantizedCUDA: gelu_quantized_cuda
|
5113
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu
|
5143
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_gelu
|
5114
5144
|
tags: [core, pointwise]
|
5115
5145
|
|
5116
5146
|
- func: gelu_backward.grad_input(Tensor grad_output, Tensor self, *, str approximate='none', Tensor(a!) grad_input) -> Tensor(a!)
|
@@ -5127,7 +5157,7 @@
|
|
5127
5157
|
python_module: nn
|
5128
5158
|
dispatch:
|
5129
5159
|
MkldnnCPU: mkldnn_gelu_backward
|
5130
|
-
NestedTensorCPU, NestedTensorCUDA: gelu_backwards_nested
|
5160
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: gelu_backwards_nested
|
5131
5161
|
tags: pointwise
|
5132
5162
|
|
5133
5163
|
- func: infinitely_differentiable_gelu_backward(Tensor grad, Tensor self) -> Tensor
|
@@ -5141,7 +5171,7 @@
|
|
5141
5171
|
structured_inherits: TensorIteratorBase
|
5142
5172
|
device_check: NoCheck # TensorIterator
|
5143
5173
|
dispatch:
|
5144
|
-
CPU, CUDA: hardshrink_out
|
5174
|
+
CPU, CUDA, MPS: hardshrink_out
|
5145
5175
|
|
5146
5176
|
- func: hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor
|
5147
5177
|
structured_delegate: hardshrink.out
|
@@ -5153,7 +5183,7 @@
|
|
5153
5183
|
structured: True
|
5154
5184
|
structured_inherits: TensorIteratorBase
|
5155
5185
|
dispatch:
|
5156
|
-
CPU, CUDA: hardshrink_backward_out
|
5186
|
+
CPU, CUDA, MPS: hardshrink_backward_out
|
5157
5187
|
|
5158
5188
|
- func: hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor
|
5159
5189
|
structured_delegate: hardshrink_backward.grad_input
|
@@ -5176,8 +5206,7 @@
|
|
5176
5206
|
structured: True
|
5177
5207
|
structured_inherits: TensorIteratorBase
|
5178
5208
|
dispatch:
|
5179
|
-
CPU, CUDA: rsqrt_out
|
5180
|
-
MPS: rsqrt_out_mps
|
5209
|
+
CPU, CUDA, MPS, MTIA: rsqrt_out
|
5181
5210
|
tags: pointwise
|
5182
5211
|
|
5183
5212
|
- func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)
|
@@ -5192,7 +5221,7 @@
|
|
5192
5221
|
dispatch:
|
5193
5222
|
CompositeExplicitAutograd: select_symint
|
5194
5223
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
|
5195
|
-
NestedTensorCPU, NestedTensorCUDA: select_nested
|
5224
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: select_nested
|
5196
5225
|
tags: core
|
5197
5226
|
|
5198
5227
|
- func: select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor
|
@@ -5208,7 +5237,7 @@
|
|
5208
5237
|
device_check: NoCheck
|
5209
5238
|
device_guard: False
|
5210
5239
|
dispatch:
|
5211
|
-
NestedTensorCPU, NestedTensorCUDA: _nested_select_backward_symint
|
5240
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_select_backward_symint
|
5212
5241
|
|
5213
5242
|
- func: selu(Tensor self) -> Tensor
|
5214
5243
|
device_check: NoCheck # TensorIterator
|
@@ -5233,14 +5262,14 @@
|
|
5233
5262
|
structured_delegate: silu.out
|
5234
5263
|
python_module: nn
|
5235
5264
|
dispatch:
|
5236
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
|
5265
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_silu
|
5237
5266
|
tags: pointwise
|
5238
5267
|
|
5239
5268
|
- func: silu_(Tensor(a!) self) -> Tensor(a!)
|
5240
5269
|
structured_delegate: silu.out
|
5241
5270
|
python_module: nn
|
5242
5271
|
dispatch:
|
5243
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
|
5272
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_silu_
|
5244
5273
|
tags: pointwise
|
5245
5274
|
|
5246
5275
|
- func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -5248,7 +5277,7 @@
|
|
5248
5277
|
structured_inherits: TensorIteratorBase
|
5249
5278
|
python_module: nn
|
5250
5279
|
dispatch:
|
5251
|
-
CPU, CUDA: silu_out
|
5280
|
+
CPU, CUDA, MTIA: silu_out
|
5252
5281
|
MPS: silu_out_mps
|
5253
5282
|
tags: pointwise
|
5254
5283
|
|
@@ -5266,7 +5295,7 @@
|
|
5266
5295
|
python_module: nn
|
5267
5296
|
dispatch:
|
5268
5297
|
CompositeImplicitAutograd: math_silu_backward
|
5269
|
-
NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
|
5298
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: silu_backward_nested
|
5270
5299
|
tags: pointwise
|
5271
5300
|
|
5272
5301
|
- func: mish(Tensor self) -> Tensor
|
@@ -5315,14 +5344,13 @@
|
|
5315
5344
|
structured: True
|
5316
5345
|
structured_inherits: TensorIteratorBase
|
5317
5346
|
dispatch:
|
5318
|
-
CPU, CUDA: sigmoid_out
|
5319
|
-
MPS: sigmoid_out_mps
|
5347
|
+
CPU, CUDA, MPS: sigmoid_out
|
5320
5348
|
tags: pointwise
|
5321
5349
|
|
5322
5350
|
- func: logit(Tensor self, float? eps=None) -> Tensor
|
5323
5351
|
variants: function, method
|
5324
5352
|
dispatch:
|
5325
|
-
CPU, CUDA: logit
|
5353
|
+
CPU, CUDA, MTIA: logit
|
5326
5354
|
MPS: logit_mps
|
5327
5355
|
tags: pointwise
|
5328
5356
|
|
@@ -5345,7 +5373,7 @@
|
|
5345
5373
|
dispatch:
|
5346
5374
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
|
5347
5375
|
SparseCPU, SparseCUDA: sin_sparse
|
5348
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sin
|
5376
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sin
|
5349
5377
|
tags: [core, pointwise]
|
5350
5378
|
|
5351
5379
|
- func: sin_(Tensor(a!) self) -> Tensor(a!)
|
@@ -5362,8 +5390,7 @@
|
|
5362
5390
|
structured: True
|
5363
5391
|
structured_inherits: TensorIteratorBase
|
5364
5392
|
dispatch:
|
5365
|
-
CPU, CUDA: sin_out
|
5366
|
-
MPS: sin_out_mps
|
5393
|
+
CPU, CUDA, MPS, MTIA: sin_out
|
5367
5394
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
|
5368
5395
|
SparseCPU, SparseCUDA: sin_sparse_out
|
5369
5396
|
tags: pointwise
|
@@ -5408,8 +5435,7 @@
|
|
5408
5435
|
structured: True
|
5409
5436
|
structured_inherits: TensorIteratorBase
|
5410
5437
|
dispatch:
|
5411
|
-
CPU, CUDA: sinh_out
|
5412
|
-
MPS: sinh_out_mps
|
5438
|
+
CPU, CUDA, MPS: sinh_out
|
5413
5439
|
SparseCPU, SparseCUDA: sinh_sparse_out
|
5414
5440
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out
|
5415
5441
|
|
@@ -5429,7 +5455,7 @@
|
|
5429
5455
|
variants: function, method
|
5430
5456
|
dispatch:
|
5431
5457
|
CompositeExplicitAutograd: detach
|
5432
|
-
NestedTensorCPU, NestedTensorCUDA: detach
|
5458
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: detach
|
5433
5459
|
|
5434
5460
|
# Like `detach()`, but modifies this `Variable` in-place. This method may
|
5435
5461
|
# only be called on non-view `Variable`s. You can use `is_view()` to check
|
@@ -5559,7 +5585,7 @@
|
|
5559
5585
|
structured_delegate: _softmax.out
|
5560
5586
|
dispatch:
|
5561
5587
|
MkldnnCPU: mkldnn_softmax
|
5562
|
-
NestedTensorCPU, NestedTensorCUDA: softmax_nested
|
5588
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: softmax_nested
|
5563
5589
|
tags: core
|
5564
5590
|
|
5565
5591
|
- func: _softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -5572,7 +5598,7 @@
|
|
5572
5598
|
- func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
|
5573
5599
|
structured_delegate: _softmax_backward_data.out
|
5574
5600
|
dispatch:
|
5575
|
-
NestedTensorCPU, NestedTensorCUDA: nested_softmax_backward
|
5601
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: nested_softmax_backward
|
5576
5602
|
|
5577
5603
|
- func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
|
5578
5604
|
structured: True
|
@@ -5616,7 +5642,7 @@
|
|
5616
5642
|
device_guard: False
|
5617
5643
|
dispatch:
|
5618
5644
|
CompositeExplicitAutograd: split_with_sizes
|
5619
|
-
NestedTensorCPU, NestedTensorCUDA: split_with_sizes_nested
|
5645
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: split_with_sizes_nested
|
5620
5646
|
tags: core
|
5621
5647
|
|
5622
5648
|
- func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
|
@@ -5644,7 +5670,7 @@
|
|
5644
5670
|
dispatch:
|
5645
5671
|
CompositeExplicitAutograd: squeeze
|
5646
5672
|
QuantizedCPU, QuantizedCUDA: squeeze_quantized
|
5647
|
-
NestedTensorCPU, NestedTensorCUDA: squeeze_nested
|
5673
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_nested
|
5648
5674
|
|
5649
5675
|
- func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
|
5650
5676
|
variants: function, method
|
@@ -5653,7 +5679,7 @@
|
|
5653
5679
|
dispatch:
|
5654
5680
|
CompositeExplicitAutograd: squeeze
|
5655
5681
|
QuantizedCPU, QuantizedCUDA: squeeze_quantized
|
5656
|
-
NestedTensorCPU, NestedTensorCUDA: squeeze_dim_nested
|
5682
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_dim_nested
|
5657
5683
|
tags: core
|
5658
5684
|
|
5659
5685
|
- func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
|
@@ -5669,7 +5695,7 @@
|
|
5669
5695
|
dispatch:
|
5670
5696
|
CompositeExplicitAutograd: squeeze
|
5671
5697
|
QuantizedCPU, QuantizedCUDA: squeeze_quantized
|
5672
|
-
NestedTensorCPU, NestedTensorCUDA: squeeze_dim_nested
|
5698
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: squeeze_dim_nested
|
5673
5699
|
tags: core
|
5674
5700
|
|
5675
5701
|
- func: squeeze_(Tensor(a!) self) -> Tensor(a!)
|
@@ -5843,7 +5869,7 @@
|
|
5843
5869
|
structured_delegate: sqrt.out
|
5844
5870
|
variants: function, method
|
5845
5871
|
dispatch:
|
5846
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sqrt
|
5872
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sqrt
|
5847
5873
|
SparseCPU, SparseCUDA: sqrt_sparse
|
5848
5874
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
|
5849
5875
|
tags: [core, pointwise]
|
@@ -5862,7 +5888,7 @@
|
|
5862
5888
|
structured: True
|
5863
5889
|
structured_inherits: TensorIteratorBase
|
5864
5890
|
dispatch:
|
5865
|
-
CPU, CUDA, MPS: sqrt_out
|
5891
|
+
CPU, CUDA, MPS, MTIA: sqrt_out
|
5866
5892
|
SparseCPU, SparseCUDA: sqrt_sparse_out
|
5867
5893
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
|
5868
5894
|
tags: pointwise
|
@@ -6019,8 +6045,7 @@
|
|
6019
6045
|
structured: True
|
6020
6046
|
structured_inherits: TensorIteratorBase
|
6021
6047
|
dispatch:
|
6022
|
-
CPU, CUDA: tan_out
|
6023
|
-
MPS: tan_out_mps
|
6048
|
+
CPU, CUDA, MPS: tan_out
|
6024
6049
|
SparseCPU, SparseCUDA: tan_sparse_out
|
6025
6050
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
|
6026
6051
|
tags: pointwise
|
@@ -6034,7 +6059,7 @@
|
|
6034
6059
|
MkldnnCPU: mkldnn_tanh
|
6035
6060
|
SparseCPU, SparseCUDA: tanh_sparse
|
6036
6061
|
SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
|
6037
|
-
NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
|
6062
|
+
NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh
|
6038
6063
|
tags: [core, pointwise]
|
6039
6064
|
|
6040
6065
|
- func: tanh_(Tensor(a!) self) -> Tensor(a!)
|
@@ -6045,7 +6070,7 @@
     MkldnnCPU: mkldnn_tanh_
     SparseCPU, SparseCUDA: tanh_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_tanh_
   tags: pointwise

 - func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -6053,7 +6078,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA, MPS: tanh_out
+    CPU, CUDA, MPS, MTIA: tanh_out
     SparseCPU, SparseCUDA: tanh_sparse_out
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
   tags: pointwise
@@ -6102,7 +6127,7 @@
     MkldnnCPU: mkldnn_relu_backward
     SparseCPU, SparseCUDA: threshold_backward_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
-    NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: threshold_backwards_nested
   tags: pointwise

 - func: tile(Tensor self, SymInt[] dims) -> Tensor
@@ -6116,7 +6141,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: transpose
-    NestedTensorCPU, NestedTensorCUDA: transpose_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: transpose_nested

 - func: transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a)
   variants: function, method
@@ -6213,13 +6238,13 @@
 - func: _nested_tensor_size(Tensor self) -> Tensor
   variants: method
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: _nested_tensor_size
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_tensor_size
   autogen: _nested_tensor_size.out

 - func: _nested_tensor_strides(Tensor self) -> Tensor
   variants: method
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: _nested_tensor_strides
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _nested_tensor_strides
   autogen: _nested_tensor_strides.out

 - func: _nested_tensor_storage_offsets(Tensor self) -> Tensor
@@ -6232,7 +6257,7 @@
 # _nested_from_padded_and_nested_example is available for testing.
 - func: _nested_from_padded_and_nested_example(Tensor padded, Tensor nt_example) -> Tensor
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_from_padded_and_nested_example
   autogen: _nested_from_padded_and_nested_example.out

 # The input arguments' types to this functions are temporary. When nested tensors switch to using SymInts for their metadata representation
@@ -6423,7 +6448,7 @@
     CompositeExplicitAutograd: unsqueeze
     SparseCPU, SparseCUDA: unsqueeze_sparse
     QuantizedCPU, QuantizedCUDA: unsqueeze_quantized
-    NestedTensorCPU, NestedTensorCUDA: unsqueeze_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: unsqueeze_nested
   tags: core

 - func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
@@ -6517,15 +6542,15 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA, MPS: where
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
+    CPU, CUDA, MPS, MTIA: where
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA, MPS: where_self_out
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_where_out
+    CPU, CUDA, MPS, MTIA: where_self_out
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_where_out

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6860,7 +6885,7 @@
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
     MkldnnCPU: mkldnn_clone
     QuantizedCPU, QuantizedCUDA: quantized_clone
-    NestedTensorCPU, NestedTensorCUDA: clone_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: clone_nested
   autogen: clone.out
   tags: [core, pointwise]

@@ -6894,7 +6919,7 @@
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
-    NestedTensorCPU, NestedTensorCUDA: zero_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out

 - func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -6914,7 +6939,7 @@
   dispatch:
     SparseCPU, SparseCUDA: sub_sparse
     ZeroTensor: sub_zerotensor
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sub_Tensor
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sub_Tensor
   tags: [core, pointwise]

 - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
@@ -6961,7 +6986,7 @@
   device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
-    CPU, CUDA: rsub
+    CPU, CUDA, MPS: rsub
   autogen: rsub.Tensor_out

 - func: heaviside.out(Tensor self, Tensor values, *, Tensor(a!) out) -> Tensor(a!)
@@ -7043,6 +7068,14 @@
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
   tags: core

+- func: addmm.dtype(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  dispatch:
+    CUDA: _addmm_dtype_cuda
+
+- func: addmm.dtype_out(Tensor self, Tensor mat1, Tensor mat2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: _addmm_dtype_out_cuda
+
 - func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
   structured_delegate: addmm.out
   variants: method
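The addmm.dtype entries added above are a CUDA-only overload of addmm that takes an explicit out_dtype, so the result can be produced in a different precision than the inputs. For orientation, a minimal torch-rb sketch of the base addmm semantics (out = beta * m + alpha * mat1.matmul(mat2)); this assumes the standard autogenerated Torch.addmm binding, not the new .dtype overload:

  m    = Torch.zeros(2, 2)
  mat1 = Torch.ones(2, 3)
  mat2 = Torch.ones(3, 2)
  out  = Torch.addmm(m, mat1, mat2)
  # every element is 3.0: beta * 0 + alpha * (a dot product over the inner dim of 3)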
@@ -7066,11 +7099,13 @@
 - func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
   variants: function
   dispatch:
+    CPU: _scaled_mm_cpu
     CUDA: _scaled_mm_cuda

 - func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
+    CPU: _scaled_mm_out_cpu
     CUDA: _scaled_mm_out_cuda

@@ -7079,6 +7114,11 @@
   dispatch:
     CUDA: _scaled_grouped_mm_cuda

+- func: _grouped_mm(Tensor self, Tensor mat2, Tensor? offs=None, Tensor? bias=None, ScalarType? out_dtype=None) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _grouped_mm_cuda
+
 # NOTE [ Sparse: autograd and API ]
 #
 #
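_grouped_mm performs several independent matrix multiplies in one CUDA launch, the access pattern mixture-of-experts layers need. A hypothetical pure-Ruby reference loop, reading offs as cumulative row offsets that delimit the groups (an illustration of the concept only, not the kernel's exact contract):

  def grouped_mm_reference(x, weights, offs)
    start = 0
    outs = offs.each_with_index.map do |stop, i|
      rows = x[start...stop]    # rows belonging to group i
      start = stop
      rows.matmul(weights[i])   # each group has its own weight matrix
    end
    Torch.cat(outs, 0)
  end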
@@ -7233,13 +7273,13 @@
   dispatch:
     CompositeImplicitAutograd: _sparse_coo_tensor_unsafe_symint

-- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None) -> ()
+- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None, bool? check_pinning=None) -> ()

-- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()
-- func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
-- func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
-- func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
-- func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout, bool? check_pinning=None) -> ()
+- func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+- func: _validate_sparse_csc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+- func: _validate_sparse_bsr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()
+- func: _validate_sparse_bsc_tensor_args(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, bool? check_pinning=None) -> ()

 - func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
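The _validate_sparse_* checkers above all gain an optional check_pinning flag, letting callers skip the pinned-memory validation of the component tensors. Those components are the indices/values pairs supplied when constructing a sparse tensor; a small sketch, assuming torch-rb exposes the autogenerated Torch.sparse_coo_tensor:

  # a 2x3 COO tensor with three nonzeros; indices is 2 x nnz, values has length nnz
  indices = Torch.tensor([[0, 1, 1], [2, 0, 2]])
  values  = Torch.tensor([3.0, 4.0, 5.0])
  sparse  = Torch.sparse_coo_tensor(indices, values, [2, 3])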
@@ -7397,7 +7437,7 @@
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
-    NestedTensorCPU, NestedTensorCUDA: values_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
   device_guard: False
@@ -7456,7 +7496,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: unbind
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_unbind

 - func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
   variants: function, method
@@ -7744,7 +7784,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: _to_copy
-    NestedTensorCPU, NestedTensorCUDA: _to_copy_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _to_copy_nested
   autogen: _to_copy.out
   tags: core

@@ -8030,7 +8070,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: masked_fill
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_masked_fill
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_masked_fill
   tags: pointwise

 - func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
@@ -8085,9 +8125,9 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS: view
+    ZeroTensor, Meta, CPU, CUDA, QuantizedCPU, QuantizedCUDA, MPS, MTIA: view
     MkldnnCPU: mkldnn_view
-    NestedTensorCPU, NestedTensorCUDA: view_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: view_nested
   tags: core

 # Warning: If you want to change the name or overload name of this
@@ -8315,7 +8355,7 @@
   structured_inherits: TensorIteratorBase
   variants: function
   dispatch:
-    CPU, CUDA: bitwise_and_out
+    CPU, CUDA, MTIA: bitwise_and_out
     MPS: bitwise_and_out_mps
   tags: pointwise

@@ -8382,7 +8422,7 @@
   structured_inherits: TensorIteratorBase
   variants: function
   dispatch:
-    CPU, CUDA: bitwise_or_out
+    CPU, CUDA, MTIA: bitwise_or_out
     MPS: bitwise_or_out_mps
   tags: pointwise

@@ -8928,7 +8968,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: eq_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: eq_scalar_nested
   tags: [core, pointwise]

 - func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8947,7 +8987,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: eq_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: eq_tensor_nested
   tags: [core, pointwise]

 - func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -8966,7 +9006,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: ge_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: ge_scalar_nested
   tags: [core, pointwise]

 - func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -9093,7 +9133,7 @@
   variants: method, function
   dispatch:
     QuantizedCPU: gt_quantized_cpu
-    NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: gt_scalar_nested
   tags: [core, pointwise]

 - func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -9146,7 +9186,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: lt_Scalar_out
+    CPU, CUDA, MTIA: lt_Scalar_out
     MPS: lt_scalar_out_mps
     QuantizedCPU: lt_out_quantized_cpu
   tags: pointwise
@@ -9164,7 +9204,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: lt_Tensor_out
+    CPU, CUDA, MTIA: lt_Tensor_out
     MPS: lt_tensor_out_mps
     QuantizedCPU: lt_out_quantized_cpu
   tags: pointwise
@@ -9436,14 +9476,12 @@

 - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: cholesky_out
-    MPS: cholesky_mps_out
+    CPU, CUDA, MPS: cholesky_out

 - func: cholesky(Tensor self, bool upper=False) -> Tensor
   variants: method, function
   dispatch:
-    CPU, CUDA: cholesky
-    MPS: cholesky_mps
+    CPU, CUDA, MPS: cholesky

 - func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -9520,13 +9558,13 @@
     MPS: lu_unpack_out_mps

 # TODO: remove dispatch section when porting TH CUDA to ATen
-- func: multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
+- func: multinomial.out(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   tags: nondeterministic_seeded
   dispatch:
     CPU, CUDA: multinomial_out
     MPS: multinomial_out_mps

-- func: multinomial(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
+- func: multinomial(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
   variants: method, function
   dispatch:
     CPU, CUDA: multinomial
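The only schema change to multinomial is num_samples moving from int to SymInt, which lets the sample count stay symbolic under tracing; eager calls are unaffected. A sketch, assuming the autogenerated Torch.multinomial binding accepts the keyword form:

  weights = Torch.tensor([0.1, 0.3, 0.6])
  samples = Torch.multinomial(weights, 2, replacement: true)
  # => two indices in 0..2, drawn in proportion to weights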
@@ -9727,8 +9765,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: lerp_Scalar
-    MPS: lerp_Scalar_mps
+    CPU, CUDA, MPS: lerp_Scalar
   tags: pointwise

 - func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)
@@ -9827,8 +9864,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: fmod_out
-    MPS: fmod_mps_out
+    CPU, CUDA, MPS: fmod_out
   tags: pointwise

 - func: fmod.Tensor(Tensor self, Tensor other) -> Tensor
@@ -9934,8 +9970,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: remainder_out
-    MPS: remainder_out_mps
+    CPU, CUDA, MPS, MTIA: remainder_out
   tags: pointwise

 - func: remainder.Tensor(Tensor self, Tensor other) -> Tensor
@@ -10019,7 +10054,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: maximum_out
+    CPU, CUDA, MTIA: maximum_out
     MPS: maximum_out_mps
   tags: pointwise

@@ -10051,7 +10086,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: minimum_out
+    CPU, CUDA, MTIA: minimum_out
     MPS: minimum_out_mps
   tags: pointwise

@@ -10203,7 +10238,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA, Meta, MPS: unfold
+    CPU, CUDA, Meta, MPS, MTIA: unfold
     QuantizedCPU, QuantizedCUDA: unfold

 - func: unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor
@@ -10316,7 +10351,7 @@
     MPS: normal_mps_
     Meta: normal_meta_
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
-    NestedTensorCPU, NestedTensorCUDA: normal_nested_
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: normal_nested_
   autogen: normal.out

 # Only used by the functionalization pass.
@@ -10384,7 +10419,7 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: alias
-    NestedTensorCPU, NestedTensorCUDA: alias_nested
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: alias_nested
   tags: core

 - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
@@ -10392,6 +10427,7 @@
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
     CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
+    MPS: _amp_foreach_non_finite_check_and_unscale_mps_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
|
@@ -10399,6 +10435,7 @@
|
|
10399
10435
|
dispatch:
|
10400
10436
|
CUDA: _amp_update_scale_cuda_
|
10401
10437
|
CPU: _amp_update_scale_cpu_
|
10438
|
+
MPS: _amp_update_scale_mps_
|
10402
10439
|
autogen: _amp_update_scale, _amp_update_scale.out
|
10403
10440
|
|
10404
10441
|
#- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
|
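The two AMP hunks above add MPS kernels for loss-scale bookkeeping. The update rule implied by _amp_update_scale_'s parameters: back off the scale when an overflow was found, and grow it after growth_interval consecutive clean steps. A plain-Ruby sketch of that rule (illustration only, not the kernel itself):

  def amp_update_scale(scale, growth_tracker, found_inf,
                       growth_factor: 2.0, backoff_factor: 0.5, growth_interval: 2000)
    if found_inf
      [scale * backoff_factor, 0]   # overflow: shrink the scale, reset the streak
    elsif growth_tracker + 1 >= growth_interval
      [scale * growth_factor, 0]    # long clean streak: grow the scale
    else
      [scale, growth_tracker + 1]   # keep counting clean steps
    end
  end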
@@ -11801,7 +11838,7 @@
   structured_delegate: elu.out
   device_check: NoCheck # TensorIterator
   python_module: nn
-  tags: pointwise
+  tags: [core, pointwise]

 - func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
@@ -11865,8 +11902,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardsigmoid_out
-    MPS: hardsigmoid_out_mps
+    CPU, CUDA, MPS: hardsigmoid_out
     QuantizedCPU: hardsigmoid_out_quantized_cpu

 - func: hardsigmoid(Tensor self) -> Tensor
@@ -11887,8 +11923,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA: hardsigmoid_backward_out
-    MPS: hardsigmoid_backward_out_mps
+    CPU, CUDA, MPS: hardsigmoid_backward_out

 - func: hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: hardsigmoid_backward.grad_input
@@ -11932,28 +11967,24 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardswish_out
-    MPS: hardswish_out_mps
+    CPU, CUDA, MPS: hardswish_out

 - func: hardswish(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardswish
-    MPS: hardswish_mps
+    CPU, CUDA, MPS: hardswish

 - func: hardswish_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: hardswish_
-    MPS: hardswish_mps_
+    CPU, CUDA, MPS: hardswish_

 - func: hardswish_backward(Tensor grad_output, Tensor self) -> Tensor
   python_module: nn
   dispatch:
-    CPU, CUDA: hardswish_backward
-    MPS: hardswish_backward_mps
+    CPU, CUDA, MPS: hardswish_backward
   autogen: hardswish_backward.out

 - func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)
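The hardswish entries above fold the separate MPS kernels into the shared dispatch line; the function these kernels compute is hardswish(x) = x * clamp(x + 3, 0, 6) / 6. Expressed directly with torch-rb tensor ops:

  x = Torch.tensor([-4.0, 0.0, 4.0])
  y = x * (x + 3).clamp(0, 6) / 6
  # => elementwise [0.0, 0.0, 4.0]: saturates to 0 below -3, identity above +3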
@@ -11962,8 +11993,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: leaky_relu_out
-    MPS: leaky_relu_out_mps
+    CPU, CUDA, MPS: leaky_relu_out
     QuantizedCPU: leaky_relu_out_quantized_cpu

 - func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor
@@ -11979,8 +12009,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA: leaky_relu_backward_out
-    MPS: leaky_relu_backward_out_mps
+    CPU, CUDA, MPS: leaky_relu_backward_out

 - func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
   structured_delegate: leaky_relu_backward.grad_input
@@ -12092,8 +12121,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
-    CPU, CUDA: softshrink_out
-    MPS: softshrink_out_mps
+    CPU, CUDA, MPS: softshrink_out

 - func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
   structured_delegate: softshrink.out
@@ -12106,8 +12134,7 @@
   structured_inherits: TensorIteratorBase
   python_module: nn
   dispatch:
-    CPU, CUDA: softshrink_backward_out
-    MPS: softshrink_backward_out_mps
+    CPU, CUDA, MPS: softshrink_backward_out

 - func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
   structured_delegate: softshrink_backward.grad_input
@@ -12769,6 +12796,7 @@
   dispatch:
     CPU: _upsample_bicubic2d_aa_out_cpu
     CUDA: _upsample_bicubic2d_aa_out_cuda
+    MPS: _upsample_bicubic2d_aa_out_mps

 - func: _upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -12791,6 +12819,7 @@
   dispatch:
     CPU: upsample_trilinear3d_out_cpu
     CUDA: upsample_trilinear3d_out_cuda
+    MPS: upsample_trilinear3d_out_mps

 - func: upsample_trilinear3d(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -12802,6 +12831,7 @@
   dispatch:
     CPU: upsample_trilinear3d_backward_out_cpu
     CUDA: upsample_trilinear3d_backward_out_cuda
+    MPS: upsample_trilinear3d_backward_out_mps

 - func: upsample_trilinear3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -12913,6 +12943,7 @@
   dispatch:
     CPU: upsample_nearest3d_out_cpu
     CUDA: upsample_nearest3d_out_cuda
+    MPS: upsample_nearest3d_out_mps

 - func: _upsample_nearest_exact3d.out(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -12920,6 +12951,7 @@
   dispatch:
     CPU: _upsample_nearest_exact3d_out_cpu
     CUDA: _upsample_nearest_exact3d_out_cuda
+    MPS: _upsample_nearest_exact3d_out_mps

 - func: upsample_nearest3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -12939,6 +12971,7 @@
   dispatch:
     CPU: upsample_nearest3d_backward_out_cpu
     CUDA: upsample_nearest3d_backward_out_cuda
+    MPS: upsample_nearest3d_backward_out_mps

 - func: _upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -12946,6 +12979,7 @@
   dispatch:
     CPU: _upsample_nearest_exact3d_backward_out_cpu
     CUDA: _upsample_nearest_exact3d_backward_out_cuda
+    MPS: _upsample_nearest_exact3d_backward_out_mps

 - func: upsample_nearest3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
@@ -12988,7 +13022,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: tanh_backward_out
+    CPU, CUDA, MTIA: tanh_backward_out
     MPS: tanh_backward_out_mps
   tags: pointwise

@@ -13120,12 +13154,14 @@
   dispatch:
     CPU: col2im_out_cpu
     CUDA: col2im_out_cuda
+    MPS: col2im_out_mps

 - func: col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
   python_module: nn
   dispatch:
     CPU: col2im_cpu
     CUDA: col2im_cuda
+    MPS: col2im_mps
   tags: core

 - func: column_stack(Tensor[] tensors) -> Tensor
@@ -13158,7 +13194,7 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: isinf
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isinf
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isinf
     SparseCPU, SparseCUDA: isinf_sparse
     SparseMeta: isinf_sparse_meta
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
@@ -13174,7 +13210,7 @@
   variants: function, method
   structured_delegate: isposinf.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isposinf
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isposinf
     SparseCPU, SparseCUDA: isposinf_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
   tags: pointwise
@@ -13192,7 +13228,7 @@
   variants: function, method
   structured_delegate: isneginf.out
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isneginf
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_isneginf
     SparseCPU, SparseCUDA: isneginf_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
   tags: pointwise
@@ -13500,7 +13536,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: special_i0e_out
+    CPU, CUDA, MPS: special_i0e_out
   tags: pointwise

 - func: special_i1(Tensor self) -> Tensor
@@ -13528,7 +13564,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: special_i1e_out
+    CPU, CUDA, MPS: special_i1e_out
   tags: pointwise

 - func: special_logit(Tensor self, float? eps=None) -> Tensor
@@ -13897,8 +13933,7 @@
   python_module: linalg
   structured: True
   dispatch:
-    CPU, CUDA: linalg_cholesky_ex_out
-    MPS: linalg_cholesky_ex_out_mps
+    CPU, CUDA, MPS: linalg_cholesky_ex_out

 - func: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
   python_module: linalg
@@ -14468,13 +14503,13 @@
   dispatch:
     # the NestedTensor keys are necessary because NestedTensor has been removed
     # from the CompositeExplicitAutograd keyset see Note [NestedTensor Not Included in Backend Keys]
-    CompositeExplicitAutograd, NestedTensorCPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_fullcoverage
+    CompositeExplicitAutograd, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_fullcoverage
   autogen: _test_autograd_multiple_dispatch.fullcoverage_out

 # Note: this function is only for testing.
 - func: _test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor
   dispatch:
-    CompositeImplicitAutograd, NestedTensorCPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_ntonly
+    CompositeImplicitAutograd, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _test_autograd_multiple_dispatch_ntonly

 # Note: this function is only for testing.
 - func: _test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a)
@@ -14819,13 +14854,13 @@
 - func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _safe_softmax
-    NestedTensorCPU, NestedTensorCUDA: _safe_softmax
+    NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: _safe_softmax

 # Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
 - func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
   variants: function
   dispatch:
-    CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_encoder_layer_forward
+    CPU, CUDA, NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: transformer_encoder_layer_forward
   autogen: _transformer_encoder_layer_fwd.out

 - func: _native_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, bool need_weights=True, bool average_attn_weights=True, int? mask_type=None) -> (Tensor, Tensor)
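_safe_softmax, which picks up the NestedTensorHPU key here, exists for attention: when a row is fully masked to -inf, an ordinary softmax divides by zero and yields NaN, while the safe variant is documented upstream to return zeros for such rows. The failure mode it guards against, shown in torch-rb:

  row = Torch.tensor([-Float::INFINITY, -Float::INFINITY])
  Torch.softmax(row, 0)   # => [NaN, NaN] with the ordinary softmax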
@@ -14990,7 +15025,7 @@

 - func: special_bessel_j0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_bessel_j0_out
+    CPU, CUDA, MPS: special_bessel_j0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15005,7 +15040,7 @@

 - func: special_bessel_j1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_bessel_j1_out
+    CPU, CUDA, MPS: special_bessel_j1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15020,7 +15055,7 @@

 - func: special_bessel_y0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_bessel_y0_out
+    CPU, CUDA, MPS: special_bessel_y0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15035,7 +15070,7 @@

 - func: special_bessel_y1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_bessel_y1_out
+    CPU, CUDA, MPS: special_bessel_y1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15068,7 +15103,7 @@
 - func: special_chebyshev_polynomial_t.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_chebyshev_polynomial_t_out
+    CPU, CUDA, MPS: special_chebyshev_polynomial_t_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15117,7 +15152,7 @@
 - func: special_chebyshev_polynomial_u.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_chebyshev_polynomial_u_out
+    CPU, CUDA, MPS: special_chebyshev_polynomial_u_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15166,7 +15201,7 @@
 - func: special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_chebyshev_polynomial_v_out
+    CPU, CUDA, MPS: special_chebyshev_polynomial_v_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15215,7 +15250,7 @@
 - func: special_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_chebyshev_polynomial_w_out
+    CPU, CUDA, MPS: special_chebyshev_polynomial_w_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15264,7 +15299,7 @@
 - func: special_hermite_polynomial_h.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_hermite_polynomial_h_out
+    CPU, CUDA, MPS: special_hermite_polynomial_h_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15313,7 +15348,7 @@
 - func: special_hermite_polynomial_he.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
   dispatch:
-    CPU, CUDA: special_hermite_polynomial_he_out
+    CPU, CUDA, MPS: special_hermite_polynomial_he_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15442,7 +15477,7 @@

 - func: special_modified_bessel_i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_modified_bessel_i0_out
+    CPU, CUDA, MPS: special_modified_bessel_i0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15457,7 +15492,7 @@

 - func: special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_modified_bessel_i1_out
+    CPU, CUDA, MPS: special_modified_bessel_i1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15472,7 +15507,7 @@

 - func: special_modified_bessel_k0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_modified_bessel_k0_out
+    CPU, CUDA, MPS: special_modified_bessel_k0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15487,7 +15522,7 @@

 - func: special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_modified_bessel_k1_out
+    CPU, CUDA, MPS: special_modified_bessel_k1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15502,7 +15537,7 @@

 - func: special_scaled_modified_bessel_k0.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_scaled_modified_bessel_k0_out
+    CPU, CUDA, MPS: special_scaled_modified_bessel_k0_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15517,7 +15552,7 @@

 - func: special_scaled_modified_bessel_k1.out(Tensor x, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU, CUDA: special_scaled_modified_bessel_k1_out
+    CPU, CUDA, MPS: special_scaled_modified_bessel_k1_out
   python_module: special
   structured_inherits: TensorIteratorBase
   structured: True
@@ -15808,6 +15843,13 @@
     CPU: _fused_adagrad_kernel_cpu_
   autogen: _fused_adagrad, _fused_adagrad.out

+- func: _fused_adagrad_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor[] state_steps, *, Tensor lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _fused_adagrad_kernel_cpu_
+  autogen: _fused_adagrad.tensor_lr, _fused_adagrad.tensor_lr_out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function
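The new _fused_adagrad_.tensor_lr variant accepts the learning rate as a Tensor rather than a float, so a scheduler can update lr on-device without a host round trip. A single-parameter Ruby sketch of the Adagrad step the fused kernel batches (illustration only; the overload changes how lr is passed, not the math):

  def adagrad_step(param, grad, state_sum, step, lr:, lr_decay: 0.0, eps: 1e-10)
    clr = lr / (1 + (step - 1) * lr_decay)   # decayed learning rate
    state_sum.add!(grad * grad)              # accumulate squared gradients
    param.sub!(grad * clr / (state_sum.sqrt + eps))
  end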