torch-rb 0.13.1 → 0.14.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -0
- data/codegen/generate_functions.rb +6 -1
- data/codegen/native_functions.yaml +985 -516
- data/ext/torch/torch.cpp +10 -6
- data/ext/torch/utils.h +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +6 -3
- metadata +2 -2
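
Most of this release is the refreshed `data/codegen/native_functions.yaml` shown below; torch-rb's `data/codegen/generate_functions.rb` reads these schema strings at build time to generate the C++ glue behind the gem's Ruby methods. As a rough illustration of the format only (a minimal sketch — `parse_schema` is an invented helper, not the gem's actual codegen), each entry pairs a `func` schema with dispatch metadata:

  require "yaml"

  # Minimal sketch (not torch-rb's real codegen): split a schema string such as
  # "sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()"
  # into a name, an argument list, and a return type.
  def parse_schema(schema)
    name, rest = schema.split("(", 2)
    args, ret = rest.split(") -> ", 2)
    {
      name: name.strip,
      args: args.split(",").map(&:strip), # naive; real parsing must respect nested brackets
      returns: ret.strip
    }
  end

  entry = YAML.safe_load(<<~YAML).first
    - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
      dispatch:
        CompositeExplicitAutograd: sym_constrain_range
  YAML

  parsed = parse_schema(entry["func"])
  puts parsed[:name]    # => "sym_constrain_range"
  puts parsed[:returns] # => "()"
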
@@ -170,8 +170,36 @@
|
|
170
170
|
CPU: _assert_async_cpu
|
171
171
|
CUDA: _assert_async_cuda
|
172
172
|
|
173
|
+
- func: _assert_async.msg(Tensor self, str assert_msg) -> ()
|
174
|
+
dispatch:
|
175
|
+
CPU: _assert_async_msg_cpu
|
176
|
+
CUDA: _assert_async_msg_cuda
|
177
|
+
|
178
|
+
- func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
|
179
|
+
dispatch:
|
180
|
+
CPU: _functional_assert_async_msg_cpu
|
181
|
+
|
182
|
+
- func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()
|
183
|
+
|
184
|
+
- func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
|
185
|
+
dispatch:
|
186
|
+
CompositeExplicitAutograd: sym_constrain_range
|
187
|
+
|
188
|
+
- func: sym_constrain_range_for_size(Scalar size, *, int? min, int? max) -> ()
|
189
|
+
dispatch:
|
190
|
+
CompositeExplicitAutograd: sym_constrain_range_for_size
|
173
191
|
|
174
|
-
- func:
|
192
|
+
- func: _functional_sym_constrain_range(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor
|
193
|
+
dispatch:
|
194
|
+
CompositeExplicitAutograd: _functional_sym_constrain_range
|
195
|
+
|
196
|
+
- func: _functional_sym_constrain_range_for_size(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor
|
197
|
+
dispatch:
|
198
|
+
CompositeExplicitAutograd: _functional_sym_constrain_range_for_size
|
199
|
+
|
200
|
+
- func: _make_dep_token(*, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
|
201
|
+
dispatch:
|
202
|
+
CPU: _make_dep_token_cpu
|
175
203
|
|
176
204
|
- func: refine_names(Tensor(a) self, Dimname[] names) -> Tensor(a)
|
177
205
|
variants: method
|
@@ -211,6 +239,7 @@
|
|
211
239
|
dispatch:
|
212
240
|
CUDA: _cudnn_rnn
|
213
241
|
autogen: _cudnn_rnn.out
|
242
|
+
tags: nondeterministic_seeded
|
214
243
|
|
215
244
|
- func: _cudnn_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
|
216
245
|
dispatch:
|
@@ -221,6 +250,7 @@
|
|
221
250
|
dispatch:
|
222
251
|
CUDA: _cudnn_init_dropout_state
|
223
252
|
autogen: _cudnn_init_dropout_state.out
|
253
|
+
tags: nondeterministic_seeded
|
224
254
|
|
225
255
|
- func: _debug_has_internal_overlap(Tensor self) -> int
|
226
256
|
variants: function
|
@@ -297,6 +327,7 @@
|
|
297
327
|
CompositeExplicitAutograd: abs
|
298
328
|
SparseCPU, SparseCUDA: abs_sparse
|
299
329
|
SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
|
330
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
|
300
331
|
tags: [core, pointwise]
|
301
332
|
|
302
333
|
- func: abs_(Tensor(a!) self) -> Tensor(a!)
|
@@ -306,6 +337,7 @@
|
|
306
337
|
CompositeExplicitAutograd: abs_
|
307
338
|
SparseCPU, SparseCUDA: abs_sparse_
|
308
339
|
SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
|
340
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
|
309
341
|
|
310
342
|
- func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
311
343
|
device_check: NoCheck # TensorIterator
|
@@ -374,7 +406,7 @@
|
|
374
406
|
- func: view_as_complex(Tensor(a) self) -> Tensor(a)
|
375
407
|
variants: function
|
376
408
|
dispatch:
|
377
|
-
CPU, CUDA, Meta: view_as_complex
|
409
|
+
CPU, CUDA, MPS, Meta: view_as_complex
|
378
410
|
|
379
411
|
- func: sgn(Tensor self) -> Tensor
|
380
412
|
variants: function, method
|
@@ -382,6 +414,7 @@
|
|
382
414
|
dispatch:
|
383
415
|
SparseCPU, SparseCUDA: sgn_sparse
|
384
416
|
SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
|
417
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
|
385
418
|
tags: pointwise
|
386
419
|
|
387
420
|
- func: sgn_(Tensor(a!) self) -> Tensor(a!)
|
@@ -390,6 +423,7 @@
|
|
390
423
|
dispatch:
|
391
424
|
SparseCPU, SparseCUDA: sgn_sparse_
|
392
425
|
SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
|
426
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
|
393
427
|
tags: pointwise
|
394
428
|
|
395
429
|
- func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -488,8 +522,10 @@
|
|
488
522
|
- func: arccos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
489
523
|
|
490
524
|
- func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
|
525
|
+
tags: core
|
491
526
|
|
492
527
|
- func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
|
528
|
+
tags: core
|
493
529
|
|
494
530
|
# Return: (Tensor output, Tensor indices)
|
495
531
|
- func: adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)
|
@@ -610,13 +646,13 @@
|
|
610
646
|
MPS: addr_out_mps
|
611
647
|
CompositeExplicitAutograd: math_addr_out
|
612
648
|
|
613
|
-
- func: affine_grid_generator(Tensor theta,
|
649
|
+
- func: affine_grid_generator(Tensor theta, SymInt[] size, bool align_corners) -> Tensor
|
614
650
|
variants: function
|
615
651
|
dispatch:
|
616
652
|
CompositeExplicitAutograd: affine_grid_generator
|
617
653
|
autogen: affine_grid_generator.out
|
618
654
|
|
619
|
-
- func: affine_grid_generator_backward(Tensor grad,
|
655
|
+
- func: affine_grid_generator_backward(Tensor grad, SymInt[] size, bool align_corners) -> Tensor
|
620
656
|
variants: function
|
621
657
|
|
622
658
|
- func: _is_all_true(Tensor self) -> Tensor
|
@@ -633,6 +669,13 @@
|
|
633
669
|
- func: _test_check_tensor(Tensor self) -> Tensor
|
634
670
|
variants: function
|
635
671
|
|
672
|
+
# Note; this function is only for testing
|
673
|
+
- func: _test_functorch_fallback(Tensor self, Tensor other) -> Tensor
|
674
|
+
variants: function
|
675
|
+
dispatch:
|
676
|
+
CPU: _test_functorch_fallback
|
677
|
+
autogen: _test_functorch_fallback.out
|
678
|
+
|
636
679
|
- func: all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
|
637
680
|
device_check: NoCheck # TensorIterator
|
638
681
|
structured_delegate: all.out
|
@@ -664,6 +707,7 @@
|
|
664
707
|
device_check: NoCheck # TensorIterator
|
665
708
|
structured_delegate: any.out
|
666
709
|
variants: function, method
|
710
|
+
tags: core
|
667
711
|
|
668
712
|
- func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
|
669
713
|
device_check: NoCheck # TensorIterator
|
@@ -1108,6 +1152,7 @@
|
|
1108
1152
|
structured_inherits: TensorIteratorBase
|
1109
1153
|
dispatch:
|
1110
1154
|
CPU, CUDA: bitwise_not_out
|
1155
|
+
MPS: bitwise_not_out_mps
|
1111
1156
|
tags: pointwise
|
1112
1157
|
|
1113
1158
|
- func: copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -1115,7 +1160,7 @@
|
|
1115
1160
|
structured: True
|
1116
1161
|
structured_inherits: TensorIteratorBase
|
1117
1162
|
dispatch:
|
1118
|
-
CPU, CUDA: copysign_out
|
1163
|
+
CPU, CUDA, MPS: copysign_out
|
1119
1164
|
tags: pointwise
|
1120
1165
|
|
1121
1166
|
- func: copysign.Tensor(Tensor self, Tensor other) -> Tensor
|
@@ -1150,6 +1195,7 @@
|
|
1150
1195
|
variants: function, method
|
1151
1196
|
dispatch:
|
1152
1197
|
CompositeExplicitAutograd: logical_not
|
1198
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not
|
1153
1199
|
tags: [core, pointwise]
|
1154
1200
|
|
1155
1201
|
- func: logical_not_(Tensor(a!) self) -> Tensor(a!)
|
@@ -1157,6 +1203,7 @@
|
|
1157
1203
|
variants: method
|
1158
1204
|
dispatch:
|
1159
1205
|
CompositeExplicitAutograd: logical_not_
|
1206
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not_
|
1160
1207
|
tags: pointwise
|
1161
1208
|
|
1162
1209
|
- func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -1171,7 +1218,7 @@
|
|
1171
1218
|
variants: function, method
|
1172
1219
|
dispatch:
|
1173
1220
|
CompositeExplicitAutograd: logical_xor
|
1174
|
-
tags: pointwise
|
1221
|
+
tags: [core, pointwise]
|
1175
1222
|
|
1176
1223
|
- func: logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
1177
1224
|
device_check: NoCheck # TensorIterator
|
@@ -1326,7 +1373,7 @@
|
|
1326
1373
|
dispatch:
|
1327
1374
|
SparseCPU, SparseCUDA: ceil_sparse
|
1328
1375
|
SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
|
1329
|
-
tags: pointwise
|
1376
|
+
tags: [core, pointwise]
|
1330
1377
|
|
1331
1378
|
- func: ceil_(Tensor(a!) self) -> Tensor(a!)
|
1332
1379
|
device_check: NoCheck # TensorIterator
|
@@ -1393,7 +1440,7 @@
|
|
1393
1440
|
- func: clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor
|
1394
1441
|
variants: function, method
|
1395
1442
|
structured_delegate: clamp.Tensor_out
|
1396
|
-
tags: pointwise
|
1443
|
+
tags: [core, pointwise]
|
1397
1444
|
|
1398
1445
|
- func: clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)
|
1399
1446
|
device_check: NoCheck # TensorIterator
|
@@ -1552,6 +1599,7 @@
|
|
1552
1599
|
- func: polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)
|
1553
1600
|
dispatch:
|
1554
1601
|
CPU, CUDA: polar_out
|
1602
|
+
MPS: polar_out_mps
|
1555
1603
|
|
1556
1604
|
- func: constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor
|
1557
1605
|
variants: function
|
@@ -1598,11 +1646,17 @@
|
|
1598
1646
|
|
1599
1647
|
- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
|
1600
1648
|
|
1601
|
-
- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1,
|
1649
|
+
- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
|
1650
|
+
dispatch:
|
1651
|
+
CompositeImplicitAutograd: conv1d_symint
|
1602
1652
|
|
1603
|
-
- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1,
|
1653
|
+
- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
|
1654
|
+
dispatch:
|
1655
|
+
CompositeImplicitAutograd: conv2d_symint
|
1604
1656
|
|
1605
|
-
- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1,
|
1657
|
+
- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
|
1658
|
+
dispatch:
|
1659
|
+
CompositeImplicitAutograd: conv3d_symint
|
1606
1660
|
|
1607
1661
|
- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, str padding="valid", int[1] dilation=1, int groups=1) -> Tensor
|
1608
1662
|
cpp_no_default_args: ['bias', 'stride', 'padding']
|
@@ -1621,11 +1675,17 @@
|
|
1621
1675
|
- func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)
|
1622
1676
|
|
1623
1677
|
# NB: we inherit the goofy argument order from PyTorch torch.nn.functional
|
1624
|
-
- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1,
|
1678
|
+
- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor
|
1679
|
+
dispatch:
|
1680
|
+
CompositeImplicitAutograd: conv_transpose1d_symint
|
1625
1681
|
|
1626
|
-
- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1,
|
1682
|
+
- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor
|
1683
|
+
dispatch:
|
1684
|
+
CompositeImplicitAutograd: conv_transpose2d_symint
|
1627
1685
|
|
1628
|
-
- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1,
|
1686
|
+
- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
|
1687
|
+
dispatch:
|
1688
|
+
CompositeImplicitAutograd: conv_transpose3d_symint
|
1629
1689
|
|
1630
1690
|
- func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
|
1631
1691
|
variants: function
|
@@ -1850,6 +1910,7 @@
|
|
1850
1910
|
device_check: NoCheck # TensorIterator
|
1851
1911
|
dispatch:
|
1852
1912
|
CPU, CUDA: cumprod_out
|
1913
|
+
MPS: cumprod_out_mps
|
1853
1914
|
|
1854
1915
|
- func: cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
|
1855
1916
|
device_check: NoCheck # TensorIterator
|
@@ -1870,6 +1931,7 @@
|
|
1870
1931
|
structured_delegate: cumsum.out
|
1871
1932
|
device_check: NoCheck # TensorIterator
|
1872
1933
|
variants: function, method
|
1934
|
+
tags: core
|
1873
1935
|
|
1874
1936
|
- func: cumsum_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!)
|
1875
1937
|
structured_delegate: cumsum.out
|
@@ -2145,6 +2207,7 @@
|
|
2145
2207
|
CompositeExplicitAutograd: embedding_symint
|
2146
2208
|
NestedTensorCPU, NestedTensorCUDA: NestedTensor_embedding
|
2147
2209
|
autogen: embedding.out
|
2210
|
+
tags: core
|
2148
2211
|
|
2149
2212
|
- func: embedding_backward(Tensor grad, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor
|
2150
2213
|
dispatch:
|
@@ -2202,6 +2265,7 @@
|
|
2202
2265
|
CPU: _embedding_bag_cpu
|
2203
2266
|
CUDA: _embedding_bag_cuda
|
2204
2267
|
autogen: _embedding_bag.out
|
2268
|
+
tags: core
|
2205
2269
|
|
2206
2270
|
- func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
|
2207
2271
|
dispatch:
|
@@ -2240,6 +2304,12 @@
|
|
2240
2304
|
SparseCPU, SparseCUDA, SparseMeta: empty_sparse
|
2241
2305
|
SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
|
2242
2306
|
QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
|
2307
|
+
tags: core
|
2308
|
+
|
2309
|
+
- func: empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
2310
|
+
dispatch:
|
2311
|
+
CompositeExplicitAutograd: empty_permuted_symint
|
2312
|
+
autogen: empty_permuted.out
|
2243
2313
|
|
2244
2314
|
# We do not make new_empty a composite that calls into new_empty_strided, as the strided version
|
2245
2315
|
# is significantly more difficult to implement by different backends
|
@@ -2280,7 +2350,7 @@
|
|
2280
2350
|
autogen: new_ones.out
|
2281
2351
|
|
2282
2352
|
# other overrides are to provide a more helpful error message that dtype is required
|
2283
|
-
- func: _empty_affine_quantized(
|
2353
|
+
- func: _empty_affine_quantized(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, float scale=1, int zero_point=0, MemoryFormat? memory_format=contiguous_format) -> Tensor
|
2284
2354
|
dispatch:
|
2285
2355
|
CPU: empty_affine_quantized_other_backends_stub
|
2286
2356
|
QuantizedCPU, QuantizedCUDA: empty_affine_quantized
|
@@ -2288,7 +2358,7 @@
|
|
2288
2358
|
|
2289
2359
|
# it's a factory function receiving a tensor argument, thus overriding explicitly
|
2290
2360
|
# other overrides are to provide a more helpful error message that dtype is required
|
2291
|
-
- func: _empty_per_channel_affine_quantized(
|
2361
|
+
- func: _empty_per_channel_affine_quantized(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor
|
2292
2362
|
category_override: factory
|
2293
2363
|
dispatch:
|
2294
2364
|
CPU: empty_per_channel_affine_quantized_other_backends_stub
|
@@ -2313,7 +2383,7 @@
|
|
2313
2383
|
# This is a utility function to enable users to resize out tensor while registering kernels for out variants.
|
2314
2384
|
# Eventually, we can consider exposing `resize_output` as a public API to ship it with python op registration
|
2315
2385
|
# to make it easy to register out variants for ops.
|
2316
|
-
- func: _resize_output_(Tensor(a!) self,
|
2386
|
+
- func: _resize_output_(Tensor(a!) self, SymInt[] size, Device device) -> Tensor(a!)
|
2317
2387
|
use_const_ref_for_mutable_tensors: True
|
2318
2388
|
variants: function
|
2319
2389
|
dispatch:
|
@@ -2483,21 +2553,21 @@
|
|
2483
2553
|
device_guard: False
|
2484
2554
|
|
2485
2555
|
# decomposes to eye.m
|
2486
|
-
- func: eye(
|
2556
|
+
- func: eye(SymInt n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
2487
2557
|
dispatch:
|
2488
2558
|
CompositeExplicitAutograd: eye
|
2489
2559
|
|
2490
|
-
- func: eye.m(
|
2560
|
+
- func: eye.m(SymInt n, SymInt m, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
2491
2561
|
dispatch:
|
2492
2562
|
CompositeExplicitAutograd: eye
|
2493
2563
|
|
2494
|
-
- func: eye.out(
|
2564
|
+
- func: eye.out(SymInt n, *, Tensor(a!) out) -> Tensor(a!)
|
2495
2565
|
dispatch:
|
2496
2566
|
CPU, Meta: eye_out_cpu
|
2497
2567
|
CUDA: eye_out_cuda
|
2498
2568
|
MPS: eye_out_mps
|
2499
2569
|
|
2500
|
-
- func: eye.m_out(
|
2570
|
+
- func: eye.m_out(SymInt n, SymInt m, *, Tensor(a!) out) -> Tensor(a!)
|
2501
2571
|
dispatch:
|
2502
2572
|
CPU, Meta: eye_out_cpu
|
2503
2573
|
CUDA: eye_out_cuda
|
@@ -2515,11 +2585,15 @@
|
|
2515
2585
|
- func: flatten.DimnameList(Tensor(a) self, Dimname[] dims, Dimname out_dim) -> Tensor(a)
|
2516
2586
|
variants: function, method
|
2517
2587
|
|
2518
|
-
- func: unflatten.int(Tensor(a) self, int dim,
|
2588
|
+
- func: unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a)
|
2519
2589
|
variants: function, method
|
2590
|
+
dispatch:
|
2591
|
+
CompositeImplicitAutograd: unflatten_symint
|
2520
2592
|
|
2521
|
-
- func: unflatten.Dimname(Tensor(a) self, Dimname dim,
|
2593
|
+
- func: unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a)
|
2522
2594
|
variants: function, method
|
2595
|
+
dispatch:
|
2596
|
+
CompositeImplicitAutograd: unflatten_dimname_symint
|
2523
2597
|
|
2524
2598
|
- func: fill.Scalar(Tensor self, Scalar value) -> Tensor
|
2525
2599
|
variants: function
|
@@ -2839,13 +2913,13 @@
|
|
2839
2913
|
CUDA: _fft_r2c_cufft_out
|
2840
2914
|
|
2841
2915
|
# Complex to real inverse FFT
|
2842
|
-
- func: _fft_c2r(Tensor self, int[] dim, int normalization,
|
2916
|
+
- func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
|
2843
2917
|
variants: function
|
2844
2918
|
dispatch:
|
2845
2919
|
CPU: _fft_c2r_mkl
|
2846
2920
|
CUDA: _fft_c2r_cufft
|
2847
2921
|
|
2848
|
-
- func: _fft_c2r.out(Tensor self, int[] dim, int normalization,
|
2922
|
+
- func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
|
2849
2923
|
variants: function
|
2850
2924
|
dispatch:
|
2851
2925
|
CPU: _fft_c2r_mkl_out
|
@@ -2871,13 +2945,13 @@
|
|
2871
2945
|
CPU: _validate_compressed_sparse_indices_cpu
|
2872
2946
|
CUDA: _validate_compressed_sparse_indices_cuda
|
2873
2947
|
|
2874
|
-
- func: _cufft_get_plan_cache_size(
|
2948
|
+
- func: _cufft_get_plan_cache_size(DeviceIndex device_index) -> int
|
2875
2949
|
|
2876
|
-
- func: _cufft_get_plan_cache_max_size(
|
2950
|
+
- func: _cufft_get_plan_cache_max_size(DeviceIndex device_index) -> int
|
2877
2951
|
|
2878
|
-
- func: _cufft_set_plan_cache_max_size(
|
2952
|
+
- func: _cufft_set_plan_cache_max_size(DeviceIndex device_index, int max_size) -> ()
|
2879
2953
|
|
2880
|
-
- func: _cufft_clear_plan_cache(
|
2954
|
+
- func: _cufft_clear_plan_cache(DeviceIndex device_index) -> ()
|
2881
2955
|
|
2882
2956
|
- func: index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
|
2883
2957
|
device_check: NoCheck # TensorIterator
|
@@ -2885,7 +2959,7 @@
|
|
2885
2959
|
variants: function, method
|
2886
2960
|
dispatch:
|
2887
2961
|
QuantizedCPU: quantized_index
|
2888
|
-
tags: dynamic_output_shape
|
2962
|
+
tags: [core, dynamic_output_shape]
|
2889
2963
|
# NB: This function is special-cased in tools/autograd/gen_variable_type.py
|
2890
2964
|
# NB: The following functions are declared in aten/src/ATen/templates/TensorBody.h and defined in aten/src/ATen/TensorIndexing.cpp:
|
2891
2965
|
# - Tensor Tensor::index(ArrayRef<TensorIndex> indices)
|
@@ -2900,6 +2974,13 @@
|
|
2900
2974
|
dispatch:
|
2901
2975
|
CPU, CUDA, MPS: index_out
|
2902
2976
|
|
2977
|
+
# Used by inductor to signal indexing without bounds checks
|
2978
|
+
# Note that we don't support boolean indexing, to avoid dynamic output shapes
|
2979
|
+
- func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
|
2980
|
+
variants: function
|
2981
|
+
dispatch:
|
2982
|
+
CPU, CUDA: _unsafe_index
|
2983
|
+
|
2903
2984
|
- func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
|
2904
2985
|
structured: True
|
2905
2986
|
variants: function
|
@@ -2939,6 +3020,13 @@
|
|
2939
3020
|
variants: function, method
|
2940
3021
|
dispatch:
|
2941
3022
|
CompositeExplicitAutograd: index_put
|
3023
|
+
tags: core
|
3024
|
+
|
3025
|
+
- func: _unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
|
3026
|
+
device_check: NoCheck # delegate to _index_put_impl_ after clone, which leverages TensorIterator
|
3027
|
+
variants: function
|
3028
|
+
dispatch:
|
3029
|
+
CompositeExplicitAutograd: _unsafe_index_put
|
2942
3030
|
|
2943
3031
|
- func: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
|
2944
3032
|
device_check: NoCheck # TensorIterator
|
@@ -3097,6 +3185,7 @@
|
|
3097
3185
|
CPU: layer_norm_backward_cpu
|
3098
3186
|
CUDA: layer_norm_backward_cuda
|
3099
3187
|
MPS: layer_norm_backward_mps
|
3188
|
+
NestedTensorCPU, NestedTensorCUDA: layer_norm_backward_nested
|
3100
3189
|
autogen: native_layer_norm_backward.out
|
3101
3190
|
tags: core
|
3102
3191
|
|
@@ -3160,6 +3249,18 @@
|
|
3160
3249
|
MkldnnCPU: mkldnn_linear_backward
|
3161
3250
|
autogen: mkldnn_linear_backward.out
|
3162
3251
|
|
3252
|
+
- func: _cslt_compress(Tensor input) -> Tensor
|
3253
|
+
dispatch:
|
3254
|
+
CUDA: _cslt_compress
|
3255
|
+
|
3256
|
+
- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, bool transpose_result=False) -> Tensor
|
3257
|
+
dispatch:
|
3258
|
+
CUDA: _cslt_sparse_mm
|
3259
|
+
|
3260
|
+
- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None) -> Tensor
|
3261
|
+
dispatch:
|
3262
|
+
CUDA: _sparse_semi_structured_linear
|
3263
|
+
|
3163
3264
|
- func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
|
3164
3265
|
|
3165
3266
|
- func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
|
@@ -3355,6 +3456,7 @@
|
|
3355
3456
|
variants: function
|
3356
3457
|
dispatch:
|
3357
3458
|
CPU, CUDA: xlogy_out
|
3459
|
+
MPS: xlogy_out_mps
|
3358
3460
|
tags: pointwise
|
3359
3461
|
|
3360
3462
|
- func: xlogy.OutScalar_Self(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -3510,6 +3612,7 @@
|
|
3510
3612
|
structured: True
|
3511
3613
|
dispatch:
|
3512
3614
|
CPU, CUDA: aminmax_out
|
3615
|
+
MPS: aminmax_out_mps
|
3513
3616
|
|
3514
3617
|
- func: _compute_linear_combination(Tensor input, Tensor coefficients) -> Tensor
|
3515
3618
|
dispatch:
|
@@ -3607,6 +3710,11 @@
|
|
3607
3710
|
QuantizedCUDA: quantized_max_pool2d_cudnn
|
3608
3711
|
autogen: quantized_max_pool2d.out
|
3609
3712
|
|
3713
|
+
- func: quantized_max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
|
3714
|
+
dispatch:
|
3715
|
+
QuantizedCPU: quantized_max_pool3d
|
3716
|
+
autogen: quantized_max_pool3d.out
|
3717
|
+
|
3610
3718
|
- func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
|
3611
3719
|
|
3612
3720
|
# The CPU and GPU dispatch variants are named weirdly here because otherwise there
|
@@ -3616,6 +3724,7 @@
|
|
3616
3724
|
variants: function, method
|
3617
3725
|
dispatch:
|
3618
3726
|
CompositeExplicitAutograd: mean
|
3727
|
+
tags: core
|
3619
3728
|
|
3620
3729
|
# For normal naming convention this should be `mean.out`. However since we already have `mean.out` we have to rename this.
|
3621
3730
|
# FIXME: fix CI jobs and re-enable this
|
@@ -3756,6 +3865,7 @@
|
|
3756
3865
|
- func: mkldnn_rnn_layer(Tensor input, Tensor weight0, Tensor weight1, Tensor weight2, Tensor weight3, Tensor hx_, Tensor cx_, bool reverse, int[] batch_sizes, int mode, int hidden_size, int num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train) -> (Tensor, Tensor, Tensor, Tensor)
|
3757
3866
|
dispatch:
|
3758
3867
|
CPU: mkldnn_rnn_layer
|
3868
|
+
MkldnnCPU: mkldnn_rnn_layer
|
3759
3869
|
autogen: mkldnn_rnn_layer.out
|
3760
3870
|
|
3761
3871
|
- func: mkldnn_rnn_layer_backward(Tensor input, Tensor weight1, Tensor weight2, Tensor weight3, Tensor weight4, Tensor hx_, Tensor cx_tmp, Tensor output, Tensor hy_, Tensor cy_, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, bool reverse, int mode, int hidden_size, int num_layers, bool has_biases, bool train, bool bidirectional, int[] batch_sizes, bool batch_first, Tensor workspace) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)
|
@@ -3800,6 +3910,8 @@
|
|
3800
3910
|
dispatch:
|
3801
3911
|
CUDA: miopen_rnn
|
3802
3912
|
autogen: miopen_rnn.out
|
3913
|
+
tags: nondeterministic_seeded
|
3914
|
+
|
3803
3915
|
|
3804
3916
|
- func: miopen_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
|
3805
3917
|
dispatch:
|
@@ -3823,6 +3935,14 @@
|
|
3823
3935
|
SparseCPU, SparseCUDA: _sparse_mm_out
|
3824
3936
|
SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
|
3825
3937
|
|
3938
|
+
- func: _int_mm(Tensor self, Tensor mat2) -> Tensor
|
3939
|
+
dispatch:
|
3940
|
+
CUDA: _int_mm_cuda
|
3941
|
+
|
3942
|
+
- func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
|
3943
|
+
dispatch:
|
3944
|
+
CUDA: _int_mm_out_cuda
|
3945
|
+
|
3826
3946
|
- func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
|
3827
3947
|
python_module: sparse
|
3828
3948
|
|
@@ -3981,7 +4101,6 @@
|
|
3981
4101
|
CUDA: batch_norm_cuda
|
3982
4102
|
MPS: batch_norm_mps
|
3983
4103
|
MkldnnCPU: mkldnn_batch_norm
|
3984
|
-
tags: core
|
3985
4104
|
|
3986
4105
|
- func: native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))
|
3987
4106
|
dispatch:
|
@@ -3997,6 +4116,16 @@
|
|
3997
4116
|
MPS: _batch_norm_legit_mps
|
3998
4117
|
MkldnnCPU: _mkldnn_batch_norm_legit
|
3999
4118
|
autogen: _native_batch_norm_legit_functional
|
4119
|
+
tags: core
|
4120
|
+
|
4121
|
+
# HACK: identical to _native_batch_norm_legit, but training is known to be False,
|
4122
|
+
# So we known that running stats will not be mutated.
|
4123
|
+
# The real fix here is batch norm consolidation.
|
4124
|
+
- func: _native_batch_norm_legit_no_training(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor)
|
4125
|
+
dispatch:
|
4126
|
+
CompositeExplicitAutograd: _batch_norm_legit_no_training
|
4127
|
+
autogen: _native_batch_norm_legit_no_training.out
|
4128
|
+
tags: core
|
4000
4129
|
|
4001
4130
|
- func: _native_batch_norm_legit.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, bool training, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd) -> (Tensor(d!), Tensor(e!), Tensor(f!))
|
4002
4131
|
dispatch:
|
@@ -4055,7 +4184,7 @@
|
|
4055
4184
|
CUDA: batch_norm_backward_reduce_cuda
|
4056
4185
|
autogen: batch_norm_backward_reduce.out
|
4057
4186
|
|
4058
|
-
- func: batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor
|
4187
|
+
- func: batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count) -> Tensor
|
4059
4188
|
dispatch:
|
4060
4189
|
CUDA: batch_norm_backward_elemt_cuda
|
4061
4190
|
autogen: batch_norm_backward_elemt.out
|
@@ -4113,6 +4242,7 @@
|
|
4113
4242
|
CPU, CUDA: _cdist_forward
|
4114
4243
|
MPS: _cdist_forward_mps
|
4115
4244
|
autogen: _cdist_forward.out
|
4245
|
+
tags: core
|
4116
4246
|
|
4117
4247
|
- func: _cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor
|
4118
4248
|
dispatch:
|
@@ -4125,6 +4255,7 @@
|
|
4125
4255
|
dispatch:
|
4126
4256
|
CPU, CUDA: _pdist_forward
|
4127
4257
|
autogen: _pdist_forward.out
|
4258
|
+
tags: core
|
4128
4259
|
|
4129
4260
|
- func: _pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor
|
4130
4261
|
dispatch:
|
@@ -4185,6 +4316,7 @@
|
|
4185
4316
|
CPU: pixel_shuffle_cpu
|
4186
4317
|
CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
|
4187
4318
|
autogen: pixel_shuffle.out
|
4319
|
+
tags: core
|
4188
4320
|
|
4189
4321
|
- func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
|
4190
4322
|
dispatch:
|
@@ -4194,7 +4326,7 @@
|
|
4194
4326
|
|
4195
4327
|
- func: channel_shuffle(Tensor self, int groups) -> Tensor
|
4196
4328
|
dispatch:
|
4197
|
-
CPU: channel_shuffle
|
4329
|
+
CPU, CUDA: channel_shuffle
|
4198
4330
|
QuantizedCPU: channel_shuffle_quantized_cpu
|
4199
4331
|
autogen: channel_shuffle.out
|
4200
4332
|
|
@@ -4294,7 +4426,7 @@
|
|
4294
4426
|
autogen: rand.generator_with_names_out
|
4295
4427
|
|
4296
4428
|
- func: rand(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
4297
|
-
tags: nondeterministic_seeded
|
4429
|
+
tags: [core, nondeterministic_seeded]
|
4298
4430
|
dispatch:
|
4299
4431
|
CompositeExplicitAutograd: rand
|
4300
4432
|
|
@@ -4319,47 +4451,47 @@
|
|
4319
4451
|
CompositeExplicitAutograd: rand_like
|
4320
4452
|
autogen: rand_like.out
|
4321
4453
|
|
4322
|
-
- func: randint(
|
4454
|
+
- func: randint(SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
4323
4455
|
tags: nondeterministic_seeded
|
4324
4456
|
dispatch:
|
4325
4457
|
CompositeExplicitAutograd: randint
|
4326
4458
|
|
4327
|
-
- func: randint.generator(
|
4459
|
+
- func: randint.generator(SymInt high, SymInt[] size, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
4328
4460
|
tags: nondeterministic_seeded
|
4329
4461
|
dispatch:
|
4330
4462
|
CompositeExplicitAutograd: randint
|
4331
4463
|
|
4332
|
-
- func: randint.low(
|
4464
|
+
- func: randint.low(SymInt low, SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
4333
4465
|
tags: nondeterministic_seeded
|
4334
4466
|
dispatch:
|
4335
4467
|
CompositeExplicitAutograd: randint
|
4336
4468
|
|
4337
|
-
- func: randint.low_generator(
|
4469
|
+
- func: randint.low_generator(SymInt low, SymInt high, SymInt[] size, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
4338
4470
|
tags: nondeterministic_seeded
|
4339
4471
|
dispatch:
|
4340
4472
|
CompositeExplicitAutograd: randint
|
4341
4473
|
|
4342
|
-
- func: randint.out(
|
4474
|
+
- func: randint.out(SymInt high, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
|
4343
4475
|
tags: nondeterministic_seeded
|
4344
4476
|
dispatch:
|
4345
4477
|
CompositeExplicitAutograd: randint_out
|
4346
4478
|
|
4347
|
-
- func: randint.generator_out(
|
4479
|
+
- func: randint.generator_out(SymInt high, SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
|
4348
4480
|
tags: nondeterministic_seeded
|
4349
4481
|
dispatch:
|
4350
4482
|
CompositeExplicitAutograd: randint_out
|
4351
4483
|
|
4352
|
-
- func: randint.low_out(
|
4484
|
+
- func: randint.low_out(SymInt low, SymInt high, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
|
4353
4485
|
tags: nondeterministic_seeded
|
4354
4486
|
dispatch:
|
4355
4487
|
CompositeExplicitAutograd: randint_out
|
4356
4488
|
|
4357
|
-
- func: randint.low_generator_out(
|
4489
|
+
- func: randint.low_generator_out(SymInt low, SymInt high, SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
|
4358
4490
|
tags: nondeterministic_seeded
|
4359
4491
|
dispatch:
|
4360
4492
|
CompositeExplicitAutograd: randint_out
|
4361
4493
|
|
4362
|
-
- func: randint_like(Tensor self,
|
4494
|
+
- func: randint_like(Tensor self, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
|
4363
4495
|
tags: nondeterministic_seeded
|
4364
4496
|
dispatch:
|
4365
4497
|
# NB: Although this composite mutates on the inside, it is
|
@@ -4367,7 +4499,7 @@
|
|
4367
4499
|
CompositeExplicitAutograd: randint_like
|
4368
4500
|
autogen: randint_like.out
|
4369
4501
|
|
4370
|
-
- func: randint_like.low_dtype(Tensor self,
|
4502
|
+
- func: randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
|
4371
4503
|
tags: nondeterministic_seeded
|
4372
4504
|
dispatch:
|
4373
4505
|
# NB: Although this composite mutates on the inside, it is
|
@@ -4376,7 +4508,7 @@
|
|
4376
4508
|
autogen: randint_like.low_dtype_out
|
4377
4509
|
|
4378
4510
|
- func: randn(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
4379
|
-
tags: nondeterministic_seeded
|
4511
|
+
tags: [core, nondeterministic_seeded]
|
4380
4512
|
dispatch:
|
4381
4513
|
CompositeExplicitAutograd: randn
|
4382
4514
|
|
@@ -4412,25 +4544,25 @@
|
|
4412
4544
|
dispatch:
|
4413
4545
|
# NB: Although this composite mutates on the inside, it is
|
4414
4546
|
# non-differentiable so NonFunctional doesn't apply
|
4415
|
-
CompositeExplicitAutograd: randn_like
|
4547
|
+
CompositeExplicitAutograd, CompositeImplicitAutogradNestedTensor: randn_like
|
4416
4548
|
autogen: randn_like.out
|
4417
4549
|
|
4418
|
-
- func: randperm(
|
4419
|
-
tags: nondeterministic_seeded
|
4550
|
+
- func: randperm(SymInt n, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
4551
|
+
tags: [core, nondeterministic_seeded]
|
4420
4552
|
dispatch:
|
4421
4553
|
CompositeExplicitAutograd: randperm
|
4422
4554
|
|
4423
|
-
- func: randperm.generator(
|
4555
|
+
- func: randperm.generator(SymInt n, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
4424
4556
|
tags: nondeterministic_seeded
|
4425
4557
|
dispatch:
|
4426
4558
|
CompositeExplicitAutograd: randperm
|
4427
4559
|
|
4428
|
-
- func: randperm.out(
|
4560
|
+
- func: randperm.out(SymInt n, *, Tensor(a!) out) -> Tensor(a!)
|
4429
4561
|
tags: nondeterministic_seeded
|
4430
4562
|
dispatch:
|
4431
4563
|
CompositeExplicitAutograd: randperm_out
|
4432
4564
|
|
4433
|
-
- func: randperm.generator_out(
|
4565
|
+
- func: randperm.generator_out(SymInt n, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
|
4434
4566
|
tags: nondeterministic_seeded
|
4435
4567
|
dispatch:
|
4436
4568
|
CPU: randperm_out_cpu
|
@@ -4591,7 +4723,7 @@
|
|
4591
4723
|
dispatch:
|
4592
4724
|
SparseCPU, SparseCUDA: round_sparse
|
4593
4725
|
SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
|
4594
|
-
tags: pointwise
|
4726
|
+
tags: [core, pointwise]
|
4595
4727
|
|
4596
4728
|
- func: round_(Tensor(a!) self) -> Tensor(a!)
|
4597
4729
|
device_check: NoCheck # TensorIterator
|
@@ -4839,10 +4971,14 @@
|
|
4839
4971
|
- func: silu(Tensor self) -> Tensor
|
4840
4972
|
structured_delegate: silu.out
|
4841
4973
|
python_module: nn
|
4974
|
+
dispatch:
|
4975
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
|
4842
4976
|
|
4843
4977
|
- func: silu_(Tensor(a!) self) -> Tensor(a!)
|
4844
4978
|
structured_delegate: silu.out
|
4845
4979
|
python_module: nn
|
4980
|
+
dispatch:
|
4981
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
|
4846
4982
|
|
4847
4983
|
- func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
4848
4984
|
structured: True
|
@@ -4865,6 +5001,7 @@
|
|
4865
5001
|
python_module: nn
|
4866
5002
|
dispatch:
|
4867
5003
|
CompositeImplicitAutograd: math_silu_backward
|
5004
|
+
NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
|
4868
5005
|
|
4869
5006
|
- func: mish(Tensor self) -> Tensor
|
4870
5007
|
structured_delegate: mish.out
|
@@ -4917,6 +5054,7 @@
|
|
4917
5054
|
variants: function, method
|
4918
5055
|
dispatch:
|
4919
5056
|
CPU, CUDA: logit
|
5057
|
+
MPS: logit_mps
|
4920
5058
|
tags: pointwise
|
4921
5059
|
|
4922
5060
|
- func: logit_(Tensor(a!) self, float? eps=None) -> Tensor(a!)
|
@@ -4928,6 +5066,7 @@
|
|
4928
5066
|
- func: logit.out(Tensor self, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)
|
4929
5067
|
dispatch:
|
4930
5068
|
CPU, CUDA: logit_out
|
5069
|
+
MPS: logit_out_mps
|
4931
5070
|
tags: pointwise
|
4932
5071
|
|
4933
5072
|
- func: sin(Tensor self) -> Tensor
|
@@ -5042,6 +5181,27 @@
|
|
5042
5181
|
device_check: NoCheck
|
5043
5182
|
device_guard: False
|
5044
5183
|
|
5184
|
+
- func: sym_size.int(Tensor self, int dim) -> SymInt
|
5185
|
+
variants: function
|
5186
|
+
device_check: NoCheck
|
5187
|
+
device_guard: False
|
5188
|
+
tags: core
|
5189
|
+
manual_cpp_binding: True
|
5190
|
+
|
5191
|
+
- func: sym_numel(Tensor self) -> SymInt
|
5192
|
+
variants: function
|
5193
|
+
device_check: NoCheck
|
5194
|
+
device_guard: False
|
5195
|
+
tags: core
|
5196
|
+
manual_cpp_binding: True
|
5197
|
+
|
5198
|
+
- func: sym_storage_offset(Tensor self) -> SymInt
|
5199
|
+
variants: function
|
5200
|
+
device_check: NoCheck
|
5201
|
+
device_guard: False
|
5202
|
+
tags: core
|
5203
|
+
manual_cpp_binding: True
|
5204
|
+
|
5045
5205
|
- func: slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
|
5046
5206
|
variants: function, method
|
5047
5207
|
device_check: NoCheck
|
@@ -5066,7 +5226,7 @@
|
|
5066
5226
|
device_check: NoCheck
|
5067
5227
|
device_guard: False
|
5068
5228
|
dispatch:
|
5069
|
-
|
5229
|
+
CompositeExplicitAutogradNonFunctional: slice_scatter
|
5070
5230
|
autogen: slice_scatter.out
|
5071
5231
|
tags: core
|
5072
5232
|
|
@@ -5075,15 +5235,16 @@
|
|
5075
5235
|
device_check: NoCheck
|
5076
5236
|
device_guard: False
|
5077
5237
|
dispatch:
|
5078
|
-
|
5238
|
+
CompositeExplicitAutogradNonFunctional: select_scatter_symint
|
5079
5239
|
autogen: select_scatter.out
|
5240
|
+
tags: core
|
5080
5241
|
|
5081
5242
|
- func: diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor
|
5082
5243
|
variants: function, method
|
5083
5244
|
device_check: NoCheck
|
5084
5245
|
device_guard: False
|
5085
5246
|
dispatch:
|
5086
|
-
|
5247
|
+
CompositeExplicitAutogradNonFunctional: diagonal_scatter
|
5087
5248
|
autogen: diagonal_scatter.out
|
5088
5249
|
|
5089
5250
|
- func: as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor
|
@@ -5091,7 +5252,7 @@
|
|
5091
5252
|
device_check: NoCheck
|
5092
5253
|
device_guard: False
|
5093
5254
|
dispatch:
|
5094
|
-
|
5255
|
+
CompositeExplicitAutogradNonFunctional: as_strided_scatter_symint
|
5095
5256
|
autogen: as_strided_scatter.out
|
5096
5257
|
|
5097
5258
|
- func: smm(Tensor self, Tensor mat2) -> Tensor
|
@@ -5170,6 +5331,8 @@
|
|
5170
5331
|
device_guard: False
|
5171
5332
|
dispatch:
|
5172
5333
|
CompositeExplicitAutograd: split_with_sizes
|
5334
|
+
NestedTensorCPU, NestedTensorCUDA: split_with_sizes_nested
|
5335
|
+
tags: core
|
5173
5336
|
|
5174
5337
|
- func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
|
5175
5338
|
variants: function, method
|
@@ -5316,6 +5479,13 @@
|
|
5316
5479
|
device_check: NoCheck
|
5317
5480
|
device_guard: False
|
5318
5481
|
|
5482
|
+
- func: sym_stride.int(Tensor self, int dim) -> SymInt
|
5483
|
+
variants: function
|
5484
|
+
device_check: NoCheck
|
5485
|
+
device_guard: False
|
5486
|
+
tags: core
|
5487
|
+
manual_cpp_binding: True
|
5488
|
+
|
5319
5489
|
- func: sum(Tensor self, *, ScalarType? dtype=None) -> Tensor
|
5320
5490
|
device_check: NoCheck # TensorIterator
|
5321
5491
|
variants: function, method
|
@@ -5326,12 +5496,14 @@
|
|
5326
5496
|
autogen: sum.out
|
5327
5497
|
|
5328
5498
|
- func: sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
|
5499
|
+
# TODO: Align the signature of sum.dim_IntList and _sparse_csr_sum.dim_dtype
|
5329
5500
|
structured_delegate: sum.IntList_out
|
5330
5501
|
device_check: NoCheck # TensorIterator
|
5331
5502
|
variants: function, method
|
5332
5503
|
dispatch:
|
5333
5504
|
NestedTensorCPU: NestedTensor_sum_dim_CPU
|
5334
5505
|
SparseCPU, SparseCUDA: sum_sparse_coo
|
5506
|
+
SparseCsrCPU, SparseCsrCUDA: sum_sparse_compressed
|
5335
5507
|
tags: core
|
5336
5508
|
|
5337
5509
|
- func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
|
@@ -5364,10 +5536,12 @@
|
|
5364
5536
|
CPU, CUDA: nansum_out
|
5365
5537
|
MPS: nansum_out_mps
|
5366
5538
|
|
5367
|
-
- func: sum_to_size(Tensor self,
|
5539
|
+
- func: sum_to_size(Tensor self, SymInt[] size) -> Tensor
|
5368
5540
|
variants: method
|
5369
5541
|
device_check: NoCheck
|
5370
5542
|
device_guard: False
|
5543
|
+
dispatch:
|
5544
|
+
CompositeImplicitAutograd: sum_to_size_symint
|
5371
5545
|
|
5372
5546
|
- func: sqrt(Tensor self) -> Tensor
|
5373
5547
|
device_check: NoCheck # TensorIterator
|
@@ -5421,7 +5595,7 @@
|
|
5421
5595
|
variants: function, method
|
5422
5596
|
cpp_no_default_args: ["unbiased"]
|
5423
5597
|
|
5424
|
-
- func: std.correction(Tensor self, int[1]? dim=None, *,
|
5598
|
+
- func: std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor
|
5425
5599
|
device_check: NoCheck # TensorIterator
|
5426
5600
|
variants: function, method
|
5427
5601
|
dispatch:
|
@@ -5439,7 +5613,7 @@
|
|
5439
5613
|
variants: function
|
5440
5614
|
cpp_no_default_args: ["unbiased"]
|
5441
5615
|
|
5442
|
-
- func: std_mean.correction(Tensor self, int[1]? dim=None, *,
|
5616
|
+
- func: std_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
|
5443
5617
|
device_check: NoCheck # TensorIterator
|
5444
5618
|
variants: function
|
5445
5619
|
dispatch:
|
@@ -5451,7 +5625,7 @@
|
|
5451
5625
|
variants: function
|
5452
5626
|
cpp_no_default_args: ["unbiased"]
|
5453
5627
|
|
5454
|
-
- func: std_mean.correction_names(Tensor self, Dimname[1] dim, *,
|
5628
|
+
- func: std_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
|
5455
5629
|
device_check: NoCheck # TensorIterator
|
5456
5630
|
variants: function
|
5457
5631
|
|
@@ -5459,7 +5633,7 @@
|
|
5459
5633
|
device_check: NoCheck # TensorIterator
|
5460
5634
|
cpp_no_default_args: ["unbiased"]
|
5461
5635
|
|
5462
|
-
- func: std.correction_out(Tensor self, int[1]? dim=None, *,
|
5636
|
+
- func: std.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
|
5463
5637
|
device_check: NoCheck # TensorIterator
|
5464
5638
|
dispatch:
|
5465
5639
|
CPU, CUDA: std_out
|
@@ -5474,11 +5648,11 @@
|
|
5474
5648
|
device_check: NoCheck # TensorIterator
|
5475
5649
|
cpp_no_default_args: ["unbiased"]
|
5476
5650
|
|
5477
|
-
- func: std.correction_names(Tensor self, Dimname[1] dim, *,
|
5651
|
+
- func: std.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor
|
5478
5652
|
device_check: NoCheck # TensorIterator
|
5479
5653
|
variants: function, method
|
5480
5654
|
|
5481
|
-
- func: std.correction_names_out(Tensor self, Dimname[1] dim, *,
|
5655
|
+
- func: std.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
|
5482
5656
|
device_check: NoCheck # TensorIterator
|
5483
5657
|
variants: function
|
5484
5658
|
|
@@ -5489,11 +5663,13 @@
|
|
5489
5663
|
CPU, CUDA: prod
|
5490
5664
|
MPS: prod_mps
|
5491
5665
|
autogen: prod.out
|
5666
|
+
tags: core
|
5492
5667
|
|
5493
5668
|
- func: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
|
5494
5669
|
structured_delegate: prod.int_out
|
5495
5670
|
device_check: NoCheck # TensorIterator
|
5496
5671
|
variants: function, method
|
5672
|
+
tags: core
|
5497
5673
|
|
5498
5674
|
- func: prod.int_out(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
|
5499
5675
|
structured: True
|
@@ -5531,7 +5707,7 @@
|
|
5531
5707
|
dispatch:
|
5532
5708
|
SparseCPU, SparseCUDA: tan_sparse
|
5533
5709
|
SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
|
5534
|
-
tags: pointwise
|
5710
|
+
tags: [core, pointwise]
|
5535
5711
|
|
5536
5712
|
- func: tan_(Tensor(a!) self) -> Tensor(a!)
|
5537
5713
|
device_check: NoCheck # TensorIterator
|
@@ -5592,8 +5768,6 @@
|
|
5592
5768
|
|
5593
5769
|
- func: tensordot.out(Tensor self, Tensor other, int[] dims_self, int[] dims_other, *, Tensor(a!) out) -> Tensor(a!)
|
5594
5770
|
variants: function
|
5595
|
-
dispatch:
|
5596
|
-
CPU, CUDA: tensordot_out
|
5597
5771
|
|
5598
5772
|
# TODO: namespace threshold in 'nn'
|
5599
5773
|
- func: threshold(Tensor self, Scalar threshold, Scalar value) -> Tensor
|
@@ -5635,8 +5809,10 @@
|
|
5635
5809
|
NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
|
5636
5810
|
tags: pointwise
|
5637
5811
|
|
5638
|
-
- func: tile(Tensor self,
|
5812
|
+
- func: tile(Tensor self, SymInt[] dims) -> Tensor
|
5639
5813
|
variants: function, method
|
5814
|
+
dispatch:
|
5815
|
+
CompositeImplicitAutograd: tile_symint
|
5640
5816
|
|
5641
5817
|
- func: transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
|
5642
5818
|
variants: function, method
|
@@ -5691,12 +5867,13 @@
|
|
5691
5867
|
- func: flipud(Tensor self) -> Tensor
|
5692
5868
|
variants: function, method
|
5693
5869
|
|
5694
|
-
- func: roll(Tensor self,
|
5870
|
+
- func: roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor
|
5695
5871
|
variants: function, method
|
5696
5872
|
dispatch:
|
5697
|
-
CPU:
|
5873
|
+
CPU, MPS: roll
|
5698
5874
|
CUDA: roll_cuda
|
5699
5875
|
autogen: roll.out
|
5876
|
+
tags: core
|
5700
5877
|
|
5701
5878
|
# default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args
|
5702
5879
|
|
@@ -5750,10 +5927,11 @@
|
|
5750
5927
|
NestedTensorCPU, NestedTensorCUDA: _nested_tensor_strides
|
5751
5928
|
autogen: _nested_tensor_strides.out
|
5752
5929
|
|
5753
|
-
- func:
|
5930
|
+
- func: _nested_tensor_storage_offsets(Tensor self) -> Tensor
|
5754
5931
|
variants: method
|
5755
5932
|
dispatch:
|
5756
|
-
NestedTensorCPU, NestedTensorCUDA:
|
5933
|
+
NestedTensorCPU, NestedTensorCUDA, NestedTensorMeta: _nested_tensor_storage_offsets
|
5934
|
+
autogen: _nested_tensor_storage_offsets.out
|
5757
5935
|
|
5758
5936
|
# _nested_from_padded is not usable from Python, so
|
5759
5937
|
# _nested_from_padded_and_nested_example is available for testing.
|
@@ -5764,13 +5942,13 @@
|
|
5764
5942
|
|
5765
5943
|
# The input arguments' types to this functions are temporary. When nested tensors switch to using SymInts for their metadata representation
|
5766
5944
|
# this will need to be updated
|
5767
|
-
- func: _nested_view_from_buffer(Tensor(a) self, Tensor nested_size, Tensor nested_strides,
|
5945
|
+
- func: _nested_view_from_buffer(Tensor(a) self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor(a)
|
5768
5946
|
variants: function
|
5769
5947
|
device_check: NoCheck
|
5770
5948
|
dispatch:
|
5771
5949
|
CPU, CUDA: _nested_view_from_buffer
|
5772
5950
|
|
5773
|
-
- func: _nested_view_from_buffer_copy(Tensor self, Tensor nested_size, Tensor nested_strides,
|
5951
|
+
- func: _nested_view_from_buffer_copy(Tensor self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor
|
5774
5952
|
variants: function
|
5775
5953
|
device_check: NoCheck
|
5776
5954
|
tags: view_copy
|
@@ -5913,18 +6091,19 @@
|
|
5913
6091
|
tags: core
|
5914
6092
|
cpp_no_default_args: ["unbiased"]
|
5915
6093
|
|
5916
|
-
- func: var.correction(Tensor self, int[1]? dim=None, *,
|
6094
|
+
- func: var.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor
|
5917
6095
|
device_check: NoCheck # TensorIterator
|
5918
6096
|
variants: function, method
|
5919
6097
|
dispatch:
|
5920
6098
|
CPU, CUDA: var
|
5921
6099
|
MPS: var_mps
|
6100
|
+
tags: core
|
5922
6101
|
|
5923
6102
|
- func: var.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
|
5924
6103
|
device_check: NoCheck # TensorIterator
|
5925
6104
|
cpp_no_default_args: ["unbiased"]
|
5926
6105
|
|
5927
|
-
- func: var.correction_out(Tensor self, int[1]? dim=None, *,
|
6106
|
+
- func: var.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
|
5928
6107
|
device_check: NoCheck # TensorIterator
|
5929
6108
|
dispatch:
|
5930
6109
|
CPU, CUDA: var_out
|
@@ -5938,11 +6117,11 @@
|
|
5938
6117
|
device_check: NoCheck # TensorIterator
|
5939
6118
|
cpp_no_default_args: ["unbiased"]
|
5940
6119
|
|
5941
|
-
- func: var.correction_names(Tensor self, Dimname[1] dim, *,
|
6120
|
+
- func: var.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor
|
5942
6121
|
device_check: NoCheck # TensorIterator
|
5943
6122
|
variants: function, method
|
5944
6123
|
|
5945
|
-
- func: var.correction_names_out(Tensor self, Dimname[1] dim, *,
|
6124
|
+
- func: var.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
|
5946
6125
|
device_check: NoCheck # TensorIterator
|
5947
6126
|
variants: function
|
5948
6127
|
|
@@ -5956,7 +6135,7 @@
|
|
5956
6135
|
variants: function
|
5957
6136
|
cpp_no_default_args: ["unbiased"]
|
5958
6137
|
|
5959
|
-
- func: var_mean.correction(Tensor self, int[1]? dim=None, *,
|
6138
|
+
- func: var_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
|
5960
6139
|
device_check: NoCheck # TensorIterator
|
5961
6140
|
variants: function
|
5962
6141
|
dispatch:
|
@@ -5968,7 +6147,7 @@
|
|
5968
6147
|
variants: function
|
5969
6148
|
cpp_no_default_args: ["unbiased"]
|
5970
6149
|
|
5971
|
-
- func: var_mean.correction_names(Tensor self, Dimname[1] dim, *,
|
6150
|
+
- func: var_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
|
5972
6151
|
device_check: NoCheck # TensorIterator
|
5973
6152
|
variants: function
|
5974
6153
|
|
@@ -6036,7 +6215,7 @@
|
|
6036
6215
|
CompositeExplicitAutograd: zeros
|
6037
6216
|
autogen: zeros.names_out
|
6038
6217
|
|
6039
|
-
- func: _efficientzerotensor(
|
6218
|
+
- func: _efficientzerotensor(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
6040
6219
|
dispatch:
|
6041
6220
|
CPU: _efficientzerotensor
|
6042
6221
|
CUDA: _efficientzerotensor_cuda
|
@@ -6056,7 +6235,7 @@
|
|
6056
6235
|
dispatch:
|
6057
6236
|
# NB: Although this composite mutates on the inside, it is
|
6058
6237
|
# non-differentiable so NonFunctional doesn't apply
|
6059
|
-
CompositeExplicitAutograd: zeros_like
|
6238
|
+
CompositeExplicitAutograd, CompositeImplicitAutogradNestedTensor: zeros_like
|
6060
6239
|
autogen: zeros_like.out
|
6061
6240
|
|
6062
6241
|
- func: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor
|
@@ -6297,7 +6476,7 @@
|
|
6297
6476
|
QuantizedCPU, QuantizedCUDA: quantized_clone
|
6298
6477
|
NestedTensorCPU, NestedTensorCUDA: clone_nested
|
6299
6478
|
autogen: clone.out
|
6300
|
-
tags: core
|
6479
|
+
tags: [core, pointwise]
|
6301
6480
|
|
6302
6481
|
- func: positive(Tensor(a) self) -> Tensor(a)
|
6303
6482
|
variants: function, method
|
@@ -6309,6 +6488,7 @@
|
|
6309
6488
|
dispatch:
|
6310
6489
|
CompositeExplicitAutograd: resize_as_
|
6311
6490
|
autogen: resize_as, resize_as.out
|
6491
|
+
tags: inplace_view
|
6312
6492
|
|
6313
6493
|
- func: resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!)
|
6314
6494
|
use_const_ref_for_mutable_tensors: True
|
@@ -6328,6 +6508,7 @@
|
|
6328
6508
|
SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
|
6329
6509
|
SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
|
6330
6510
|
MkldnnCPU: mkldnn_zero_
|
6511
|
+
NestedTensorCPU, NestedTensorCUDA: zero_nested_
|
6331
6512
|
autogen: zero, zero.out
|
6332
6513
|
|
6333
6514
|
- func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
|
@@ -6347,6 +6528,7 @@
|
|
6347
6528
|
dispatch:
|
6348
6529
|
SparseCPU, SparseCUDA: sub_sparse
|
6349
6530
|
ZeroTensor: sub_zerotensor
|
6531
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sub_Tensor
|
6350
6532
|
tags: [core, pointwise]
|
6351
6533
|
|
6352
6534
|
- func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
|
@@ -6493,6 +6675,16 @@
|
|
6493
6675
|
structured_delegate: _addmm_activation.out
|
6494
6676
|
variants: function, method
|
6495
6677
|
|
6678
|
+
- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None) -> (Tensor, Tensor)
|
6679
|
+
variants: function
|
6680
|
+
dispatch:
|
6681
|
+
CUDA: _scaled_mm_cuda
|
6682
|
+
|
6683
|
+
- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
|
6684
|
+
variants: function
|
6685
|
+
dispatch:
|
6686
|
+
CUDA: _scaled_mm_out_cuda
|
6687
|
+
|
6496
6688
|
# NOTE [ Sparse: autograd and API ]
|
6497
6689
|
#
|
6498
6690
|
#
|
@@ -6605,12 +6797,17 @@
 # the default would never make sense.

 - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: sparse_compressed_tensor
+
 - func: sparse_csr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_csc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

 - func: sparse_compressed_tensor.comp_plain_value(Tensor compressed_indices, Tensor plain_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: sparse_compressed_tensor
 - func: sparse_csr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_csc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
@@ -6627,15 +6824,15 @@
     CompositeExplicitAutograd: sparse_coo_tensor
   autogen: sparse_coo_tensor.size_out

-- func: sparse_coo_tensor.indices(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: sparse_coo_tensor.indices(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor

-- func: sparse_coo_tensor.indices_size(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: sparse_coo_tensor.indices_size(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor

-- func: _sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor
   dispatch:
     CompositeImplicitAutograd: _sparse_coo_tensor_unsafe_symint

-- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size) -> ()
+- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None) -> ()

 - func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()
 - func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
@@ -6648,7 +6845,7 @@
     SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_sparse
   autogen: _sparse_coo_tensor_with_dims.out

-- func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+- func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_and_tensor_sparse_symint
   autogen: _sparse_coo_tensor_with_dims_and_tensors.out
@@ -6671,17 +6868,23 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_mask
-    SparseCsrCPU, SparseCsrCUDA:
+    SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_compressed
   autogen: sparse_mask.out

+- func: _sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor
+  variants: method
+  dispatch:
+    SparseCPU, SparseCUDA: sparse_mask_projection
+  autogen: _sparse_mask_projection.out
+
 - func: _to_cpu(Tensor[] tensors) -> Tensor[]
   variants: function

-- func: to_dense(Tensor self, ScalarType? dtype=None) -> Tensor
+- func: to_dense(Tensor self, ScalarType? dtype=None, *, bool? masked_grad=None) -> Tensor
   variants: method

 # Special case of to_dense with custom derivative
-- func: _to_dense(Tensor self, ScalarType? dtype=None) -> Tensor
+- func: _to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor
   variants: method
   dispatch:
     SparseCPU, SparseCUDA: sparse_to_dense
@@ -6689,7 +6892,7 @@
     MkldnnCPU: mkldnn_to_dense
   autogen: _to_dense.out

-- func: to_dense_backward(Tensor grad, Tensor input) -> Tensor
+- func: to_dense_backward(Tensor grad, Tensor input, bool? masked_grad=None) -> Tensor

 - func: sparse_dim(Tensor self) -> int
   variants: method
@@ -6859,51 +7062,80 @@

 - func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
   variants: method
+
+# Special case of to_sparse.sparse_dim with custom derivative
+- func: _to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
+  variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse
     SparseCPU, SparseCUDA: sparse_coo_to_sparse
     SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
-  autogen:
+  autogen: _to_sparse.sparse_dim_out

 - func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
   variants: method
+
+# Special case of to_sparse with custom derivative
+- func: _to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
+  variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse
     SparseCPU, SparseCUDA: sparse_coo_to_sparse
     SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
-  autogen:
+  autogen: _to_sparse.out

 - func: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
   variants: method
+
+# Special case of to_sparse_csr with custom derivative
+- func: _to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
+  variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse_csr
     SparseCPU, SparseCUDA: coo_to_sparse_csr
     SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
-  autogen:
+  autogen: _to_sparse_csr.out

 - func: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
   variants: method
+
+# Special case of to_sparse_csc with custom derivative
+- func: _to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
+  variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse_csc
     SparseCPU, SparseCUDA: coo_to_sparse_csc
     SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
-  autogen:
+  autogen: _to_sparse_csc.out

 - func: to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
   variants: method
+
+# Special case of to_sparse_bsr with custom derivative
+- func: _to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
+  variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse_bsr
     SparseCPU, SparseCUDA: coo_to_sparse_bsr
     SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
-  autogen:
+  autogen: _to_sparse_bsr.out

 - func: to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
   variants: method
+
+# Special case of to_sparse_bsc with custom derivative
+- func: _to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
+  variants: method
   dispatch:
     CPU, CUDA: dense_to_sparse_bsc
     SparseCPU, SparseCUDA: coo_to_sparse_bsc
     SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc
-  autogen:
+  autogen: _to_sparse_bsc.out
+
+- func: _to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)
+  variants: function
+  dispatch:
+    CUDA: _to_sparse_semi_structured

 - func: to_mkldnn(Tensor self, ScalarType? dtype=None) -> Tensor
   variants: method
@@ -7174,7 +7406,7 @@

 # NB: Does NOT check precondition that numel == 1
 - func: _local_scalar_dense(Tensor self) -> Scalar
-  tags: data_dependent_output
+  tags: [core, data_dependent_output]
   dispatch:
     CPU: _local_scalar_dense_cpu
     CUDA: _local_scalar_dense_cuda
@@ -7187,8 +7419,9 @@
   dispatch:
     MPS: _lstm_mps
   autogen: _lstm_mps.out
+  tags: nondeterministic_seeded

-- func: lstm_mps_backward(Tensor grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])
+- func: lstm_mps_backward(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])
   dispatch:
     MPS: lstm_mps_backward
   autogen: lstm_mps_backward.out
@@ -7226,20 +7459,28 @@

 # RNN cells and layers
 - func: lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor)
+  tags: nondeterministic_seeded

 - func: lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor, Tensor)
+  tags: nondeterministic_seeded

 - func: gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+  tags: nondeterministic_seeded

 - func: gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+  tags: nondeterministic_seeded

 - func: rnn_tanh.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+  tags: nondeterministic_seeded

 - func: rnn_tanh.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+  tags: nondeterministic_seeded

 - func: rnn_relu.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
+  tags: nondeterministic_seeded

 - func: rnn_relu.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
+  tags: nondeterministic_seeded

 - func: lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> (Tensor, Tensor)
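Tagging every RNN entry point `nondeterministic_seeded` records that dropout inside these ops draws from the global RNG, so reproducible runs must seed explicitly. A hedged sketch with torch-rb's LSTM module (constructor keywords assumed to mirror PyTorch):

    Torch.manual_seed(42)
    rnn    = Torch::NN::LSTM.new(10, 20, num_layers: 2, dropout: 0.5)
    input  = Torch.randn(5, 3, 10)
    output, (hn, cn) = rnn.call(input)  # same seed => same dropout mask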
@@ -7382,6 +7623,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: masked_fill
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_masked_fill
   tags: pointwise

 - func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
@@ -7406,6 +7648,7 @@
   dispatch:
     CPU: masked_scatter__cpu
     CUDA: masked_scatter__cuda
+    MPS: masked_scatter__mps
   autogen: masked_scatter.out

 - func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
@@ -7503,6 +7746,7 @@
   dispatch:
     CPU: index_fill_
     CUDA: index_fill_
+    MPS: index_fill_mps_
   autogen: index_fill.int_Scalar_out

 - func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
@@ -7516,6 +7760,7 @@
   variants: method
   dispatch:
     CPU, CUDA: index_fill_
+    MPS: index_fill_mps_
   autogen: index_fill.int_Tensor_out

 - func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
@@ -7543,6 +7788,7 @@
 - func: scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
   structured_delegate: scatter.src_out
   variants: function, method
+  tags: core

 - func: scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)
   structured_delegate: scatter.src_out
@@ -7558,6 +7804,7 @@
 - func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
   structured_delegate: scatter.value_out
   variants: function, method
+  tags: core

 - func: scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
   structured_delegate: scatter.value_out
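`scatter.src` and `scatter.value` join the `core` opset. For reference, with `dim` 0 the out-of-place form writes `src[i][j]` into `self[index[i][j]][j]`; a small example with the generated method:

    src   = Torch.arange(1, 11).reshape(2, 5)
    index = Torch.tensor([[0, 1, 2, 0]])
    # Rows of the result receive src values routed by index along dim 0
    Torch.zeros(3, 5, dtype: :long).scatter(0, index, src)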
@@ -7657,6 +7904,7 @@
   variants: function
   dispatch:
     CPU, CUDA: bitwise_and_out
+    MPS: bitwise_and_out_mps
   tags: pointwise

 - func: bitwise_and.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -7671,7 +7919,7 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: bitwise_and
-  tags: pointwise
+  tags: [core, pointwise]

 - func: bitwise_and.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -7721,6 +7969,7 @@
   variants: function
   dispatch:
     CPU, CUDA: bitwise_or_out
+    MPS: bitwise_or_out_mps
   tags: pointwise

 - func: bitwise_or.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -7733,7 +7982,7 @@
 - func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
-  tags: pointwise
+  tags: [core, pointwise]

 - func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -7783,6 +8032,7 @@
   variants: function
   dispatch:
     CPU, CUDA: bitwise_xor_out
+    MPS: bitwise_xor_out_mps
   tags: pointwise

 - func: bitwise_xor.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -7795,7 +8045,7 @@
 - func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: method, function
-  tags: pointwise
+  tags: [core, pointwise]

 - func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -8067,6 +8317,7 @@
   variants: method
   dispatch:
     CPU, CUDA: random_
+    MPS: random_mps_
     Meta: random_meta_
   autogen: random, random.out

@@ -8164,7 +8415,7 @@
   dispatch:
     CPU: trace_cpu
     CUDA: trace_cuda
-    MPS:
+    MPS: trace_mps
   autogen: trace.out

 - func: trace_backward(Tensor grad, SymInt[] sizes) -> Tensor
@@ -8604,6 +8855,15 @@
     MPS: nonzero_mps
   tags: [dynamic_output_shape, core]

+- func: nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CPU: nonzero_static_out_cpu
+
+- func: nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor
+  variants: method, function
+  dispatch:
+    CPU: nonzero_static_cpu
+
 - func: nonzero_numpy(Tensor self) -> Tensor[]
   variants: method, function

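`nonzero_static` is new: unlike `nonzero` (tagged `dynamic_output_shape`), it always returns exactly `size` rows, truncating extras or padding with `fill_value`, which keeps output shapes static for compilers. Only a CPU kernel is registered here. A sketch (the `size:`/`fill_value:` keywords are assumed from the schema):

    x = Torch.tensor([0, 1, 0, 2])
    x.nonzero  # dynamic shape: [[1], [3]]
    # x.nonzero_static(size: 4) would give [[1], [3], [-1], [-1]],
    # assuming the generated binding exposes the new op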
@@ -8710,8 +8970,10 @@
     CPU, CUDA: linalg_solve_triangular
     MPS: linalg_solve_triangular_mps

-- func: linalg_vander(Tensor x, *,
+- func: linalg_vander(Tensor x, *, SymInt? N=None) -> Tensor
   python_module: linalg
+  dispatch:
+    CompositeImplicitAutograd: linalg_vander_symint

 - func: svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)

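`linalg_vander` now spells out its signature with a symbolic `N`. It builds a Vandermonde matrix: row `i` is `[1, x[i], x[i]**2, ..., x[i]**(n-1)]`. Whether the 0.14.0 wrapper exposes it under `Torch::Linalg` like other `python_module: linalg` entries is assumed here, not verified:

    x = Torch.tensor([1.0, 2.0, 3.0])
    # Torch::Linalg.vander(x) => [[1, 1, 1], [1, 2, 4], [1, 3, 9]]
    # (hypothetical call; the op itself is CompositeImplicitAutograd)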
@@ -8917,6 +9179,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: erfinv_out
+    MPS: erfinv_out_mps
     SparseCPU, SparseCUDA: erfinv_sparse_out
     SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
   tags: pointwise
@@ -8999,7 +9262,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: atan2_out
-    MPS:
+    MPS: atan2_out_mps
   tags: pointwise

 - func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
@@ -9030,6 +9293,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: lerp_Scalar
+    MPS: lerp_Scalar_mps
   tags: pointwise

 - func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)
@@ -9038,6 +9302,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: lerp_Tensor
+    MPS: lerp_Tensor_mps
   tags: pointwise

 - func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor
@@ -9054,46 +9319,46 @@

 - func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU:
+    CPU, MPS: histogram_histc_out
     CUDA: _histc_out_cuda

 - func: histc(Tensor self, int bins=100, Scalar min=0, Scalar max=0) -> Tensor
   variants: method, function
   dispatch:
-    CPU:
+    CPU, MPS: histogram_histc
     CUDA: _histc_cuda

 - func: histogram.bins_tensor_out(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)
   dispatch:
-    CPU:
+    CPU, MPS: histogram_out

 - func: histogram.bins_tensor(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)
   variants: method, function
   dispatch:
-    CPU:
+    CPU, MPS: histogram

 - func: histogram.bin_ct_out(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)
   dispatch:
-    CPU:
+    CPU, MPS: histogram_out

 - func: histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)
   variants: method, function
   dispatch:
-    CPU:
+    CPU, MPS: histogram

 - func: _histogramdd_bin_edges(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor[]
   dispatch:
-    CPU:
+    CPU, MPS: histogramdd_bin_edges
   autogen: _histogramdd_bin_edges.out

 - func: _histogramdd_from_bin_cts(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor
   dispatch:
-    CPU:
+    CPU, MPS: _histogramdd
   autogen: _histogramdd_from_bin_cts.out

 - func: _histogramdd_from_bin_tensors(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False) -> Tensor
   dispatch:
-    CPU:
+    CPU, MPS: _histogramdd
   autogen: _histogramdd_from_bin_tensors.out

 - func: histogramdd(Tensor self, int[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)
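The histogram family gains MPS kernels next to the CPU ones. `histc` buckets values into `bins` equal-width bins over `[min, max]`, with the top edge inclusive; assuming the usual keyword form of the generated binding:

    x = Torch.tensor([1.0, 2.0, 1.0, 4.0])
    Torch.histc(x, bins: 4, min: 0, max: 4)
    # => counts [0.0, 2.0, 1.0, 1.0] for bins [0,1) [1,2) [2,3) [3,4]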
@@ -9113,7 +9378,7 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: fmod
-  tags: pointwise
+  tags: [core, pointwise]

 - func: fmod_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -9148,6 +9413,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: hypot_out
+    MPS: hypot_out_mps
   tags: pointwise

 - func: hypot(Tensor self, Tensor other) -> Tensor
@@ -9220,7 +9486,7 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: remainder
-  tags: pointwise
+  tags: [core, pointwise]

 - func: remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
@@ -9265,12 +9531,11 @@
     MPS: min_mps
     QuantizedCPU: min_quantized_cpu

-
-
-
-
-
-  # CompositeExplicitAutograd: min_unary_out
+- func: min.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  dispatch:
+    CPU, CUDA: min_unary_out
+    QuantizedCPU: min_quantized_unary_out

 - func: fmin(Tensor self, Tensor other) -> Tensor
   structured_delegate: fmin.out
@@ -9283,7 +9548,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: fmin_out
+    CPU, CUDA, MPS: fmin_out
   tags: pointwise

 - func: max(Tensor self) -> Tensor
@@ -9305,7 +9570,7 @@
   structured_inherits: TensorIteratorBase
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: fmax_out
+    CPU, CUDA, MPS: fmax_out
   tags: pointwise

 - func: maximum(Tensor self, Tensor other) -> Tensor
@@ -9402,6 +9667,7 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: sort
+  tags: core

 - func: sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
   structured_delegate: sort.values_stable
@@ -9438,14 +9704,14 @@
 - func: argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor
   variants: method, function

-- func: topk.values(Tensor self,
+- func: topk.values(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   structured: True
   dispatch:
     CPU: topk_out_cpu
     CUDA: topk_out_cuda
     MPS: topk_out_mps

-- func: topk(Tensor self,
+- func: topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)
   variants: method, function
   structured_delegate: topk.values
   dispatch:
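`topk`'s `k` parameter becomes a `SymInt` so it can be traced symbolically; calls are unchanged:

    x = Torch.tensor([1.0, 5.0, 3.0])
    values, indices = Torch.topk(x, 2)
    # values => [5.0, 3.0], indices => [1, 2]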
@@ -9470,6 +9736,7 @@
   variants: method, function
   dispatch:
     SparseCPU, SparseCUDA: any_sparse
+  tags: core

 - func: any.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck
@@ -9483,6 +9750,7 @@
   structured: True
   dispatch:
     CPU, CUDA: renorm_out
+    MPS: renorm_out_mps

 - func: renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -9537,6 +9805,7 @@
   structured: True
   dispatch:
     CPU, CUDA: pow_Scalar_out
+    MPS: pow_Scalar_out_mps
   tags: pointwise

 - func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
@@ -9611,6 +9880,7 @@
     MPS: normal_mps_
     Meta: normal_meta_
     SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
+    NestedTensorCPU, NestedTensorCUDA: normal_nested_
   autogen: normal.out

 # Only used by the functionalization pass.
@@ -9720,156 +9990,155 @@
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
   autogen: _foreach_add.Scalar_out

-- func:
+- func: _foreach_add.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_add_list_kernel_slow
+    CUDA: foreach_tensor_add_list_kernel_cuda

-- func:
+- func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_add_list_kernel_slow_
+    CUDA: foreach_tensor_add_list_kernel_cuda_
+  autogen: _foreach_add.List_out

-- func:
+- func: _foreach_add.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_add_scalarlist_kernel_slow
+    CUDA: foreach_tensor_add_scalarlist_kernel_cuda

-- func:
+- func: _foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_add_scalarlist_kernel_slow_
+    CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
+  autogen: _foreach_add.ScalarList_out

-- func:
+- func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_sub_scalar_kernel_slow
+    CUDA: foreach_tensor_sub_scalar_kernel_cuda

-- func:
+- func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_sub_scalar_kernel_slow_
+    CUDA: foreach_tensor_sub_scalar_kernel_cuda_
+  autogen: _foreach_sub.Scalar_out

-- func:
+- func: _foreach_sub.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_sub_list_kernel_slow
+    CUDA: foreach_tensor_sub_list_kernel_cuda

-- func:
+- func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_sub_list_kernel_slow_
+    CUDA: foreach_tensor_sub_list_kernel_cuda_
+  autogen: _foreach_sub.List_out

-- func:
+- func: _foreach_sub.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_sub_scalarlist_kernel_slow
+    CUDA: foreach_tensor_sub_scalarlist_kernel_cuda

-- func:
+- func: _foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_sub_scalarlist_kernel_slow_
+    CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
+  autogen: _foreach_sub.ScalarList_out

-
-- func: _foreach_maximum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
+- func: _foreach_mul.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_mul_scalar_kernel_slow
+    CUDA: foreach_tensor_mul_scalar_kernel_cuda

-- func:
+- func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_mul_scalar_kernel_slow_
+    CUDA: foreach_tensor_mul_scalar_kernel_cuda_
+  autogen: _foreach_mul.Scalar_out

-- func:
+- func: _foreach_mul.List(Tensor[] self, Tensor[] other) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_mul_list_kernel_slow
+    CUDA: foreach_tensor_mul_list_kernel_cuda

-- func:
+- func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_mul_list_kernel_slow_
+    CUDA: foreach_tensor_mul_list_kernel_cuda_
+  autogen: _foreach_mul.List_out

-- func:
+- func: _foreach_mul.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_mul_scalarlist_kernel_slow
+    CUDA: foreach_tensor_mul_scalarlist_kernel_cuda

-- func:
+- func: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_mul_scalarlist_kernel_slow_
+    CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
+  autogen: _foreach_mul.ScalarList_out

-- func:
+- func: _foreach_mul.Tensor(Tensor[] self, Tensor other) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_mul_tensor_kernel_slow
+    CUDA: foreach_tensor_mul_tensor_kernel_cuda

-- func:
+- func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_mul_tensor_kernel_slow_
+    CUDA: foreach_tensor_mul_tensor_kernel_cuda_
+  autogen: _foreach_mul.Tensor_out

-- func:
+- func: _foreach_div.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_div_scalar_kernel_slow
+    CUDA: foreach_tensor_div_scalar_kernel_cuda

-- func:
+- func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_div_scalar_kernel_slow_
+    CUDA: foreach_tensor_div_scalar_kernel_cuda_
+  autogen: _foreach_div.Scalar_out

 - func: _foreach_div.List(Tensor[] self, Tensor[] other) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
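The `_foreach_*` entries above now carry their full signatures in this file. These are the fused kernels behind optimizers: a single launch updates a whole list of tensors instead of looping per tensor. They are internal ops, so the portable Ruby equivalent of `_foreach_mul.Scalar` is simply the loop it fuses:

    params = [Torch.ones(2), Torch.ones(3)]
    params.map { |p| p * 0.5 }  # what _foreach_mul.Scalar does in one kernel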
@@ -9886,20 +10155,35 @@
     CUDA: foreach_tensor_div_list_kernel_cuda_
   autogen: _foreach_div.List_out

-- func:
+- func: _foreach_div.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_div_scalarlist_kernel_slow
+    CUDA: foreach_tensor_div_scalarlist_kernel_cuda

-- func:
+- func: _foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_div_scalarlist_kernel_slow_
+    CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
+  autogen: _foreach_div.ScalarList_out
+
+- func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_clamp_max_scalar_kernel_slow
+    CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda
+
+- func: _foreach_clamp_max_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_clamp_max_scalar_kernel_slow_
+    CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
+  autogen: _foreach_clamp_max.Scalar_out

 - func: _foreach_clamp_max.List(Tensor[] self, Tensor[] other) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -9916,143 +10200,143 @@
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_clamp_max.List_out

-
-- func: _foreach_maximum.List(Tensor[] self, Tensor[] other) -> Tensor[]
+- func: _foreach_clamp_max.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_clamp_max_scalarlist_kernel_slow
+    CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda

-- func:
+- func: _foreach_clamp_max_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_clamp_max_scalarlist_kernel_slow_
+    CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
+  autogen: _foreach_clamp_max.ScalarList_out

-- func:
+- func: _foreach_clamp_min.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_clamp_min_scalar_kernel_slow
+    CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda

-- func:
+- func: _foreach_clamp_min_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
-
+    CPU: foreach_tensor_clamp_min_scalar_kernel_slow_
+    CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
+  autogen: _foreach_clamp_min.Scalar_out

-- func:
+- func: _foreach_clamp_min.List(Tensor[] self, Tensor[] other) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_clamp_min_list_kernel_slow
+    CUDA: foreach_tensor_clamp_min_list_kernel_cuda

-- func:
+- func: _foreach_clamp_min_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_clamp_min_list_kernel_slow_
+    CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
+  autogen: _foreach_clamp_min.List_out

-- func:
+- func: _foreach_clamp_min.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_clamp_min_scalarlist_kernel_slow
+    CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda

-- func:
+- func: _foreach_clamp_min_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_clamp_min_scalarlist_kernel_slow_
+    CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
+  autogen: _foreach_clamp_min.ScalarList_out

-
+# foreach_minimum/maximum dispatches to clamp_max/min
+- func: _foreach_maximum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_clamp_min_scalar_kernel_slow
+    CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda

-- func:
+- func: _foreach_maximum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_clamp_min_scalar_kernel_slow_
+    CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
+  autogen: _foreach_maximum.Scalar_out

-
+# foreach_minimum/maximum dispatches to clamp_max/min
+- func: _foreach_maximum.List(Tensor[] self, Tensor[] other) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_clamp_min_list_kernel_slow
+    CUDA: foreach_tensor_clamp_min_list_kernel_cuda

-- func:
+- func: _foreach_maximum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_clamp_min_list_kernel_slow_
+    CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
+  autogen: _foreach_maximum.List_out

-
+# foreach_minimum/maximum dispatches to clamp_max/min
+- func: _foreach_maximum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
     CPU: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda

-- func:
+- func: _foreach_maximum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
     CPU: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
-  autogen:
+  autogen: _foreach_maximum.ScalarList_out

-- func:
+- func: _foreach_minimum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_clamp_max_scalar_kernel_slow
+    CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda

-- func:
+- func: _foreach_minimum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_clamp_max_scalar_kernel_slow_
+    CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
+  autogen: _foreach_minimum.Scalar_out

-
-- func: _foreach_maximum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
+- func: _foreach_minimum.List(Tensor[] self, Tensor[] other) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_clamp_max_list_kernel_slow
+    CUDA: foreach_tensor_clamp_max_list_kernel_cuda

-- func:
+- func: _foreach_minimum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_clamp_max_list_kernel_slow_
+    CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
+  autogen: _foreach_minimum.List_out

 - func: _foreach_minimum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10069,43 +10353,95 @@
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_minimum.ScalarList_out

-- func:
+- func: _foreach_addcdiv.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_addcdiv_scalar_slow
+    CUDA: foreach_tensor_addcdiv_scalar_cuda

-- func:
+- func: _foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen: _foreach_zero, _foreach_zero.out
+    CPU: foreach_tensor_addcdiv_scalarlist_slow
+    CUDA: foreach_tensor_addcdiv_scalarlist_cuda

-- func:
+- func: _foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen: _foreach_exp.out
+    CPU: foreach_tensor_addcdiv_tensor_slow
+    CUDA: foreach_tensor_addcdiv_tensor_cuda

-- func:
+- func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
+    CPU: foreach_tensor_addcdiv_scalar_slow_
+    CUDA: foreach_tensor_addcdiv_scalar_cuda_
+  autogen: _foreach_addcdiv.Scalar_out

-- func:
+- func: _foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU:
-    CUDA:
-  autogen:
+    CPU: foreach_tensor_addcdiv_scalarlist_slow_
+    CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
+  autogen: _foreach_addcdiv.ScalarList_out
+
+- func: _foreach_addcdiv_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_addcdiv_tensor_slow_
+    CUDA: foreach_tensor_addcdiv_tensor_cuda_
+  autogen: _foreach_addcdiv.Tensor_out
+
+- func: _foreach_addcmul.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_addcmul_scalar_slow
+    CUDA: foreach_tensor_addcmul_scalar_cuda
+
+- func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_addcmul_scalarlist_slow
+    CUDA: foreach_tensor_addcmul_scalarlist_cuda
+
+- func: _foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_addcmul_tensor_slow
+    CUDA: foreach_tensor_addcmul_tensor_cuda
+
+- func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_addcmul_scalar_slow_
+    CUDA: foreach_tensor_addcmul_scalar_cuda_
+  autogen: _foreach_addcmul.Scalar_out
+
+- func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_addcmul_scalarlist_slow_
+    CUDA: foreach_tensor_addcmul_scalarlist_cuda_
+  autogen: _foreach_addcmul.ScalarList_out
+
+- func: _foreach_addcmul_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_addcmul_tensor_slow_
+    CUDA: foreach_tensor_addcmul_tensor_cuda_
+  autogen: _foreach_addcmul.Tensor_out

 - func: _foreach_abs(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10242,6 +10578,21 @@
     CUDA: foreach_tensor_erfc_cuda_
   autogen: _foreach_erfc.out

+- func: _foreach_exp(Tensor[] self) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_exp_slow
+    CUDA: foreach_tensor_exp_cuda
+
+- func: _foreach_exp_(Tensor(a!)[] self) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_exp_slow_
+    CUDA: foreach_tensor_exp_cuda_
+  autogen: _foreach_exp.out
+
 - func: _foreach_expm1(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
@@ -10272,6 +10623,68 @@
     CUDA: foreach_tensor_floor_cuda_
   autogen: _foreach_floor.out

+- func: _foreach_frac(Tensor[] self) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_frac_slow
+    CUDA: foreach_tensor_frac_cuda
+
+- func: _foreach_frac_(Tensor(a!)[] self) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_frac_slow_
+    CUDA: foreach_tensor_frac_cuda_
+  autogen: _foreach_frac.out
+
+- func: _foreach_lerp.List(Tensor[] self, Tensor[] tensors1, Tensor[] weights) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_ternary_lerp_slow
+    CUDA: foreach_tensor_lerp_ternary_cuda
+  autogen: _foreach_lerp.List_out
+
+- func: _foreach_lerp_.List(Tensor(a!)[] self, Tensor[] tensors1, Tensor[] weights) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_ternary_lerp_slow_
+    CUDA: foreach_tensor_lerp_ternary_cuda_
+  autogen: _foreach_lerp.List_out
+
+- func: _foreach_lerp.Scalar(Tensor[] self, Tensor[] tensors1, Scalar weight) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_lerp_list_kernel_slow
+    CUDA: foreach_tensor_lerp_list_cuda
+  autogen: _foreach_lerp.Scalar_out
+
+- func: _foreach_lerp_.Scalar(Tensor(a!)[] self, Tensor[] tensors1, Scalar weight) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_lerp_list_kernel_slow_
+    CUDA: foreach_tensor_lerp_list_cuda_
+  autogen: _foreach_lerp.Scalar_out
+
+- func: _foreach_lgamma(Tensor[] self) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_lgamma_slow
+    CUDA: foreach_tensor_lgamma_cuda
+
+- func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_lgamma_slow_
+    CUDA: foreach_tensor_lgamma_cuda_
+  autogen: _foreach_lgamma.out
+
 - func: _foreach_log(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
@@ -10347,110 +10760,65 @@
     CUDA: foreach_tensor_neg_cuda_
   autogen: _foreach_neg.out
 
-- func: _foreach_tan(Tensor[] self) -> Tensor[]
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_tan_slow
-    CUDA: foreach_tensor_tan_cuda
-
-- func: _foreach_tan_(Tensor(a!)[] self) -> ()
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_tan_slow_
-    CUDA: foreach_tensor_tan_cuda_
-  autogen: _foreach_tan.out
-
-- func: _foreach_tanh(Tensor[] self) -> Tensor[]
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_tanh_slow
-    CUDA: foreach_tensor_tanh_cuda
-
-- func: _foreach_tanh_(Tensor(a!)[] self) -> ()
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_tanh_slow_
-    CUDA: foreach_tensor_tanh_cuda_
-  autogen: _foreach_tanh.out
-
-- func: _foreach_sin(Tensor[] self) -> Tensor[]
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_sin_slow
-    CUDA: foreach_tensor_sin_cuda
-
-- func: _foreach_sin_(Tensor(a!)[] self) -> ()
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_sin_slow_
-    CUDA: foreach_tensor_sin_cuda_
-  autogen: _foreach_sin.out
-
-- func: _foreach_sinh(Tensor[] self) -> Tensor[]
+- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_sinh_slow
-    CUDA: foreach_tensor_sinh_cuda
+    CPU: foreach_tensor_norm_slow
+    CUDA: foreach_tensor_norm_cuda
+  autogen: _foreach_norm.Scalar_out
 
-- func: _foreach_sinh_(Tensor(a!)[] self) -> ()
+- func: _foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_sinh_slow_
-    CUDA: foreach_tensor_sinh_cuda_
-  autogen: _foreach_sinh.out
+    CPU: foreach_tensor_pow_list_kernel_slow
+    CUDA: foreach_tensor_pow_list_kernel_cuda
 
-- func: _foreach_round(Tensor[] self) -> Tensor[]
+- func: _foreach_pow.Scalar(Tensor[] self, Scalar exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_round_slow
-    CUDA: foreach_tensor_round_cuda
+    CPU: foreach_tensor_pow_scalar_kernel_slow
+    CUDA: foreach_tensor_pow_scalar_kernel_cuda
 
-- func: _foreach_round_(Tensor(a!)[] self) -> ()
+- func: _foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_round_slow_
-    CUDA: foreach_tensor_round_cuda_
-  autogen: _foreach_round.out
+    CPU: foreach_tensor_pow_scalarlist_kernel_slow
+    CUDA: foreach_tensor_pow_scalarlist_kernel_cuda
 
-- func: _foreach_lgamma(Tensor[] self) -> Tensor[]
+- func: _foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_lgamma_slow
-    CUDA: foreach_tensor_lgamma_cuda
+    CPU: foreach_scalar_pow_list_kernel_slow
+    CUDA: foreach_scalar_pow_list_kernel_cuda
 
-- func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+- func: _foreach_pow_.List(Tensor(a!)[] self, Tensor[] exponent) -> ()
+  device_check: NoCheck
   variants: function
   dispatch:
-    CPU: foreach_tensor_lgamma_slow_
-    CUDA: foreach_tensor_lgamma_cuda_
-  autogen: _foreach_lgamma.out
+    CPU: foreach_tensor_pow_list_kernel_slow_
+    CUDA: foreach_tensor_pow_list_kernel_cuda_
+  autogen: _foreach_pow.List_out
 
-- func: _foreach_frac(Tensor[] self) -> Tensor[]
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+- func: _foreach_pow_.Scalar(Tensor(a!)[] self, Scalar exponent) -> ()
+  device_check: NoCheck
   variants: function
   dispatch:
-    CPU: foreach_tensor_frac_slow
-    CUDA: foreach_tensor_frac_cuda
+    CPU: foreach_tensor_pow_scalar_kernel_slow_
+    CUDA: foreach_tensor_pow_scalar_kernel_cuda_
+  autogen: _foreach_pow.Scalar_out
 
-- func: _foreach_frac_(Tensor(a!)[] self) -> ()
-  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+- func: _foreach_pow_.ScalarList(Tensor(a!)[] self, Scalar[] exponent) -> ()
+  device_check: NoCheck
   variants: function
   dispatch:
-    CPU: foreach_tensor_frac_slow_
-    CUDA: foreach_tensor_frac_cuda_
-  autogen: _foreach_frac.out
+    CPU: foreach_tensor_pow_scalarlist_kernel_slow_
+    CUDA: foreach_tensor_pow_scalarlist_kernel_cuda_
+  autogen: _foreach_pow.ScalarList_out
 
 - func: _foreach_reciprocal(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
@@ -10467,6 +10835,21 @@
     CUDA: foreach_tensor_reciprocal_cuda_
   autogen: _foreach_reciprocal.out
 
+- func: _foreach_round(Tensor[] self) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_round_slow
+    CUDA: foreach_tensor_round_cuda
+
+- func: _foreach_round_(Tensor(a!)[] self) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CPU: foreach_tensor_round_slow_
+    CUDA: foreach_tensor_round_cuda_
+  autogen: _foreach_round.out
+
 - func: _foreach_sigmoid(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
@@ -10482,150 +10865,126 @@
     CUDA: foreach_tensor_sigmoid_cuda_
   autogen: _foreach_sigmoid.out
 
-- func: _foreach_trunc(Tensor[] self) -> Tensor[]
+- func: _foreach_sign(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_trunc_slow
-    CUDA: foreach_tensor_trunc_cuda
+    CPU: foreach_tensor_sign_slow
+    CUDA: foreach_tensor_sign_cuda
 
-- func: _foreach_trunc_(Tensor(a!)[] self) -> ()
+- func: _foreach_sign_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_trunc_slow_
-    CUDA: foreach_tensor_trunc_cuda_
-  autogen: _foreach_trunc.out
+    CPU: foreach_tensor_sign_slow_
+    CUDA: foreach_tensor_sign_cuda_
+  autogen: _foreach_sign.out
 
-- func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
+- func: _foreach_sin(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcdiv_scalar_slow_
-    CUDA: foreach_tensor_addcdiv_scalar_cuda_
-  autogen: _foreach_addcdiv.Scalar_out
+    CPU: foreach_tensor_sin_slow
+    CUDA: foreach_tensor_sin_cuda
 
-- func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
+- func: _foreach_sin_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcmul_scalar_slow_
-    CUDA: foreach_tensor_addcmul_scalar_cuda_
-  autogen: _foreach_addcmul.Scalar_out
+    CPU: foreach_tensor_sin_slow_
+    CUDA: foreach_tensor_sin_cuda_
+  autogen: _foreach_sin.out
 
-- func: _foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
+- func: _foreach_sinh(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcdiv_scalarlist_slow_
-    CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
-  autogen: _foreach_addcdiv.ScalarList_out
+    CPU: foreach_tensor_sinh_slow
+    CUDA: foreach_tensor_sinh_cuda
 
-- func: _foreach_addcdiv_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()
+- func: _foreach_sinh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcdiv_tensor_slow_
-    CUDA: foreach_tensor_addcdiv_tensor_cuda_
-  autogen: _foreach_addcdiv.Tensor_out
+    CPU: foreach_tensor_sinh_slow_
+    CUDA: foreach_tensor_sinh_cuda_
+  autogen: _foreach_sinh.out
 
-- func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
+- func: _foreach_sqrt(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcmul_scalarlist_slow_
-    CUDA: foreach_tensor_addcmul_scalarlist_cuda_
-  autogen: _foreach_addcmul.ScalarList_out
+    CPU: foreach_tensor_sqrt_slow
+    CUDA: foreach_tensor_sqrt_cuda
 
-- func: _foreach_addcmul_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()
+- func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcmul_tensor_slow_
-    CUDA: foreach_tensor_addcmul_tensor_cuda_
-  autogen: _foreach_addcmul.Tensor_out
+    CPU: foreach_tensor_sqrt_slow_
+    CUDA: foreach_tensor_sqrt_cuda_
+  autogen: _foreach_sqrt.out
 
-- func: _foreach_addcdiv.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
+- func: _foreach_tan(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcdiv_scalar_slow
-    CUDA: foreach_tensor_addcdiv_scalar_cuda
+    CPU: foreach_tensor_tan_slow
+    CUDA: foreach_tensor_tan_cuda
 
-- func: _foreach_addcmul.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
+- func: _foreach_tan_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcmul_scalar_slow
-    CUDA: foreach_tensor_addcmul_scalar_cuda
+    CPU: foreach_tensor_tan_slow_
+    CUDA: foreach_tensor_tan_cuda_
+  autogen: _foreach_tan.out
 
-- func: _foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
+- func: _foreach_tanh(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcdiv_scalarlist_slow
-    CUDA: foreach_tensor_addcdiv_scalarlist_cuda
+    CPU: foreach_tensor_tanh_slow
+    CUDA: foreach_tensor_tanh_cuda
 
-- func: _foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
+- func: _foreach_tanh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcdiv_tensor_slow
-    CUDA: foreach_tensor_addcdiv_tensor_cuda
+    CPU: foreach_tensor_tanh_slow_
+    CUDA: foreach_tensor_tanh_cuda_
+  autogen: _foreach_tanh.out
 
-- func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
+- func: _foreach_trunc(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcmul_scalarlist_slow
-    CUDA: foreach_tensor_addcmul_scalarlist_cuda
+    CPU: foreach_tensor_trunc_slow
+    CUDA: foreach_tensor_trunc_cuda
 
-- func: _foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
+- func: _foreach_trunc_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_addcmul_tensor_slow
-    CUDA: foreach_tensor_addcmul_tensor_cuda
+    CPU: foreach_tensor_trunc_slow_
+    CUDA: foreach_tensor_trunc_cuda_
+  autogen: _foreach_trunc.out
 
-- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]
+- func: _foreach_zero_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_norm_slow
-    CUDA: foreach_tensor_norm_cuda
-  autogen: _foreach_norm.Scalar_out
-
-- func: _foreach_lerp.List(Tensor[] self, Tensor[] tensors1, Tensor[] weights) -> Tensor[]
-  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_ternary_lerp_slow
-    CUDA: foreach_tensor_lerp_ternary_cuda
-  autogen: _foreach_lerp.List_out
-
-- func: _foreach_lerp_.List(Tensor(a!)[] self, Tensor[] tensors1, Tensor[] weights) -> ()
-  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_ternary_lerp_slow_
-    CUDA: foreach_tensor_lerp_ternary_cuda_
-  autogen: _foreach_lerp.List_out
-
-- func: _foreach_lerp.Scalar(Tensor[] self, Tensor[] tensors1, Scalar weight) -> Tensor[]
-  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_lerp_list_kernel_slow
-    CUDA: foreach_tensor_lerp_list_cuda
-  autogen: _foreach_lerp.Scalar_out
+    CPU: foreach_tensor_zero_slow_
+    CUDA: foreach_tensor_zero_cuda_
+  autogen: _foreach_zero, _foreach_zero.out
 
-- func: _foreach_lerp_.Scalar(Tensor(a!)[] self, Tensor[] tensors1, Scalar weight) -> ()
-  device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
+- func: _foreach_copy_(Tensor(a!)[] self, Tensor[] src, bool non_blocking=False) -> ()
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-    CPU: foreach_tensor_lerp_list_kernel_slow_
-    CUDA: foreach_tensor_lerp_list_cuda_
-  autogen: _foreach_lerp.Scalar_out
+    CPU: foreach_tensor_copy_list_kernel_slow_
+    CUDA: foreach_tensor_copy_list_kernel_cuda_
+  autogen: _foreach_copy, _foreach_copy.out
 
 - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
   dispatch:
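
The hunk above reorders the unary `_foreach_*` kernels and introduces `_foreach_sign`, `_foreach_pow`, and `_foreach_copy_`, all of which operate on a whole list of tensors in one dispatcher call. A hedged Ruby sketch of the new pow overloads, assuming the generated binding keeps the ATen name (it may not be exposed in this release):

    # Sketch only: the Scalar and ScalarList forms of _foreach_pow declared above
    xs = [Torch.tensor([1.0, 2.0]), Torch.tensor([3.0, 4.0])]

    Torch._foreach_pow(xs, 2)      # Scalar: raise every element to the power 2
    Torch._foreach_pow(xs, [2, 3]) # ScalarList: one exponent per tensor
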
@@ -10657,7 +11016,11 @@
   dispatch:
     CPU: searchsorted_cpu
     CUDA: searchsorted_cuda
-
+
+- func: searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CPU: searchsorted_out_cpu
+    CUDA: searchsorted_out_cuda
 
 - func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
   structured_delegate: _convert_indices_from_coo_to_csr.out
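
`searchsorted.Scalar_out` adds the previously missing out variant for a scalar query. In Ruby terms (assuming torch-rb exposes the generated `Torch.searchsorted`; the keyword form below is also an assumption):

    sorted = Torch.tensor([1, 3, 5, 7, 9])
    Torch.searchsorted(sorted, 6)              # index where 6 would be inserted => 3
    Torch.searchsorted(sorted, 6, right: true) # same, but after any equal entries
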
@@ -10981,6 +11344,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: hardsigmoid_out
+    MPS: hardsigmoid_out_mps
     QuantizedCPU: hardsigmoid_out_quantized_cpu
 
 - func: hardsigmoid(Tensor self) -> Tensor
@@ -11001,6 +11365,7 @@
   python_module: nn
   dispatch:
     CPU, CUDA: hardsigmoid_backward_out
+    MPS: hardsigmoid_backward_out_mps
 
 - func: hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor
   structured_delegate: hardsigmoid_backward.grad_input
@@ -11119,6 +11484,7 @@
   dispatch:
     CPU: log_sigmoid_forward_out_cpu
     CUDA: log_sigmoid_forward_out_cuda
+    MPS: log_sigmoid_forward_out_mps
 
 - func: log_sigmoid_forward(Tensor self) -> (Tensor output, Tensor buffer)
   device_check: NoCheck # TensorIterator
@@ -11126,18 +11492,21 @@
   dispatch:
     CPU: log_sigmoid_forward_cpu
     CUDA: log_sigmoid_forward_cuda
+    MPS: log_sigmoid_forward_mps
 
 - func: log_sigmoid_backward.grad_input(Tensor grad_output, Tensor self, Tensor buffer, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: log_sigmoid_backward_cpu_out
     CUDA: log_sigmoid_backward_cuda_out
+    MPS: log_sigmoid_backward_mps_out
 
 - func: log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor
   python_module: nn
   dispatch:
     CPU: log_sigmoid_backward_cpu
     CUDA: log_sigmoid_backward_cuda
+    MPS: log_sigmoid_backward_mps
 
 - func: rrelu_with_noise.out(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -11279,6 +11648,7 @@
     CUDA: adaptive_avg_pool3d_cuda
     QuantizedCPU: adaptive_avg_pool3d_quantized_cpu
   autogen: _adaptive_avg_pool3d.out
+  tags: core
 
 - func: adaptive_avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -11394,6 +11764,7 @@
   dispatch:
     MkldnnCPU: mkldnn_avg_pool3d
     QuantizedCPU: avg_pool3d_quantized_cpu
+  tags: core
 
 - func: avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -11517,25 +11888,25 @@
     CPU: max_pool3d_with_indices_backward_cpu
     CUDA: max_pool3d_with_indices_backward_cuda
 
-- func: max_unpool2d.out(Tensor self, Tensor indices, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!)
+- func: max_unpool2d.out(Tensor self, Tensor indices, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: max_unpooling2d_forward_out_cpu
     CUDA: max_unpooling2d_forward_out_cuda
 
-- func: max_unpool2d(Tensor self, Tensor indices, int[2] output_size) -> Tensor
+- func: max_unpool2d(Tensor self, Tensor indices, SymInt[2] output_size) -> Tensor
   python_module: nn
   dispatch:
     CPU: max_unpooling2d_forward_cpu
     CUDA: max_unpooling2d_forward_cuda
 
-- func: max_unpool3d.out(Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)
+- func: max_unpool3d.out(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
   dispatch:
     CPU: max_unpooling3d_forward_out_cpu
     CUDA: max_unpooling3d_forward_out_cuda
 
-- func: max_unpool3d(Tensor self, Tensor indices, int[3] output_size, int[3] stride, int[3] padding) -> Tensor
+- func: max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor
   python_module: nn
   dispatch:
     CPU: max_unpooling3d_forward_cpu
@@ -11553,6 +11924,7 @@
 - func: reflection_pad1d(Tensor self, SymInt[2] padding) -> Tensor
   python_module: nn
   structured_delegate: reflection_pad1d.out
+  tags: core
 
 - func: reflection_pad1d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -11607,6 +11979,7 @@
 - func: reflection_pad3d(Tensor self, SymInt[6] padding) -> Tensor
   python_module: nn
   structured_delegate: reflection_pad3d.out
+  tags: core
 
 - func: reflection_pad3d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[6] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
@@ -12069,6 +12442,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: logit_backward_out
+    MPS: logit_backward_out_mps
   tags: pointwise
 
 - func: logit_backward(Tensor grad_output, Tensor self, float? eps=None) -> Tensor
@@ -12715,157 +13089,229 @@
 
 # torch.fft.fft
 # NOTE: NOT an alias for torch.fft, which has different semantics
-- func: fft_fft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+- func: fft_fft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_fft_symint
 
-- func: fft_fft.out(Tensor self, int? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_fft_symint_out
 
-- func: fft_ifft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+- func: fft_ifft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ifft_symint
 
-- func: fft_ifft.out(Tensor self, int? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_ifft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ifft_symint_out
 
-- func: fft_rfft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+- func: fft_rfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_rfft_symint
 
-- func: fft_rfft.out(Tensor self, int? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_rfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_rfft_symint_out
 
-- func: fft_irfft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+- func: fft_irfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_irfft_symint
 
-- func: fft_irfft.out(Tensor self, int? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_irfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_irfft_symint_out
 
-- func: fft_hfft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+- func: fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_hfft_symint
 
-- func: fft_hfft.out(Tensor self, int? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_hfft_symint_out
 
-- func: fft_ihfft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+- func: fft_ihfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ihfft_symint
 
-- func: fft_ihfft.out(Tensor self, int? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_ihfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ihfft_symint_out
 
-- func: fft_fft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+- func: fft_fft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_fft2_symint
 
-- func: fft_fft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_fft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_fft2_symint_out
 
-- func: fft_ifft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+- func: fft_ifft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ifft2_symint
 
-- func: fft_ifft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_ifft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ifft2_symint_out
 
-- func: fft_rfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+- func: fft_rfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_rfft2_symint
 
-- func: fft_rfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_rfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_rfft2_symint_out
 
-- func: fft_irfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+- func: fft_irfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_irfft2_symint
 
-- func: fft_irfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_irfft2_symint_out
 
-- func: fft_hfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+- func: fft_hfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
   use_const_ref_for_mutable_tensors: True
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_hfft2_symint
 
-- func: fft_hfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_hfft2_symint_out
 
-- func: fft_ihfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+- func: fft_ihfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
   use_const_ref_for_mutable_tensors: True
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ihfft2_symint
 
-- func: fft_ihfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_ihfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ihfft2_symint_out
 
-- func: fft_fftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+- func: fft_fftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_fftn_symint
 
-- func: fft_fftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_fftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_fftn_symint_out
 
-- func: fft_ifftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+- func: fft_ifftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ifftn_symint
 
-- func: fft_ifftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_ifftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ifftn_symint_out
 
-- func: fft_rfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+- func: fft_rfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_rfftn_symint
 
-- func: fft_rfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_rfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_rfftn_symint_out
 
-- func: fft_irfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+- func: fft_irfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_irfftn_symint
 
-- func: fft_irfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_irfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_irfftn_symint_out
 
-- func: fft_hfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+- func: fft_hfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
   use_const_ref_for_mutable_tensors: True
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_hfftn_symint
 
-- func: fft_hfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_hfftn_symint_out
 
-- func: fft_ihfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+- func: fft_ihfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
   use_const_ref_for_mutable_tensors: True
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ihfftn_symint
 
-- func: fft_ihfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: fft_ihfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   python_module: fft
   variants: function
+  dispatch:
+    CompositeImplicitAutograd: fft_ihfftn_symint_out
 
 - func: fft_fftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   python_module: fft
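
Every fft entry above switches its size arguments from `int`/`int[1]` to `SymInt`/`SymInt[1]` and routes through a `CompositeImplicitAutograd` `*_symint` kernel, which lets traced graphs treat transform lengths symbolically; the eager call is unchanged. For example, assuming this build of torch-rb exposes the generated `Torch::FFT` module:

    x = Torch.randn(8)
    y = Torch::FFT.fft(x, 16) # n = 16 zero-pads the signal before the transform
    y.shape                   # => [16]
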
@@ -13210,6 +13656,7 @@
   structured: True
   dispatch:
     CPU, CUDA: linalg_vector_norm_out
+    MPS: linalg_vector_norm_out_mps
 
 - func: linalg_matrix_norm(Tensor self, Scalar ord, int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   python_module: linalg
@@ -13788,6 +14235,7 @@
   dispatch:
     NestedTensorCPU: NestedTensor_softmax_dropout
     NestedTensorCUDA: NestedTensor_softmax_dropout_cuda
+  tags: nondeterministic_seeded
 
 # Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
 - func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
@@ -13803,67 +14251,71 @@
     CUDA, NestedTensorCUDA: native_multi_head_attention_cuda
   autogen: _native_multi_head_attention.out
 
-- func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False) -> Tensor
+- func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> Tensor
   python_module: nn
   variants: function
   autogen: scaled_dot_product_attention.out
-
-# TODO: THIS NEEDS TO BE REMOVED BUT PEOPLE HAVE TRAINED THEIR MODELS WITH THIS OP BUILTIN
-- func: _scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool need_attn_weights=False, bool is_causal=False) -> (Tensor, Tensor)
-  python_module: nn
-  variants: function
-  autogen: _scaled_dot_product_attention.out
+  tags: nondeterministic_seeded
 
 # This aten function is kept so that we can test the choice function from Python
-- func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False) -> int
+- func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int
   dispatch:
     Meta: _fused_sdp_choice_meta
     CPU, NestedTensorCPU: _fused_sdp_choice_cpp
     CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
+  tags: nondeterministic_seeded
 
-- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None) -> (Tensor, Tensor)
+- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
   variants: function
+  tags: nondeterministic_seeded
 
-- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False) -> (Tensor ouput, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, int philox_seed, int philox_offset, Tensor debug_attn_mask)
+- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor ouput, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu
     CUDA: _scaled_dot_product_flash_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
+  tags: nondeterministic_seeded
 
-- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, int philox_seed, int philox_offset) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+  device_check: NoCheck
   variants: function
   dispatch:
+    CPU: _scaled_dot_product_flash_attention_backward_cpu
     CUDA: _scaled_dot_product_flash_attention_backward_cuda
 
-- func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, bool compute_log_sumexp, bool is_causal=False) -> (Tensor, Tensor)
+- func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
   dispatch:
     CUDA: _scaled_dot_product_efficient_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_efficient_attention_nestedtensor_cuda
+  tags: nondeterministic_seeded
 
-- func: _scaled_dot_product_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, bool is_causal=False, bool chunk_grad_outputs=False) -> (Tensor, Tensor, Tensor)
+- func: _scaled_dot_product_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor attn_bias, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, float dropout_p, bool[4] grad_input_mask, bool is_causal=False, *, float? scale=None) -> (Tensor, Tensor, Tensor, Tensor)
+  device_check: NoCheck
   dispatch:
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
+  tags: nondeterministic_seeded
 
-- func: _chunk_grad_outputs_efficient_attention(Tensor query, Tensor key, Tensor value, bool is_causal=False) -> bool
-  dispatch:
-    CUDA: _chunk_grad_outputs_efficient_attention
-
-- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, bool return_debug_mask) -> (Tensor output, Tensor softmax_logsumexp, int philox_seed, int philox_offset, Tensor debug_attn_mask)
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
     CUDA: _flash_attention_forward
+  tags: nondeterministic_seeded
 
-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, int philox_seed, int philox_offset) -> (Tensor, Tensor, Tensor)
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+  device_check: NoCheck
   variants: function
   dispatch:
     CUDA: _flash_attention_backward
 
 # Returns ouput, logsumexp if compute_logsumexp
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, bool compute_log_sumexp=False, bool causal=False) -> (Tensor, Tensor)
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
+  tags: nondeterministic_seeded
 
-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, bool is_causal=False, bool chunk_grad_outputs=False) -> (Tensor, Tensor, Tensor)
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int max_seqlen_k, int max_seqlen_q, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
+  device_check: NoCheck
   variants: function
   dispatch:
     CUDA: _efficient_attention_backward
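
The attention entries above all gain a keyword-only `float? scale=None` argument; when it is left nil, the kernels keep the usual 1/sqrt(head_dim) softmax scaling. A hypothetical Ruby call, treating both the binding name and the keyword form as assumptions since torch-rb may not surface this op directly:

    q = Torch.randn(2, 4, 8, 16) # batch, heads, seq, head_dim
    k = Torch.randn(2, 4, 8, 16)
    v = Torch.randn(2, 4, 8, 16)

    # scale: 0.125 overrides the default 1/sqrt(16) = 0.25 for this head size
    out = Torch.scaled_dot_product_attention(q, k, v, scale: 0.125)
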
@@ -13872,8 +14324,15 @@
   variants: function
   dispatch:
     CUDA: triton_scaled_dot_attention
+  tags: nondeterministic_seeded
   autogen: _triton_scaled_dot_attention.out
 
+- func: _fill_mem_eff_dropout_mask_(Tensor(a!) self, float dropout_p, int seed, int offset) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CUDA: _fill_mem_eff_dropout_mask_
+  tags: nondeterministic_seeded
+
 - func: _triton_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
   variants: function
   dispatch:
@@ -13895,18 +14354,6 @@
   variants: function
   tags: pointwise
 
-- func: _transformer_decoder_only_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, Tensor? incr_key=None, Tensor? incr_value=None) -> (Tensor, Tensor, Tensor)
-  variants: function
-  dispatch:
-    CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_decoder_only_layer_forward
-  autogen: _transformer_decoder_only_layer_fwd.out
-
-- func: _native_decoder_only_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, Tensor? incr_key=None, Tensor? incr_value=None, bool need_weights=True, bool average_attn_weights=True) -> (Tensor, Tensor, Tensor, Tensor)
-  variants: function
-  dispatch:
-    CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: native_decoder_only_multi_head_attention
-  autogen: _native_decoder_only_multi_head_attention.out
-
 - func: special_bessel_j0(Tensor self) -> Tensor
   python_module: special
   structured_delegate: special_bessel_j0.out
@@ -14603,9 +15050,31 @@
     CUDA: _fused_adam_kernel_cuda_
   autogen: _fused_adam, _fused_adam.out
 
+- func: _fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now),
+  # but still skip the device check as the Tensor LR can be on CPU
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CUDA: _fused_adam_kernel_cuda_
+  autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out
+
 - func: _fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
   variants: function
   dispatch:
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw, _fused_adamw.out
+
+- func: _fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now),
+  # but still skip the device check as the Tensor LR can be on CPU
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CUDA: _fused_adamw_kernel_cuda_
+  autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
+
+# This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
+- func: _propagate_xla_data(Tensor input, Tensor output) -> ()
+  variants: function
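
The new `.tensor_lr` overloads let the fused Adam and AdamW kernels read the learning rate from a 0-d tensor rather than a float, so a scheduler can decay it in place between steps without rebuilding the optimizer call; because that tensor may sit on the CPU while the parameters are on CUDA, the entries set `device_check: NoCheck`. A minimal sketch of the idea in Ruby (these are internal ops, so the user-facing surface is an assumption):

    lr = Torch.tensor(1e-3) # 0-d CPU tensor; the CUDA kernel reads it each step
    lr.mul!(0.9)            # in-place decay, no new optimizer state needed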