torch-rb 0.13.2 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -0
- data/codegen/generate_functions.rb +6 -1
- data/codegen/native_functions.yaml +985 -516
- data/ext/torch/ruby_arg_parser.cpp +27 -2
- data/ext/torch/torch.cpp +10 -6
- data/ext/torch/utils.h +1 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +5 -2
- metadata +3 -3
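
The hunks below appear to come from data/codegen/native_functions.yaml, the PyTorch operator schema file that torch-rb's codegen/generate_functions.rb consumes when generating Ruby bindings. As a minimal, hedged sketch of picking up this release in a project (the Gemfile pin and the use of the Torch::VERSION constant are assumptions based on the file list above, not part of the diff itself):

# Gemfile (hypothetical project setup)
#   gem "torch-rb", "~> 0.14.1"

require "torch"

# data/lib/torch/version.rb is one of the changed files in this diff, so the
# constant below is assumed to report the new release after `bundle update torch-rb`.
puts Torch::VERSION  # expected: "0.14.1"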
@@ -170,8 +170,36 @@
     CPU: _assert_async_cpu
     CUDA: _assert_async_cuda
 
+- func: _assert_async.msg(Tensor self, str assert_msg) -> ()
+  dispatch:
+    CPU: _assert_async_msg_cpu
+    CUDA: _assert_async_msg_cuda
+
+- func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CPU: _functional_assert_async_msg_cpu
+
+- func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()
+
+- func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
+  dispatch:
+    CompositeExplicitAutograd: sym_constrain_range
+
+- func: sym_constrain_range_for_size(Scalar size, *, int? min, int? max) -> ()
+  dispatch:
+    CompositeExplicitAutograd: sym_constrain_range_for_size
 
-- func:
+- func: _functional_sym_constrain_range(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_sym_constrain_range
+
+- func: _functional_sym_constrain_range_for_size(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_sym_constrain_range_for_size
+
+- func: _make_dep_token(*, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+  dispatch:
+    CPU: _make_dep_token_cpu
 
 - func: refine_names(Tensor(a) self, Dimname[] names) -> Tensor(a)
   variants: method
@@ -211,6 +239,7 @@
   dispatch:
     CUDA: _cudnn_rnn
   autogen: _cudnn_rnn.out
+  tags: nondeterministic_seeded
 
 - func: _cudnn_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
   dispatch:
@@ -221,6 +250,7 @@
   dispatch:
     CUDA: _cudnn_init_dropout_state
   autogen: _cudnn_init_dropout_state.out
+  tags: nondeterministic_seeded
 
 - func: _debug_has_internal_overlap(Tensor self) -> int
   variants: function
@@ -297,6 +327,7 @@
     CompositeExplicitAutograd: abs
     SparseCPU, SparseCUDA: abs_sparse
     SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
   tags: [core, pointwise]
 
 - func: abs_(Tensor(a!) self) -> Tensor(a!)
@@ -306,6 +337,7 @@
     CompositeExplicitAutograd: abs_
     SparseCPU, SparseCUDA: abs_sparse_
     SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
 
 - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -374,7 +406,7 @@
 - func: view_as_complex(Tensor(a) self) -> Tensor(a)
   variants: function
   dispatch:
-    CPU, CUDA, Meta: view_as_complex
+    CPU, CUDA, MPS, Meta: view_as_complex
 
 - func: sgn(Tensor self) -> Tensor
   variants: function, method
@@ -382,6 +414,7 @@
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse
     SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
   tags: pointwise
 
 - func: sgn_(Tensor(a!) self) -> Tensor(a!)
@@ -390,6 +423,7 @@
   dispatch:
     SparseCPU, SparseCUDA: sgn_sparse_
     SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
   tags: pointwise
 
 - func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -488,8 +522,10 @@
 - func: arccos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
 
 - func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
+  tags: core
 
 - func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
+  tags: core
 
 # Return: (Tensor output, Tensor indices)
 - func: adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)
@@ -610,13 +646,13 @@
     MPS: addr_out_mps
     CompositeExplicitAutograd: math_addr_out
 
-- func: affine_grid_generator(Tensor theta,
+- func: affine_grid_generator(Tensor theta, SymInt[] size, bool align_corners) -> Tensor
   variants: function
   dispatch:
     CompositeExplicitAutograd: affine_grid_generator
   autogen: affine_grid_generator.out
 
-- func: affine_grid_generator_backward(Tensor grad,
+- func: affine_grid_generator_backward(Tensor grad, SymInt[] size, bool align_corners) -> Tensor
   variants: function
 
 - func: _is_all_true(Tensor self) -> Tensor
@@ -633,6 +669,13 @@
 - func: _test_check_tensor(Tensor self) -> Tensor
   variants: function
 
+# Note; this function is only for testing
+- func: _test_functorch_fallback(Tensor self, Tensor other) -> Tensor
+  variants: function
+  dispatch:
+    CPU: _test_functorch_fallback
+  autogen: _test_functorch_fallback.out
+
 - func: all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: all.out
@@ -664,6 +707,7 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: any.out
   variants: function, method
+  tags: core
 
 - func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -1108,6 +1152,7 @@
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: bitwise_not_out
+    MPS: bitwise_not_out_mps
   tags: pointwise
 
 - func: copysign.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -1115,7 +1160,7 @@
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
-    CPU, CUDA: copysign_out
+    CPU, CUDA, MPS: copysign_out
   tags: pointwise
 
 - func: copysign.Tensor(Tensor self, Tensor other) -> Tensor
@@ -1150,6 +1195,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: logical_not
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not
   tags: [core, pointwise]
 
 - func: logical_not_(Tensor(a!) self) -> Tensor(a!)
@@ -1157,6 +1203,7 @@
   variants: method
   dispatch:
     CompositeExplicitAutograd: logical_not_
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_logical_not_
   tags: pointwise
 
 - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -1171,7 +1218,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: logical_xor
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -1326,7 +1373,7 @@
   dispatch:
     SparseCPU, SparseCUDA: ceil_sparse
     SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: ceil_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -1393,7 +1440,7 @@
 - func: clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor
   variants: function, method
   structured_delegate: clamp.Tensor_out
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -1552,6 +1599,7 @@
 - func: polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: polar_out
+    MPS: polar_out_mps
 
 - func: constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor
   variants: function
@@ -1598,11 +1646,17 @@
 
 - func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
 
-- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1,
+- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: conv1d_symint
 
-- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1,
+- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: conv2d_symint
 
-- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1,
+- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: conv3d_symint
 
 - func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, str padding="valid", int[1] dilation=1, int groups=1) -> Tensor
   cpp_no_default_args: ['bias', 'stride', 'padding']
@@ -1621,11 +1675,17 @@
 - func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)
 
 # NB: we inherit the goofy argument order from PyTorch torch.nn.functional
-- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1,
+- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: conv_transpose1d_symint
 
-- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1,
+- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: conv_transpose2d_symint
 
-- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1,
+- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: conv_transpose3d_symint
 
 - func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
   variants: function
@@ -1850,6 +1910,7 @@
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: cumprod_out
+    MPS: cumprod_out_mps
 
 - func: cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -1870,6 +1931,7 @@
   structured_delegate: cumsum.out
   device_check: NoCheck # TensorIterator
   variants: function, method
+  tags: core
 
 - func: cumsum_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!)
   structured_delegate: cumsum.out
@@ -2145,6 +2207,7 @@
     CompositeExplicitAutograd: embedding_symint
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_embedding
   autogen: embedding.out
+  tags: core
 
 - func: embedding_backward(Tensor grad, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor
   dispatch:
@@ -2202,6 +2265,7 @@
     CPU: _embedding_bag_cpu
     CUDA: _embedding_bag_cuda
   autogen: _embedding_bag.out
+  tags: core
 
 - func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
   dispatch:
@@ -2240,6 +2304,12 @@
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
     SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
+  tags: core
+
+- func: empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: empty_permuted_symint
+  autogen: empty_permuted.out
 
 # We do not make new_empty a composite that calls into new_empty_strided, as the strided version
 # is significantly more difficult to implement by different backends
@@ -2280,7 +2350,7 @@
   autogen: new_ones.out
 
 # other overrides are to provide a more helpful error message that dtype is required
-- func: _empty_affine_quantized(
+- func: _empty_affine_quantized(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, float scale=1, int zero_point=0, MemoryFormat? memory_format=contiguous_format) -> Tensor
   dispatch:
     CPU: empty_affine_quantized_other_backends_stub
     QuantizedCPU, QuantizedCUDA: empty_affine_quantized
@@ -2288,7 +2358,7 @@
 
 # it's a factory function receiving a tensor argument, thus overriding explicitly
 # other overrides are to provide a more helpful error message that dtype is required
-- func: _empty_per_channel_affine_quantized(
+- func: _empty_per_channel_affine_quantized(SymInt[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor
   category_override: factory
   dispatch:
     CPU: empty_per_channel_affine_quantized_other_backends_stub
@@ -2313,7 +2383,7 @@
 # This is a utility function to enable users to resize out tensor while registering kernels for out variants.
 # Eventually, we can consider exposing `resize_output` as a public API to ship it with python op registration
 # to make it easy to register out variants for ops.
-- func: _resize_output_(Tensor(a!) self,
+- func: _resize_output_(Tensor(a!) self, SymInt[] size, Device device) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   variants: function
   dispatch:
@@ -2483,21 +2553,21 @@
   device_guard: False
 
 # decomposes to eye.m
-- func: eye(
+- func: eye(SymInt n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
     CompositeExplicitAutograd: eye
 
-- func: eye.m(
+- func: eye.m(SymInt n, SymInt m, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
     CompositeExplicitAutograd: eye
 
-- func: eye.out(
+- func: eye.out(SymInt n, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: eye_out_cpu
     CUDA: eye_out_cuda
     MPS: eye_out_mps
 
-- func: eye.m_out(
+- func: eye.m_out(SymInt n, SymInt m, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, Meta: eye_out_cpu
     CUDA: eye_out_cuda
@@ -2515,11 +2585,15 @@
 - func: flatten.DimnameList(Tensor(a) self, Dimname[] dims, Dimname out_dim) -> Tensor(a)
   variants: function, method
 
-- func: unflatten.int(Tensor(a) self, int dim,
+- func: unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a)
   variants: function, method
+  dispatch:
+    CompositeImplicitAutograd: unflatten_symint
 
-- func: unflatten.Dimname(Tensor(a) self, Dimname dim,
+- func: unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a)
   variants: function, method
+  dispatch:
+    CompositeImplicitAutograd: unflatten_dimname_symint
 
 - func: fill.Scalar(Tensor self, Scalar value) -> Tensor
   variants: function
@@ -2839,13 +2913,13 @@
     CUDA: _fft_r2c_cufft_out
 
 # Complex to real inverse FFT
-- func: _fft_c2r(Tensor self, int[] dim, int normalization,
+- func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
 
-- func: _fft_c2r.out(Tensor self, int[] dim, int normalization,
+- func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
@@ -2871,13 +2945,13 @@
     CPU: _validate_compressed_sparse_indices_cpu
     CUDA: _validate_compressed_sparse_indices_cuda
 
-- func: _cufft_get_plan_cache_size(
+- func: _cufft_get_plan_cache_size(DeviceIndex device_index) -> int
 
-- func: _cufft_get_plan_cache_max_size(
+- func: _cufft_get_plan_cache_max_size(DeviceIndex device_index) -> int
 
-- func: _cufft_set_plan_cache_max_size(
+- func: _cufft_set_plan_cache_max_size(DeviceIndex device_index, int max_size) -> ()
 
-- func: _cufft_clear_plan_cache(
+- func: _cufft_clear_plan_cache(DeviceIndex device_index) -> ()
 
 - func: index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -2885,7 +2959,7 @@
   variants: function, method
   dispatch:
     QuantizedCPU: quantized_index
-  tags: dynamic_output_shape
+  tags: [core, dynamic_output_shape]
   # NB: This function is special-cased in tools/autograd/gen_variable_type.py
   # NB: The following functions are declared in aten/src/ATen/templates/TensorBody.h and defined in aten/src/ATen/TensorIndexing.cpp:
   # - Tensor Tensor::index(ArrayRef<TensorIndex> indices)
@@ -2900,6 +2974,13 @@
   dispatch:
     CPU, CUDA, MPS: index_out
 
+# Used by inductor to signal indexing without bounds checks
+# Note that we don't support boolean indexing, to avoid dynamic output shapes
+- func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
+  variants: function
+  dispatch:
+    CPU, CUDA: _unsafe_index
+
 - func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   variants: function
@@ -2939,6 +3020,13 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: index_put
+  tags: core
+
+- func: _unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
+  device_check: NoCheck # delegate to _index_put_impl_ after clone, which leverages TensorIterator
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _unsafe_index_put
 
 - func: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -3097,6 +3185,7 @@
     CPU: layer_norm_backward_cpu
     CUDA: layer_norm_backward_cuda
     MPS: layer_norm_backward_mps
+    NestedTensorCPU, NestedTensorCUDA: layer_norm_backward_nested
   autogen: native_layer_norm_backward.out
   tags: core
 
@@ -3160,6 +3249,18 @@
     MkldnnCPU: mkldnn_linear_backward
   autogen: mkldnn_linear_backward.out
 
+- func: _cslt_compress(Tensor input) -> Tensor
+  dispatch:
+    CUDA: _cslt_compress
+
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, bool transpose_result=False) -> Tensor
+  dispatch:
+    CUDA: _cslt_sparse_mm
+
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_linear
+
 - func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
 
 - func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
@@ -3355,6 +3456,7 @@
   variants: function
   dispatch:
     CPU, CUDA: xlogy_out
+    MPS: xlogy_out_mps
   tags: pointwise
 
 - func: xlogy.OutScalar_Self(Scalar self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -3510,6 +3612,7 @@
   structured: True
   dispatch:
     CPU, CUDA: aminmax_out
+    MPS: aminmax_out_mps
 
 - func: _compute_linear_combination(Tensor input, Tensor coefficients) -> Tensor
   dispatch:
@@ -3607,6 +3710,11 @@
     QuantizedCUDA: quantized_max_pool2d_cudnn
   autogen: quantized_max_pool2d.out
 
+- func: quantized_max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
+  dispatch:
+    QuantizedCPU: quantized_max_pool3d
+  autogen: quantized_max_pool3d.out
+
 - func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
 
 # The CPU and GPU dispatch variants are named weirdly here because otherwise there
@@ -3616,6 +3724,7 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: mean
+  tags: core
 
 # For normal naming convention this should be `mean.out`. However since we already have `mean.out` we have to rename this.
 # FIXME: fix CI jobs and re-enable this
@@ -3756,6 +3865,7 @@
 - func: mkldnn_rnn_layer(Tensor input, Tensor weight0, Tensor weight1, Tensor weight2, Tensor weight3, Tensor hx_, Tensor cx_, bool reverse, int[] batch_sizes, int mode, int hidden_size, int num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train) -> (Tensor, Tensor, Tensor, Tensor)
   dispatch:
     CPU: mkldnn_rnn_layer
+    MkldnnCPU: mkldnn_rnn_layer
   autogen: mkldnn_rnn_layer.out
 
 - func: mkldnn_rnn_layer_backward(Tensor input, Tensor weight1, Tensor weight2, Tensor weight3, Tensor weight4, Tensor hx_, Tensor cx_tmp, Tensor output, Tensor hy_, Tensor cy_, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, bool reverse, int mode, int hidden_size, int num_layers, bool has_biases, bool train, bool bidirectional, int[] batch_sizes, bool batch_first, Tensor workspace) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)
@@ -3800,6 +3910,8 @@
   dispatch:
     CUDA: miopen_rnn
   autogen: miopen_rnn.out
+  tags: nondeterministic_seeded
+
 
 - func: miopen_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
   dispatch:
@@ -3823,6 +3935,14 @@
     SparseCPU, SparseCUDA: _sparse_mm_out
     SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
 
+- func: _int_mm(Tensor self, Tensor mat2) -> Tensor
+  dispatch:
+    CUDA: _int_mm_cuda
+
+- func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: _int_mm_out_cuda
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
 
@@ -3981,7 +4101,6 @@
     CUDA: batch_norm_cuda
     MPS: batch_norm_mps
     MkldnnCPU: mkldnn_batch_norm
-  tags: core
 
 - func: native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))
   dispatch:
@@ -3997,6 +4116,16 @@
     MPS: _batch_norm_legit_mps
     MkldnnCPU: _mkldnn_batch_norm_legit
   autogen: _native_batch_norm_legit_functional
+  tags: core
+
+# HACK: identical to _native_batch_norm_legit, but training is known to be False,
+# So we known that running stats will not be mutated.
+# The real fix here is batch norm consolidation.
+- func: _native_batch_norm_legit_no_training(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CompositeExplicitAutograd: _batch_norm_legit_no_training
+  autogen: _native_batch_norm_legit_no_training.out
+  tags: core
 
 - func: _native_batch_norm_legit.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, bool training, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd) -> (Tensor(d!), Tensor(e!), Tensor(f!))
   dispatch:
@@ -4055,7 +4184,7 @@
     CUDA: batch_norm_backward_reduce_cuda
   autogen: batch_norm_backward_reduce.out
 
-- func: batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor
+- func: batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor sum_dy, Tensor sum_dy_xmu, Tensor count) -> Tensor
   dispatch:
     CUDA: batch_norm_backward_elemt_cuda
   autogen: batch_norm_backward_elemt.out
@@ -4113,6 +4242,7 @@
     CPU, CUDA: _cdist_forward
     MPS: _cdist_forward_mps
   autogen: _cdist_forward.out
+  tags: core
 
 - func: _cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor
   dispatch:
@@ -4125,6 +4255,7 @@
   dispatch:
     CPU, CUDA: _pdist_forward
   autogen: _pdist_forward.out
+  tags: core
 
 - func: _pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor
   dispatch:
@@ -4185,6 +4316,7 @@
     CPU: pixel_shuffle_cpu
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
+  tags: core
 
 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
@@ -4194,7 +4326,7 @@
 
 - func: channel_shuffle(Tensor self, int groups) -> Tensor
   dispatch:
-    CPU: channel_shuffle
+    CPU, CUDA: channel_shuffle
     QuantizedCPU: channel_shuffle_quantized_cpu
   autogen: channel_shuffle.out
 
@@ -4294,7 +4426,7 @@
   autogen: rand.generator_with_names_out
 
 - func: rand(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [core, nondeterministic_seeded]
   dispatch:
     CompositeExplicitAutograd: rand
 
@@ -4319,47 +4451,47 @@
     CompositeExplicitAutograd: rand_like
   autogen: rand_like.out
 
-- func: randint(
+- func: randint(SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randint
 
-- func: randint.generator(
+- func: randint.generator(SymInt high, SymInt[] size, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
  tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randint
 
-- func: randint.low(
+- func: randint.low(SymInt low, SymInt high, SymInt[] size, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randint
 
-- func: randint.low_generator(
+- func: randint.low_generator(SymInt low, SymInt high, SymInt[] size, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randint
 
-- func: randint.out(
+- func: randint.out(SymInt high, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randint_out
 
-- func: randint.generator_out(
+- func: randint.generator_out(SymInt high, SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randint_out
 
-- func: randint.low_out(
+- func: randint.low_out(SymInt low, SymInt high, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randint_out
 
-- func: randint.low_generator_out(
+- func: randint.low_generator_out(SymInt low, SymInt high, SymInt[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randint_out
 
-- func: randint_like(Tensor self,
+- func: randint_like(Tensor self, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
   tags: nondeterministic_seeded
   dispatch:
     # NB: Although this composite mutates on the inside, it is
@@ -4367,7 +4499,7 @@
     CompositeExplicitAutograd: randint_like
   autogen: randint_like.out
 
-- func: randint_like.low_dtype(Tensor self,
+- func: randint_like.low_dtype(Tensor self, SymInt low, SymInt high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
   tags: nondeterministic_seeded
   dispatch:
     # NB: Although this composite mutates on the inside, it is
@@ -4376,7 +4508,7 @@
   autogen: randint_like.low_dtype_out
 
 - func: randn(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
-  tags: nondeterministic_seeded
+  tags: [core, nondeterministic_seeded]
   dispatch:
     CompositeExplicitAutograd: randn
 
@@ -4412,25 +4544,25 @@
   dispatch:
     # NB: Although this composite mutates on the inside, it is
     # non-differentiable so NonFunctional doesn't apply
-    CompositeExplicitAutograd: randn_like
+    CompositeExplicitAutograd, CompositeImplicitAutogradNestedTensor: randn_like
   autogen: randn_like.out
 
-- func: randperm(
-  tags: nondeterministic_seeded
+- func: randperm(SymInt n, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  tags: [core, nondeterministic_seeded]
   dispatch:
     CompositeExplicitAutograd: randperm
 
-- func: randperm.generator(
+- func: randperm.generator(SymInt n, *, Generator? generator, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randperm
 
-- func: randperm.out(
+- func: randperm.out(SymInt n, *, Tensor(a!) out) -> Tensor(a!)
   tags: nondeterministic_seeded
   dispatch:
     CompositeExplicitAutograd: randperm_out
 
-- func: randperm.generator_out(
+- func: randperm.generator_out(SymInt n, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
   tags: nondeterministic_seeded
   dispatch:
     CPU: randperm_out_cpu
@@ -4591,7 +4723,7 @@
   dispatch:
     SparseCPU, SparseCUDA: round_sparse
     SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: round_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -4839,10 +4971,14 @@
 - func: silu(Tensor self) -> Tensor
   structured_delegate: silu.out
   python_module: nn
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
 
 - func: silu_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: silu.out
   python_module: nn
+  dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
 
 - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -4865,6 +5001,7 @@
   python_module: nn
   dispatch:
     CompositeImplicitAutograd: math_silu_backward
+    NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
 
 - func: mish(Tensor self) -> Tensor
   structured_delegate: mish.out
@@ -4917,6 +5054,7 @@
   variants: function, method
   dispatch:
     CPU, CUDA: logit
+    MPS: logit_mps
   tags: pointwise
 
 - func: logit_(Tensor(a!) self, float? eps=None) -> Tensor(a!)
@@ -4928,6 +5066,7 @@
 - func: logit.out(Tensor self, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: logit_out
+    MPS: logit_out_mps
   tags: pointwise
 
 - func: sin(Tensor self) -> Tensor
@@ -5042,6 +5181,27 @@
   device_check: NoCheck
   device_guard: False
 
+- func: sym_size.int(Tensor self, int dim) -> SymInt
+  variants: function
+  device_check: NoCheck
+  device_guard: False
+  tags: core
+  manual_cpp_binding: True
+
+- func: sym_numel(Tensor self) -> SymInt
+  variants: function
+  device_check: NoCheck
+  device_guard: False
+  tags: core
+  manual_cpp_binding: True
+
+- func: sym_storage_offset(Tensor self) -> SymInt
+  variants: function
+  device_check: NoCheck
+  device_guard: False
+  tags: core
+  manual_cpp_binding: True
+
 - func: slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
   variants: function, method
   device_check: NoCheck
@@ -5066,7 +5226,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-
+    CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
   tags: core
 
@@ -5075,15 +5235,16 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-
+    CompositeExplicitAutogradNonFunctional: select_scatter_symint
   autogen: select_scatter.out
+  tags: core
 
 - func: diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor
   variants: function, method
   device_check: NoCheck
   device_guard: False
   dispatch:
-
+    CompositeExplicitAutogradNonFunctional: diagonal_scatter
   autogen: diagonal_scatter.out
 
 - func: as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor
@@ -5091,7 +5252,7 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
-
+    CompositeExplicitAutogradNonFunctional: as_strided_scatter_symint
   autogen: as_strided_scatter.out
 
 - func: smm(Tensor self, Tensor mat2) -> Tensor
@@ -5170,6 +5331,8 @@
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: split_with_sizes
+    NestedTensorCPU, NestedTensorCUDA: split_with_sizes_nested
+  tags: core
 
 - func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
   variants: function, method
@@ -5316,6 +5479,13 @@
   device_check: NoCheck
   device_guard: False
 
+- func: sym_stride.int(Tensor self, int dim) -> SymInt
+  variants: function
+  device_check: NoCheck
+  device_guard: False
+  tags: core
+  manual_cpp_binding: True
+
 - func: sum(Tensor self, *, ScalarType? dtype=None) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -5326,12 +5496,14 @@
   autogen: sum.out
 
 - func: sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  # TODO: Align the signature of sum.dim_IntList and _sparse_csr_sum.dim_dtype
   structured_delegate: sum.IntList_out
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
     NestedTensorCPU: NestedTensor_sum_dim_CPU
     SparseCPU, SparseCUDA: sum_sparse_coo
+    SparseCsrCPU, SparseCsrCUDA: sum_sparse_compressed
   tags: core
 
 - func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -5364,10 +5536,12 @@
     CPU, CUDA: nansum_out
     MPS: nansum_out_mps
 
-- func: sum_to_size(Tensor self,
+- func: sum_to_size(Tensor self, SymInt[] size) -> Tensor
   variants: method
   device_check: NoCheck
   device_guard: False
+  dispatch:
+    CompositeImplicitAutograd: sum_to_size_symint
 
 - func: sqrt(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
@@ -5421,7 +5595,7 @@
   variants: function, method
   cpp_no_default_args: ["unbiased"]
 
-- func: std.correction(Tensor self, int[1]? dim=None, *,
+- func: std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
@@ -5439,7 +5613,7 @@
   variants: function
   cpp_no_default_args: ["unbiased"]
 
-- func: std_mean.correction(Tensor self, int[1]? dim=None, *,
+- func: std_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
   device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
@@ -5451,7 +5625,7 @@
   variants: function
   cpp_no_default_args: ["unbiased"]
 
-- func: std_mean.correction_names(Tensor self, Dimname[1] dim, *,
+- func: std_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
   device_check: NoCheck # TensorIterator
   variants: function
 
@@ -5459,7 +5633,7 @@
   device_check: NoCheck # TensorIterator
   cpp_no_default_args: ["unbiased"]
 
-- func: std.correction_out(Tensor self, int[1]? dim=None, *,
+- func: std.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: std_out
@@ -5474,11 +5648,11 @@
   device_check: NoCheck # TensorIterator
   cpp_no_default_args: ["unbiased"]
 
-- func: std.correction_names(Tensor self, Dimname[1] dim, *,
+- func: std.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
 
-- func: std.correction_names_out(Tensor self, Dimname[1] dim, *,
+- func: std.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function
 
@@ -5489,11 +5663,13 @@
     CPU, CUDA: prod
     MPS: prod_mps
   autogen: prod.out
+  tags: core
 
 - func: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   structured_delegate: prod.int_out
   device_check: NoCheck # TensorIterator
   variants: function, method
+  tags: core
 
 - func: prod.int_out(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   structured: True
@@ -5531,7 +5707,7 @@
   dispatch:
     SparseCPU, SparseCUDA: tan_sparse
     SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
-  tags: pointwise
+  tags: [core, pointwise]
 
 - func: tan_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
@@ -5592,8 +5768,6 @@
 
 - func: tensordot.out(Tensor self, Tensor other, int[] dims_self, int[] dims_other, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
-  dispatch:
-    CPU, CUDA: tensordot_out
 
 # TODO: namespace threshold in 'nn'
 - func: threshold(Tensor self, Scalar threshold, Scalar value) -> Tensor
@@ -5635,8 +5809,10 @@
     NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
   tags: pointwise
 
-- func: tile(Tensor self,
+- func: tile(Tensor self, SymInt[] dims) -> Tensor
   variants: function, method
+  dispatch:
+    CompositeImplicitAutograd: tile_symint
 
 - func: transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
   variants: function, method
@@ -5691,12 +5867,13 @@
 - func: flipud(Tensor self) -> Tensor
   variants: function, method
 
-- func: roll(Tensor self,
+- func: roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor
   variants: function, method
   dispatch:
-    CPU:
+    CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
+  tags: core
 
 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args
 
@@ -5750,10 +5927,11 @@
     NestedTensorCPU, NestedTensorCUDA: _nested_tensor_strides
   autogen: _nested_tensor_strides.out
 
-- func:
+- func: _nested_tensor_storage_offsets(Tensor self) -> Tensor
   variants: method
   dispatch:
-    NestedTensorCPU, NestedTensorCUDA:
+    NestedTensorCPU, NestedTensorCUDA, NestedTensorMeta: _nested_tensor_storage_offsets
+  autogen: _nested_tensor_storage_offsets.out
 
 # _nested_from_padded is not usable from Python, so
 # _nested_from_padded_and_nested_example is available for testing.
@@ -5764,13 +5942,13 @@
 
 # The input arguments' types to this functions are temporary. When nested tensors switch to using SymInts for their metadata representation
 # this will need to be updated
-- func: _nested_view_from_buffer(Tensor(a) self, Tensor nested_size, Tensor nested_strides,
+- func: _nested_view_from_buffer(Tensor(a) self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor(a)
   variants: function
   device_check: NoCheck
   dispatch:
     CPU, CUDA: _nested_view_from_buffer
 
-- func: _nested_view_from_buffer_copy(Tensor self, Tensor nested_size, Tensor nested_strides,
+- func: _nested_view_from_buffer_copy(Tensor self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor
   variants: function
   device_check: NoCheck
   tags: view_copy
@@ -5913,18 +6091,19 @@
   tags: core
   cpp_no_default_args: ["unbiased"]
 
-- func: var.correction(Tensor self, int[1]? dim=None, *,
+- func: var.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
     CPU, CUDA: var
     MPS: var_mps
+  tags: core
 
 - func: var.out(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   cpp_no_default_args: ["unbiased"]
 
-- func: var.correction_out(Tensor self, int[1]? dim=None, *,
+- func: var.correction_out(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: var_out
@@ -5938,11 +6117,11 @@
   device_check: NoCheck # TensorIterator
   cpp_no_default_args: ["unbiased"]
 
-- func: var.correction_names(Tensor self, Dimname[1] dim, *,
+- func: var.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor
  device_check: NoCheck # TensorIterator
   variants: function, method
 
-- func: var.correction_names_out(Tensor self, Dimname[1] dim, *,
+- func: var.correction_names_out(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function
 
@@ -5956,7 +6135,7 @@
   variants: function
   cpp_no_default_args: ["unbiased"]
 
-- func: var_mean.correction(Tensor self, int[1]? dim=None, *,
+- func: var_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
  device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
@@ -5968,7 +6147,7 @@
   variants: function
   cpp_no_default_args: ["unbiased"]
 
-- func: var_mean.correction_names(Tensor self, Dimname[1] dim, *,
+- func: var_mean.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
   device_check: NoCheck # TensorIterator
   variants: function
 
@@ -6036,7 +6215,7 @@
     CompositeExplicitAutograd: zeros
   autogen: zeros.names_out
 
-- func: _efficientzerotensor(
+- func: _efficientzerotensor(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
@@ -6056,7 +6235,7 @@
   dispatch:
     # NB: Although this composite mutates on the inside, it is
     # non-differentiable so NonFunctional doesn't apply
-    CompositeExplicitAutograd: zeros_like
+    CompositeExplicitAutograd, CompositeImplicitAutogradNestedTensor: zeros_like
   autogen: zeros_like.out
 
 - func: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor
@@ -6297,7 +6476,7 @@
     QuantizedCPU, QuantizedCUDA: quantized_clone
     NestedTensorCPU, NestedTensorCUDA: clone_nested
   autogen: clone.out
-  tags: core
+  tags: [core, pointwise]
 
 - func: positive(Tensor(a) self) -> Tensor(a)
   variants: function, method
@@ -6309,6 +6488,7 @@
   dispatch:
     CompositeExplicitAutograd: resize_as_
   autogen: resize_as, resize_as.out
+  tags: inplace_view
 
 - func: resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
@@ -6328,6 +6508,7 @@
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
     SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
+    NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
 
 - func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -6347,6 +6528,7 @@
   dispatch:
     SparseCPU, SparseCUDA: sub_sparse
     ZeroTensor: sub_zerotensor
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_sub_Tensor
   tags: [core, pointwise]
 
 - func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
@@ -6493,6 +6675,16 @@
   structured_delegate: _addmm_activation.out
   variants: function, method
 
+- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None) -> (Tensor, Tensor)
+  variants: function
+  dispatch:
+    CUDA: _scaled_mm_cuda
+
+- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+  variants: function
+  dispatch:
+    CUDA: _scaled_mm_out_cuda
+
 # NOTE [ Sparse: autograd and API ]
 #
 #
@@ -6605,12 +6797,17 @@
 # the default would never make sense.
 
 - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: sparse_compressed_tensor
+
 - func: sparse_csr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_csc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsr_tensor.crow_col_value_size(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value_size(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 
 - func: sparse_compressed_tensor.comp_plain_value(Tensor compressed_indices, Tensor plain_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: sparse_compressed_tensor
 - func: sparse_csr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_csc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
@@ -6627,15 +6824,15 @@
     CompositeExplicitAutograd: sparse_coo_tensor
   autogen: sparse_coo_tensor.size_out
 
-- func: sparse_coo_tensor.indices(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: sparse_coo_tensor.indices(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor
 
-- func: sparse_coo_tensor.indices_size(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: sparse_coo_tensor.indices_size(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor
 
-- func: _sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _sparse_coo_tensor_unsafe(Tensor indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool? is_coalesced=None) -> Tensor
   dispatch:
     CompositeImplicitAutograd: _sparse_coo_tensor_unsafe_symint
 
-- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size) -> ()
|
6835
|
+
- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size, bool? is_coalesced=None) -> ()
|
6639
6836
|
|
6640
6837
|
- func: _validate_sparse_compressed_tensor_args(Tensor compressed_indices, Tensor plain_indices, Tensor values, int[] size, Layout layout) -> ()
|
6641
6838
|
- func: _validate_sparse_csr_tensor_args(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size) -> ()
|
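The sparse COO constructors above gain an optional `is_coalesced` flag, a hint that the supplied indices are already sorted and unique so coalescing can be skipped. A hedged torch-rb sketch, assuming the generated binding exposes `sparse_coo_tensor` and forwards the new keyword:

```ruby
require "torch"

indices = Torch.tensor([[0, 1, 1], [2, 0, 2]])
values  = Torch.tensor([3.0, 4.0, 5.0])
# is_coalesced: true tells the constructor the entries are already coalesced.
sparse = Torch.sparse_coo_tensor(indices, values, [2, 3], is_coalesced: true)
puts sparse.to_dense.to_a.inspect # => [[0.0, 0.0, 3.0], [4.0, 0.0, 5.0]]
```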
@@ -6648,7 +6845,7 @@
|
|
6648
6845
|
SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_sparse
|
6649
6846
|
autogen: _sparse_coo_tensor_with_dims.out
|
6650
6847
|
|
6651
|
-
- func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
|
6848
|
+
- func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
|
6652
6849
|
dispatch:
|
6653
6850
|
SparseCPU, SparseCUDA, SparseMeta, Meta: new_with_dims_and_tensor_sparse_symint
|
6654
6851
|
autogen: _sparse_coo_tensor_with_dims_and_tensors.out
|
@@ -6671,17 +6868,23 @@
|
|
6671
6868
|
variants: method
|
6672
6869
|
dispatch:
|
6673
6870
|
SparseCPU, SparseCUDA: sparse_mask
|
6674
|
-
SparseCsrCPU, SparseCsrCUDA:
|
6871
|
+
SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_compressed
|
6675
6872
|
autogen: sparse_mask.out
|
6676
6873
|
|
6874
|
+
- func: _sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor
|
6875
|
+
variants: method
|
6876
|
+
dispatch:
|
6877
|
+
SparseCPU, SparseCUDA: sparse_mask_projection
|
6878
|
+
autogen: _sparse_mask_projection.out
|
6879
|
+
|
6677
6880
|
- func: _to_cpu(Tensor[] tensors) -> Tensor[]
|
6678
6881
|
variants: function
|
6679
6882
|
|
6680
|
-
- func: to_dense(Tensor self, ScalarType? dtype=None) -> Tensor
|
6883
|
+
- func: to_dense(Tensor self, ScalarType? dtype=None, *, bool? masked_grad=None) -> Tensor
|
6681
6884
|
variants: method
|
6682
6885
|
|
6683
6886
|
# Special case of to_dense with custom derivative
|
6684
|
-
- func: _to_dense(Tensor self, ScalarType? dtype=None) -> Tensor
|
6887
|
+
- func: _to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor
|
6685
6888
|
variants: method
|
6686
6889
|
dispatch:
|
6687
6890
|
SparseCPU, SparseCUDA: sparse_to_dense
|
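`to_dense` (and its `_to_dense` twin) gains an optional `masked_grad` flag that selects whether the backward pass uses masked semantics for the unspecified sparse elements or treats them as zeros. The forward values are unchanged; a small torch-rb round trip, with the new keyword noted as a schema-level addition that the generated binding may or may not forward:

```ruby
require "torch"

dense  = Torch.tensor([[0.0, 1.0], [2.0, 0.0]])
sparse = dense.to_sparse
# masked_grad (new in the schema above) only affects autograd, not the values here.
back = sparse.to_dense
puts back.to_a.inspect # => [[0.0, 1.0], [2.0, 0.0]]
```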
@@ -6689,7 +6892,7 @@
|
|
6689
6892
|
MkldnnCPU: mkldnn_to_dense
|
6690
6893
|
autogen: _to_dense.out
|
6691
6894
|
|
6692
|
-
- func: to_dense_backward(Tensor grad, Tensor input) -> Tensor
|
6895
|
+
- func: to_dense_backward(Tensor grad, Tensor input, bool? masked_grad=None) -> Tensor
|
6693
6896
|
|
6694
6897
|
- func: sparse_dim(Tensor self) -> int
|
6695
6898
|
variants: method
|
@@ -6859,51 +7062,80 @@
|
|
6859
7062
|
|
6860
7063
|
- func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
|
6861
7064
|
variants: method
|
7065
|
+
|
7066
|
+
# Special case of to_sparse.sparse_dim with custom derivative
|
7067
|
+
- func: _to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
|
7068
|
+
variants: method
|
6862
7069
|
dispatch:
|
6863
7070
|
CPU, CUDA: dense_to_sparse
|
6864
7071
|
SparseCPU, SparseCUDA: sparse_coo_to_sparse
|
6865
7072
|
SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
|
6866
|
-
autogen:
|
7073
|
+
autogen: _to_sparse.sparse_dim_out
|
6867
7074
|
|
6868
7075
|
- func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
|
6869
7076
|
variants: method
|
7077
|
+
|
7078
|
+
# Special case of to_sparse with custom derivative
|
7079
|
+
- func: _to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
|
7080
|
+
variants: method
|
6870
7081
|
dispatch:
|
6871
7082
|
CPU, CUDA: dense_to_sparse
|
6872
7083
|
SparseCPU, SparseCUDA: sparse_coo_to_sparse
|
6873
7084
|
SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
|
6874
|
-
autogen:
|
7085
|
+
autogen: _to_sparse.out
|
6875
7086
|
|
6876
7087
|
- func: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
|
6877
7088
|
variants: method
|
7089
|
+
|
7090
|
+
# Special case of to_sparse_csr with custom derivative
|
7091
|
+
- func: _to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
|
7092
|
+
variants: method
|
6878
7093
|
dispatch:
|
6879
7094
|
CPU, CUDA: dense_to_sparse_csr
|
6880
7095
|
SparseCPU, SparseCUDA: coo_to_sparse_csr
|
6881
7096
|
SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
|
6882
|
-
autogen:
|
7097
|
+
autogen: _to_sparse_csr.out
|
6883
7098
|
|
6884
7099
|
- func: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
|
6885
7100
|
variants: method
|
7101
|
+
|
7102
|
+
# Special case of to_sparse_csc with custom derivative
|
7103
|
+
- func: _to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
|
7104
|
+
variants: method
|
6886
7105
|
dispatch:
|
6887
7106
|
CPU, CUDA: dense_to_sparse_csc
|
6888
7107
|
SparseCPU, SparseCUDA: coo_to_sparse_csc
|
6889
7108
|
SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
|
6890
|
-
autogen:
|
7109
|
+
autogen: _to_sparse_csc.out
|
6891
7110
|
|
6892
7111
|
- func: to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
|
6893
7112
|
variants: method
|
7113
|
+
|
7114
|
+
# Special case of to_sparse_bsr with custom derivative
|
7115
|
+
- func: _to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
|
7116
|
+
variants: method
|
6894
7117
|
dispatch:
|
6895
7118
|
CPU, CUDA: dense_to_sparse_bsr
|
6896
7119
|
SparseCPU, SparseCUDA: coo_to_sparse_bsr
|
6897
7120
|
SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
|
6898
|
-
autogen:
|
7121
|
+
autogen: _to_sparse_bsr.out
|
6899
7122
|
|
6900
7123
|
- func: to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
|
6901
7124
|
variants: method
|
7125
|
+
|
7126
|
+
# Special case of to_sparse_bsc with custom derivative
|
7127
|
+
- func: _to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
|
7128
|
+
variants: method
|
6902
7129
|
dispatch:
|
6903
7130
|
CPU, CUDA: dense_to_sparse_bsc
|
6904
7131
|
SparseCPU, SparseCUDA: coo_to_sparse_bsc
|
6905
7132
|
SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc
|
6906
|
-
autogen:
|
7133
|
+
autogen: _to_sparse_bsc.out
|
7134
|
+
|
7135
|
+
- func: _to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)
|
7136
|
+
variants: function
|
7137
|
+
dispatch:
|
7138
|
+
CUDA: _to_sparse_semi_structured
|
6907
7139
|
|
6908
7140
|
- func: to_mkldnn(Tensor self, ScalarType? dtype=None) -> Tensor
|
6909
7141
|
variants: method
|
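Each `to_sparse*` conversion above is now split into a public entry plus an internal `_to_sparse*` twin that carries the custom derivative; user code keeps calling the public methods. A torch-rb round trip through CSR, assuming the conversion methods are generated as usual:

```ruby
require "torch"

dense = Torch.eye(3)
csr = dense.to_sparse_csr
# The round trip is lossless regardless of which internal twin handled the conversion.
puts csr.to_dense.to_a.inspect # => [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
```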
@@ -7174,7 +7406,7 @@
|
|
7174
7406
|
|
7175
7407
|
# NB: Does NOT check precondition that numel == 1
|
7176
7408
|
- func: _local_scalar_dense(Tensor self) -> Scalar
|
7177
|
-
tags: data_dependent_output
|
7409
|
+
tags: [core, data_dependent_output]
|
7178
7410
|
dispatch:
|
7179
7411
|
CPU: _local_scalar_dense_cpu
|
7180
7412
|
CUDA: _local_scalar_dense_cuda
|
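`_local_scalar_dense` is the kernel behind `Tensor#item`; adding the `core` tag above marks it as part of the core operator set, while the Ruby-level call is unchanged:

```ruby
require "torch"

t = Torch.tensor([3.5])
puts t.item # => 3.5
```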
@@ -7187,8 +7419,9 @@
|
|
7187
7419
|
dispatch:
|
7188
7420
|
MPS: _lstm_mps
|
7189
7421
|
autogen: _lstm_mps.out
|
7422
|
+
tags: nondeterministic_seeded
|
7190
7423
|
|
7191
|
-
- func: lstm_mps_backward(Tensor grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])
|
7424
|
+
- func: lstm_mps_backward(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])
|
7192
7425
|
dispatch:
|
7193
7426
|
MPS: lstm_mps_backward
|
7194
7427
|
autogen: lstm_mps_backward.out
|
@@ -7226,20 +7459,28 @@
|
|
7226
7459
|
|
7227
7460
|
# RNN cells and layers
|
7228
7461
|
- func: lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor)
|
7462
|
+
tags: nondeterministic_seeded
|
7229
7463
|
|
7230
7464
|
- func: lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor, Tensor)
|
7465
|
+
tags: nondeterministic_seeded
|
7231
7466
|
|
7232
7467
|
- func: gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
|
7468
|
+
tags: nondeterministic_seeded
|
7233
7469
|
|
7234
7470
|
- func: gru.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
|
7471
|
+
tags: nondeterministic_seeded
|
7235
7472
|
|
7236
7473
|
- func: rnn_tanh.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
|
7474
|
+
tags: nondeterministic_seeded
|
7237
7475
|
|
7238
7476
|
- func: rnn_tanh.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
|
7477
|
+
tags: nondeterministic_seeded
|
7239
7478
|
|
7240
7479
|
- func: rnn_relu.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
|
7480
|
+
tags: nondeterministic_seeded
|
7241
7481
|
|
7242
7482
|
- func: rnn_relu.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
|
7483
|
+
tags: nondeterministic_seeded
|
7243
7484
|
|
7244
7485
|
- func: lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> (Tensor, Tensor)
|
7245
7486
|
|
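The `nondeterministic_seeded` tags added above mark the RNN entry points as consumers of the global RNG (e.g. their dropout paths), which is what makes seeding sufficient for repeatable runs. A short torch-rb sketch, assuming `Torch::NN::LSTM` mirrors the PyTorch constructor:

```ruby
require "torch"

Torch.manual_seed(42) # makes the dropout masks, and thus the output, repeatable
rnn = Torch::NN::LSTM.new(10, 20, num_layers: 2, dropout: 0.5)
input = Torch.randn(5, 3, 10)
output, _state = rnn.call(input)
puts output.shape.inspect # => [5, 3, 20]
```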
@@ -7382,6 +7623,7 @@
|
|
7382
7623
|
variants: function, method
|
7383
7624
|
dispatch:
|
7384
7625
|
CompositeExplicitAutograd: masked_fill
|
7626
|
+
NestedTensorCPU, NestedTensorCUDA: NestedTensor_masked_fill
|
7385
7627
|
tags: pointwise
|
7386
7628
|
|
7387
7629
|
- func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
|
@@ -7406,6 +7648,7 @@
|
|
7406
7648
|
dispatch:
|
7407
7649
|
CPU: masked_scatter__cpu
|
7408
7650
|
CUDA: masked_scatter__cuda
|
7651
|
+
MPS: masked_scatter__mps
|
7409
7652
|
autogen: masked_scatter.out
|
7410
7653
|
|
7411
7654
|
- func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
|
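`masked_fill` gains a nested-tensor kernel and `masked_scatter_` an MPS one; the ordinary dense call is unchanged. A minimal torch-rb example:

```ruby
require "torch"

t = Torch.zeros(2, 3)
mask = Torch.tensor([[1, 0, 1], [0, 0, 1]]).eq(1) # boolean mask
filled = t.masked_fill(mask, 7)
puts filled.to_a.inspect # => [[7.0, 0.0, 7.0], [0.0, 0.0, 7.0]]
```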
@@ -7503,6 +7746,7 @@
|
|
7503
7746
|
dispatch:
|
7504
7747
|
CPU: index_fill_
|
7505
7748
|
CUDA: index_fill_
|
7749
|
+
MPS: index_fill_mps_
|
7506
7750
|
autogen: index_fill.int_Scalar_out
|
7507
7751
|
|
7508
7752
|
- func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
|
@@ -7516,6 +7760,7 @@
|
|
7516
7760
|
variants: method
|
7517
7761
|
dispatch:
|
7518
7762
|
CPU, CUDA: index_fill_
|
7763
|
+
MPS: index_fill_mps_
|
7519
7764
|
autogen: index_fill.int_Tensor_out
|
7520
7765
|
|
7521
7766
|
- func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
|
@@ -7543,6 +7788,7 @@
|
|
7543
7788
|
- func: scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
|
7544
7789
|
structured_delegate: scatter.src_out
|
7545
7790
|
variants: function, method
|
7791
|
+
tags: core
|
7546
7792
|
|
7547
7793
|
- func: scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)
|
7548
7794
|
structured_delegate: scatter.src_out
|
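`scatter.src` and `scatter.value` are promoted to `core` ops above; their semantics are unchanged: values from `src` are written into `self` at the positions named by `index` along `dim`. A worked torch-rb example:

```ruby
require "torch"

src   = Torch.arange(1, 7, dtype: :float32).reshape(2, 3) # [[1, 2, 3], [4, 5, 6]]
index = Torch.tensor([[0, 1, 2], [0, 1, 2]])
out = Torch.zeros(3, 3).scatter(0, index, src)
# out[index[i][j]][j] = src[i][j]; later rows of src overwrite earlier ones.
puts out.to_a.inspect # => [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0], [0.0, 0.0, 6.0]]
```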
@@ -7558,6 +7804,7 @@
|
|
7558
7804
|
- func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
|
7559
7805
|
structured_delegate: scatter.value_out
|
7560
7806
|
variants: function, method
|
7807
|
+
tags: core
|
7561
7808
|
|
7562
7809
|
- func: scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
|
7563
7810
|
structured_delegate: scatter.value_out
|
@@ -7657,6 +7904,7 @@
|
|
7657
7904
|
variants: function
|
7658
7905
|
dispatch:
|
7659
7906
|
CPU, CUDA: bitwise_and_out
|
7907
|
+
MPS: bitwise_and_out_mps
|
7660
7908
|
tags: pointwise
|
7661
7909
|
|
7662
7910
|
- func: bitwise_and.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -7671,7 +7919,7 @@
|
|
7671
7919
|
variants: method, function
|
7672
7920
|
dispatch:
|
7673
7921
|
CompositeExplicitAutograd: bitwise_and
|
7674
|
-
tags: pointwise
|
7922
|
+
tags: [core, pointwise]
|
7675
7923
|
|
7676
7924
|
- func: bitwise_and.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
7677
7925
|
device_check: NoCheck # TensorIterator
|
@@ -7721,6 +7969,7 @@
|
|
7721
7969
|
variants: function
|
7722
7970
|
dispatch:
|
7723
7971
|
CPU, CUDA: bitwise_or_out
|
7972
|
+
MPS: bitwise_or_out_mps
|
7724
7973
|
tags: pointwise
|
7725
7974
|
|
7726
7975
|
- func: bitwise_or.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -7733,7 +7982,7 @@
|
|
7733
7982
|
- func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
|
7734
7983
|
device_check: NoCheck # TensorIterator
|
7735
7984
|
variants: method, function
|
7736
|
-
tags: pointwise
|
7985
|
+
tags: [core, pointwise]
|
7737
7986
|
|
7738
7987
|
- func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
7739
7988
|
device_check: NoCheck # TensorIterator
|
@@ -7783,6 +8032,7 @@
|
|
7783
8032
|
variants: function
|
7784
8033
|
dispatch:
|
7785
8034
|
CPU, CUDA: bitwise_xor_out
|
8035
|
+
MPS: bitwise_xor_out_mps
|
7786
8036
|
tags: pointwise
|
7787
8037
|
|
7788
8038
|
- func: bitwise_xor.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -7795,7 +8045,7 @@
|
|
7795
8045
|
- func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
|
7796
8046
|
device_check: NoCheck # TensorIterator
|
7797
8047
|
variants: method, function
|
7798
|
-
tags: pointwise
|
8048
|
+
tags: [core, pointwise]
|
7799
8049
|
|
7800
8050
|
- func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
|
7801
8051
|
device_check: NoCheck # TensorIterator
|
@@ -8067,6 +8317,7 @@
|
|
8067
8317
|
variants: method
|
8068
8318
|
dispatch:
|
8069
8319
|
CPU, CUDA: random_
|
8320
|
+
MPS: random_mps_
|
8070
8321
|
Meta: random_meta_
|
8071
8322
|
autogen: random, random.out
|
8072
8323
|
|
@@ -8164,7 +8415,7 @@
|
|
8164
8415
|
dispatch:
|
8165
8416
|
CPU: trace_cpu
|
8166
8417
|
CUDA: trace_cuda
|
8167
|
-
MPS:
|
8418
|
+
MPS: trace_mps
|
8168
8419
|
autogen: trace.out
|
8169
8420
|
|
8170
8421
|
- func: trace_backward(Tensor grad, SymInt[] sizes) -> Tensor
|
@@ -8604,6 +8855,15 @@
|
|
8604
8855
|
MPS: nonzero_mps
|
8605
8856
|
tags: [dynamic_output_shape, core]
|
8606
8857
|
|
8858
|
+
- func: nonzero_static.out(Tensor self, *, int size, int fill_value=-1, Tensor(a!) out) -> Tensor(a!)
|
8859
|
+
dispatch:
|
8860
|
+
CPU: nonzero_static_out_cpu
|
8861
|
+
|
8862
|
+
- func: nonzero_static(Tensor self, *, int size, int fill_value=-1) -> Tensor
|
8863
|
+
variants: method, function
|
8864
|
+
dispatch:
|
8865
|
+
CPU: nonzero_static_cpu
|
8866
|
+
|
8607
8867
|
- func: nonzero_numpy(Tensor self) -> Tensor[]
|
8608
8868
|
variants: method, function
|
8609
8869
|
|
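`nonzero_static` is a new op (CPU-only per the dispatch above): like `nonzero`, but the result is padded or truncated to exactly `size` rows, padding with `fill_value`, so the output shape stays static for export and compilation. Hypothetical torch-rb spelling, assuming the keyword-only arguments are generated as Ruby keywords:

```ruby
require "torch"

t = Torch.tensor([1, 0, 2, 0])
out = Torch.nonzero_static(t, size: 4) # fill_value defaults to -1
puts out.to_a.inspect # => [[0], [2], [-1], [-1]]
```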
@@ -8710,8 +8970,10 @@
|
|
8710
8970
|
CPU, CUDA: linalg_solve_triangular
|
8711
8971
|
MPS: linalg_solve_triangular_mps
|
8712
8972
|
|
8713
|
-
- func: linalg_vander(Tensor x, *,
|
8973
|
+
- func: linalg_vander(Tensor x, *, SymInt? N=None) -> Tensor
|
8714
8974
|
python_module: linalg
|
8975
|
+
dispatch:
|
8976
|
+
CompositeImplicitAutograd: linalg_vander_symint
|
8715
8977
|
|
8716
8978
|
- func: svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)
|
8717
8979
|
|
@@ -8917,6 +9179,7 @@
|
|
8917
9179
|
structured_inherits: TensorIteratorBase
|
8918
9180
|
dispatch:
|
8919
9181
|
CPU, CUDA: erfinv_out
|
9182
|
+
MPS: erfinv_out_mps
|
8920
9183
|
SparseCPU, SparseCUDA: erfinv_sparse_out
|
8921
9184
|
SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
|
8922
9185
|
tags: pointwise
|
@@ -8999,7 +9262,7 @@
|
|
8999
9262
|
structured_inherits: TensorIteratorBase
|
9000
9263
|
dispatch:
|
9001
9264
|
CPU, CUDA: atan2_out
|
9002
|
-
MPS:
|
9265
|
+
MPS: atan2_out_mps
|
9003
9266
|
tags: pointwise
|
9004
9267
|
|
9005
9268
|
- func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
@@ -9030,6 +9293,7 @@
|
|
9030
9293
|
structured_inherits: TensorIteratorBase
|
9031
9294
|
dispatch:
|
9032
9295
|
CPU, CUDA: lerp_Scalar
|
9296
|
+
MPS: lerp_Scalar_mps
|
9033
9297
|
tags: pointwise
|
9034
9298
|
|
9035
9299
|
- func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)
|
@@ -9038,6 +9302,7 @@
|
|
9038
9302
|
structured_inherits: TensorIteratorBase
|
9039
9303
|
dispatch:
|
9040
9304
|
CPU, CUDA: lerp_Tensor
|
9305
|
+
MPS: lerp_Tensor_mps
|
9041
9306
|
tags: pointwise
|
9042
9307
|
|
9043
9308
|
- func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor
|
@@ -9054,46 +9319,46 @@
|
|
9054
9319
|
|
9055
9320
|
- func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!)
|
9056
9321
|
dispatch:
|
9057
|
-
CPU:
|
9322
|
+
CPU, MPS: histogram_histc_out
|
9058
9323
|
CUDA: _histc_out_cuda
|
9059
9324
|
|
9060
9325
|
- func: histc(Tensor self, int bins=100, Scalar min=0, Scalar max=0) -> Tensor
|
9061
9326
|
variants: method, function
|
9062
9327
|
dispatch:
|
9063
|
-
CPU:
|
9328
|
+
CPU, MPS: histogram_histc
|
9064
9329
|
CUDA: _histc_cuda
|
9065
9330
|
|
9066
9331
|
- func: histogram.bins_tensor_out(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)
|
9067
9332
|
dispatch:
|
9068
|
-
CPU:
|
9333
|
+
CPU, MPS: histogram_out
|
9069
9334
|
|
9070
9335
|
- func: histogram.bins_tensor(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)
|
9071
9336
|
variants: method, function
|
9072
9337
|
dispatch:
|
9073
|
-
CPU:
|
9338
|
+
CPU, MPS: histogram
|
9074
9339
|
|
9075
9340
|
- func: histogram.bin_ct_out(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False, Tensor(a!) hist, Tensor(b!) bin_edges) -> (Tensor(a!) hist, Tensor(b!) bin_edges)
|
9076
9341
|
dispatch:
|
9077
|
-
CPU:
|
9342
|
+
CPU, MPS: histogram_out
|
9078
9343
|
|
9079
9344
|
- func: histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)
|
9080
9345
|
variants: method, function
|
9081
9346
|
dispatch:
|
9082
|
-
CPU:
|
9347
|
+
CPU, MPS: histogram
|
9083
9348
|
|
9084
9349
|
- func: _histogramdd_bin_edges(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor[]
|
9085
9350
|
dispatch:
|
9086
|
-
CPU:
|
9351
|
+
CPU, MPS: histogramdd_bin_edges
|
9087
9352
|
autogen: _histogramdd_bin_edges.out
|
9088
9353
|
|
9089
9354
|
- func: _histogramdd_from_bin_cts(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor
|
9090
9355
|
dispatch:
|
9091
|
-
CPU:
|
9356
|
+
CPU, MPS: _histogramdd
|
9092
9357
|
autogen: _histogramdd_from_bin_cts.out
|
9093
9358
|
|
9094
9359
|
- func: _histogramdd_from_bin_tensors(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False) -> Tensor
|
9095
9360
|
dispatch:
|
9096
|
-
CPU:
|
9361
|
+
CPU, MPS: _histogramdd
|
9097
9362
|
autogen: _histogramdd_from_bin_tensors.out
|
9098
9363
|
|
9099
9364
|
- func: histogramdd(Tensor self, int[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)
|
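The histogram family above simply gains MPS kernels next to the existing CPU ones; the call itself is unchanged. A small `histc` example in torch-rb, assuming the usual positional signature `histc(input, bins, min, max)`:

```ruby
require "torch"

t = Torch.tensor([0.5, 1.5, 1.6, 3.2])
counts = Torch.histc(t, 4, 0, 4) # 4 equal bins over [0, 4]
puts counts.to_a.inspect # => [1.0, 2.0, 0.0, 1.0]
```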
@@ -9113,7 +9378,7 @@
|
|
9113
9378
|
variants: method, function
|
9114
9379
|
dispatch:
|
9115
9380
|
CompositeExplicitAutograd: fmod
|
9116
|
-
tags: pointwise
|
9381
|
+
tags: [core, pointwise]
|
9117
9382
|
|
9118
9383
|
- func: fmod_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
9119
9384
|
device_check: NoCheck # TensorIterator
|
@@ -9148,6 +9413,7 @@
|
|
9148
9413
|
structured_inherits: TensorIteratorBase
|
9149
9414
|
dispatch:
|
9150
9415
|
CPU, CUDA: hypot_out
|
9416
|
+
MPS: hypot_out_mps
|
9151
9417
|
tags: pointwise
|
9152
9418
|
|
9153
9419
|
- func: hypot(Tensor self, Tensor other) -> Tensor
|
@@ -9220,7 +9486,7 @@
|
|
9220
9486
|
variants: method, function
|
9221
9487
|
dispatch:
|
9222
9488
|
CompositeExplicitAutograd: remainder
|
9223
|
-
tags: pointwise
|
9489
|
+
tags: [core, pointwise]
|
9224
9490
|
|
9225
9491
|
- func: remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
9226
9492
|
variants: method
|
@@ -9265,12 +9531,11 @@
|
|
9265
9531
|
MPS: min_mps
|
9266
9532
|
QuantizedCPU: min_quantized_cpu
|
9267
9533
|
|
9268
|
-
|
9269
|
-
|
9270
|
-
|
9271
|
-
|
9272
|
-
|
9273
|
-
# CompositeExplicitAutograd: min_unary_out
|
9534
|
+
- func: min.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
9535
|
+
device_check: NoCheck # TensorIterator
|
9536
|
+
dispatch:
|
9537
|
+
CPU, CUDA: min_unary_out
|
9538
|
+
QuantizedCPU: min_quantized_unary_out
|
9274
9539
|
|
9275
9540
|
- func: fmin(Tensor self, Tensor other) -> Tensor
|
9276
9541
|
structured_delegate: fmin.out
|
@@ -9283,7 +9548,7 @@
|
|
9283
9548
|
structured_inherits: TensorIteratorBase
|
9284
9549
|
device_check: NoCheck # TensorIterator
|
9285
9550
|
dispatch:
|
9286
|
-
CPU, CUDA: fmin_out
|
9551
|
+
CPU, CUDA, MPS: fmin_out
|
9287
9552
|
tags: pointwise
|
9288
9553
|
|
9289
9554
|
- func: max(Tensor self) -> Tensor
|
@@ -9305,7 +9570,7 @@
|
|
9305
9570
|
structured_inherits: TensorIteratorBase
|
9306
9571
|
device_check: NoCheck # TensorIterator
|
9307
9572
|
dispatch:
|
9308
|
-
CPU, CUDA: fmax_out
|
9573
|
+
CPU, CUDA, MPS: fmax_out
|
9309
9574
|
tags: pointwise
|
9310
9575
|
|
9311
9576
|
- func: maximum(Tensor self, Tensor other) -> Tensor
|
@@ -9402,6 +9667,7 @@
|
|
9402
9667
|
variants: method, function
|
9403
9668
|
dispatch:
|
9404
9669
|
CompositeExplicitAutograd: sort
|
9670
|
+
tags: core
|
9405
9671
|
|
9406
9672
|
- func: sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
|
9407
9673
|
structured_delegate: sort.values_stable
|
@@ -9438,14 +9704,14 @@
|
|
9438
9704
|
- func: argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor
|
9439
9705
|
variants: method, function
|
9440
9706
|
|
9441
|
-
- func: topk.values(Tensor self,
|
9707
|
+
- func: topk.values(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
|
9442
9708
|
structured: True
|
9443
9709
|
dispatch:
|
9444
9710
|
CPU: topk_out_cpu
|
9445
9711
|
CUDA: topk_out_cuda
|
9446
9712
|
MPS: topk_out_mps
|
9447
9713
|
|
9448
|
-
- func: topk(Tensor self,
|
9714
|
+
- func: topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)
|
9449
9715
|
variants: method, function
|
9450
9716
|
structured_delegate: topk.values
|
9451
9717
|
dispatch:
|
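`topk` now declares `k` as a `SymInt` so the value can stay symbolic inside PyTorch's tracing machinery; from Ruby it is still an ordinary integer and the result is still a (values, indices) pair:

```ruby
require "torch"

t = Torch.tensor([1.0, 5.0, 2.0, 4.0])
values, indices = t.topk(2)
puts values.to_a.inspect  # => [5.0, 4.0]
puts indices.to_a.inspect # => [1, 3]
```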
@@ -9470,6 +9736,7 @@
|
|
9470
9736
|
variants: method, function
|
9471
9737
|
dispatch:
|
9472
9738
|
SparseCPU, SparseCUDA: any_sparse
|
9739
|
+
tags: core
|
9473
9740
|
|
9474
9741
|
- func: any.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
9475
9742
|
device_check: NoCheck
|
@@ -9483,6 +9750,7 @@
|
|
9483
9750
|
structured: True
|
9484
9751
|
dispatch:
|
9485
9752
|
CPU, CUDA: renorm_out
|
9753
|
+
MPS: renorm_out_mps
|
9486
9754
|
|
9487
9755
|
- func: renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor
|
9488
9756
|
device_check: NoCheck # TensorIterator
|
@@ -9537,6 +9805,7 @@
|
|
9537
9805
|
structured: True
|
9538
9806
|
dispatch:
|
9539
9807
|
CPU, CUDA: pow_Scalar_out
|
9808
|
+
MPS: pow_Scalar_out_mps
|
9540
9809
|
tags: pointwise
|
9541
9810
|
|
9542
9811
|
- func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
|
@@ -9611,6 +9880,7 @@
|
|
9611
9880
|
MPS: normal_mps_
|
9612
9881
|
Meta: normal_meta_
|
9613
9882
|
SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
|
9883
|
+
NestedTensorCPU, NestedTensorCUDA: normal_nested_
|
9614
9884
|
autogen: normal.out
|
9615
9885
|
|
9616
9886
|
# Only used by the functionalization pass.
|
@@ -9720,156 +9990,155 @@
|
|
9720
9990
|
CUDA: foreach_tensor_add_scalar_kernel_cuda_
|
9721
9991
|
autogen: _foreach_add.Scalar_out
|
9722
9992
|
|
9723
|
-
- func:
|
9993
|
+
- func: _foreach_add.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]
|
9724
9994
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9725
9995
|
variants: function
|
9726
9996
|
dispatch:
|
9727
|
-
CPU:
|
9728
|
-
CUDA:
|
9997
|
+
CPU: foreach_tensor_add_list_kernel_slow
|
9998
|
+
CUDA: foreach_tensor_add_list_kernel_cuda
|
9729
9999
|
|
9730
|
-
- func:
|
10000
|
+
- func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
|
9731
10001
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9732
10002
|
variants: function
|
9733
10003
|
dispatch:
|
9734
|
-
CPU:
|
9735
|
-
CUDA:
|
9736
|
-
autogen:
|
10004
|
+
CPU: foreach_tensor_add_list_kernel_slow_
|
10005
|
+
CUDA: foreach_tensor_add_list_kernel_cuda_
|
10006
|
+
autogen: _foreach_add.List_out
|
9737
10007
|
|
9738
|
-
- func:
|
10008
|
+
- func: _foreach_add.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
9739
10009
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9740
10010
|
variants: function
|
9741
10011
|
dispatch:
|
9742
|
-
CPU:
|
9743
|
-
CUDA:
|
10012
|
+
CPU: foreach_tensor_add_scalarlist_kernel_slow
|
10013
|
+
CUDA: foreach_tensor_add_scalarlist_kernel_cuda
|
9744
10014
|
|
9745
|
-
- func:
|
10015
|
+
- func: _foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
|
9746
10016
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9747
10017
|
variants: function
|
9748
10018
|
dispatch:
|
9749
|
-
CPU:
|
9750
|
-
CUDA:
|
9751
|
-
autogen:
|
10019
|
+
CPU: foreach_tensor_add_scalarlist_kernel_slow_
|
10020
|
+
CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
|
10021
|
+
autogen: _foreach_add.ScalarList_out
|
9752
10022
|
|
9753
|
-
- func:
|
10023
|
+
- func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
|
9754
10024
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9755
10025
|
variants: function
|
9756
10026
|
dispatch:
|
9757
|
-
CPU:
|
9758
|
-
CUDA:
|
10027
|
+
CPU: foreach_tensor_sub_scalar_kernel_slow
|
10028
|
+
CUDA: foreach_tensor_sub_scalar_kernel_cuda
|
9759
10029
|
|
9760
|
-
- func:
|
10030
|
+
- func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
|
9761
10031
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9762
10032
|
variants: function
|
9763
10033
|
dispatch:
|
9764
|
-
CPU:
|
9765
|
-
CUDA:
|
9766
|
-
autogen:
|
10034
|
+
CPU: foreach_tensor_sub_scalar_kernel_slow_
|
10035
|
+
CUDA: foreach_tensor_sub_scalar_kernel_cuda_
|
10036
|
+
autogen: _foreach_sub.Scalar_out
|
9767
10037
|
|
9768
|
-
- func:
|
10038
|
+
- func: _foreach_sub.List(Tensor[] self, Tensor[] other, *, Scalar alpha=1) -> Tensor[]
|
9769
10039
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9770
10040
|
variants: function
|
9771
10041
|
dispatch:
|
9772
|
-
CPU:
|
9773
|
-
CUDA:
|
10042
|
+
CPU: foreach_tensor_sub_list_kernel_slow
|
10043
|
+
CUDA: foreach_tensor_sub_list_kernel_cuda
|
9774
10044
|
|
9775
|
-
- func:
|
10045
|
+
- func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
|
9776
10046
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9777
10047
|
variants: function
|
9778
10048
|
dispatch:
|
9779
|
-
CPU:
|
9780
|
-
CUDA:
|
9781
|
-
autogen:
|
10049
|
+
CPU: foreach_tensor_sub_list_kernel_slow_
|
10050
|
+
CUDA: foreach_tensor_sub_list_kernel_cuda_
|
10051
|
+
autogen: _foreach_sub.List_out
|
9782
10052
|
|
9783
|
-
- func:
|
10053
|
+
- func: _foreach_sub.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
9784
10054
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9785
10055
|
variants: function
|
9786
10056
|
dispatch:
|
9787
|
-
CPU:
|
9788
|
-
CUDA:
|
10057
|
+
CPU: foreach_tensor_sub_scalarlist_kernel_slow
|
10058
|
+
CUDA: foreach_tensor_sub_scalarlist_kernel_cuda
|
9789
10059
|
|
9790
|
-
- func:
|
10060
|
+
- func: _foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
|
9791
10061
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9792
10062
|
variants: function
|
9793
10063
|
dispatch:
|
9794
|
-
CPU:
|
9795
|
-
CUDA:
|
9796
|
-
autogen:
|
10064
|
+
CPU: foreach_tensor_sub_scalarlist_kernel_slow_
|
10065
|
+
CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
|
10066
|
+
autogen: _foreach_sub.ScalarList_out
|
9797
10067
|
|
9798
|
-
|
9799
|
-
- func: _foreach_maximum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
|
10068
|
+
- func: _foreach_mul.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
|
9800
10069
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9801
10070
|
variants: function
|
9802
10071
|
dispatch:
|
9803
|
-
CPU:
|
9804
|
-
CUDA:
|
10072
|
+
CPU: foreach_tensor_mul_scalar_kernel_slow
|
10073
|
+
CUDA: foreach_tensor_mul_scalar_kernel_cuda
|
9805
10074
|
|
9806
|
-
- func:
|
10075
|
+
- func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
|
9807
10076
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9808
10077
|
variants: function
|
9809
10078
|
dispatch:
|
9810
|
-
CPU:
|
9811
|
-
CUDA:
|
9812
|
-
autogen:
|
10079
|
+
CPU: foreach_tensor_mul_scalar_kernel_slow_
|
10080
|
+
CUDA: foreach_tensor_mul_scalar_kernel_cuda_
|
10081
|
+
autogen: _foreach_mul.Scalar_out
|
9813
10082
|
|
9814
|
-
- func:
|
10083
|
+
- func: _foreach_mul.List(Tensor[] self, Tensor[] other) -> Tensor[]
|
9815
10084
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9816
10085
|
variants: function
|
9817
10086
|
dispatch:
|
9818
|
-
CPU:
|
9819
|
-
CUDA:
|
10087
|
+
CPU: foreach_tensor_mul_list_kernel_slow
|
10088
|
+
CUDA: foreach_tensor_mul_list_kernel_cuda
|
9820
10089
|
|
9821
|
-
- func:
|
10090
|
+
- func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
|
9822
10091
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9823
10092
|
variants: function
|
9824
10093
|
dispatch:
|
9825
|
-
CPU:
|
9826
|
-
CUDA:
|
9827
|
-
autogen:
|
10094
|
+
CPU: foreach_tensor_mul_list_kernel_slow_
|
10095
|
+
CUDA: foreach_tensor_mul_list_kernel_cuda_
|
10096
|
+
autogen: _foreach_mul.List_out
|
9828
10097
|
|
9829
|
-
- func:
|
10098
|
+
- func: _foreach_mul.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
9830
10099
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9831
10100
|
variants: function
|
9832
10101
|
dispatch:
|
9833
|
-
CPU:
|
9834
|
-
CUDA:
|
10102
|
+
CPU: foreach_tensor_mul_scalarlist_kernel_slow
|
10103
|
+
CUDA: foreach_tensor_mul_scalarlist_kernel_cuda
|
9835
10104
|
|
9836
|
-
- func:
|
10105
|
+
- func: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
|
9837
10106
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9838
10107
|
variants: function
|
9839
10108
|
dispatch:
|
9840
|
-
CPU:
|
9841
|
-
CUDA:
|
9842
|
-
autogen:
|
10109
|
+
CPU: foreach_tensor_mul_scalarlist_kernel_slow_
|
10110
|
+
CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
|
10111
|
+
autogen: _foreach_mul.ScalarList_out
|
9843
10112
|
|
9844
|
-
- func:
|
10113
|
+
- func: _foreach_mul.Tensor(Tensor[] self, Tensor other) -> Tensor[]
|
9845
10114
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9846
10115
|
variants: function
|
9847
10116
|
dispatch:
|
9848
|
-
CPU:
|
9849
|
-
CUDA:
|
10117
|
+
CPU: foreach_tensor_mul_tensor_kernel_slow
|
10118
|
+
CUDA: foreach_tensor_mul_tensor_kernel_cuda
|
9850
10119
|
|
9851
|
-
- func:
|
10120
|
+
- func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
|
9852
10121
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9853
10122
|
variants: function
|
9854
10123
|
dispatch:
|
9855
|
-
CPU:
|
9856
|
-
CUDA:
|
9857
|
-
autogen:
|
10124
|
+
CPU: foreach_tensor_mul_tensor_kernel_slow_
|
10125
|
+
CUDA: foreach_tensor_mul_tensor_kernel_cuda_
|
10126
|
+
autogen: _foreach_mul.Tensor_out
|
9858
10127
|
|
9859
|
-
- func:
|
10128
|
+
- func: _foreach_div.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
|
9860
10129
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9861
10130
|
variants: function
|
9862
10131
|
dispatch:
|
9863
|
-
CPU:
|
9864
|
-
CUDA:
|
10132
|
+
CPU: foreach_tensor_div_scalar_kernel_slow
|
10133
|
+
CUDA: foreach_tensor_div_scalar_kernel_cuda
|
9865
10134
|
|
9866
|
-
- func:
|
10135
|
+
- func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
|
9867
10136
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9868
10137
|
variants: function
|
9869
10138
|
dispatch:
|
9870
|
-
CPU:
|
9871
|
-
CUDA:
|
9872
|
-
autogen:
|
10139
|
+
CPU: foreach_tensor_div_scalar_kernel_slow_
|
10140
|
+
CUDA: foreach_tensor_div_scalar_kernel_cuda_
|
10141
|
+
autogen: _foreach_div.Scalar_out
|
9873
10142
|
|
9874
10143
|
- func: _foreach_div.List(Tensor[] self, Tensor[] other) -> Tensor[]
|
9875
10144
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
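The `_foreach_*` entries being filled in above are fused multi-tensor kernels, used mostly by optimizers; each one is semantically the base op mapped over a list of tensors. The underscore-prefixed bindings may not be exposed by torch-rb, so the sketch below shows only the plain-Ruby equivalent of `_foreach_mul.Scalar`:

```ruby
require "torch"

tensors = [Torch.ones(2), Torch.ones(3) * 2]
# What _foreach_mul.Scalar computes, expressed as an explicit (unfused) map:
scaled = tensors.map { |t| t * 10 }
puts scaled.map(&:to_a).inspect # => [[10.0, 10.0], [20.0, 20.0, 20.0]]
```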
@@ -9886,20 +10155,35 @@
|
|
9886
10155
|
CUDA: foreach_tensor_div_list_kernel_cuda_
|
9887
10156
|
autogen: _foreach_div.List_out
|
9888
10157
|
|
9889
|
-
- func:
|
10158
|
+
- func: _foreach_div.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
9890
10159
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9891
10160
|
variants: function
|
9892
10161
|
dispatch:
|
9893
|
-
CPU:
|
9894
|
-
CUDA:
|
10162
|
+
CPU: foreach_tensor_div_scalarlist_kernel_slow
|
10163
|
+
CUDA: foreach_tensor_div_scalarlist_kernel_cuda
|
9895
10164
|
|
9896
|
-
- func:
|
10165
|
+
- func: _foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
|
9897
10166
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9898
10167
|
variants: function
|
9899
10168
|
dispatch:
|
9900
|
-
CPU:
|
9901
|
-
CUDA:
|
9902
|
-
autogen:
|
10169
|
+
CPU: foreach_tensor_div_scalarlist_kernel_slow_
|
10170
|
+
CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
|
10171
|
+
autogen: _foreach_div.ScalarList_out
|
10172
|
+
|
10173
|
+
- func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
|
10174
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10175
|
+
variants: function
|
10176
|
+
dispatch:
|
10177
|
+
CPU: foreach_tensor_clamp_max_scalar_kernel_slow
|
10178
|
+
CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda
|
10179
|
+
|
10180
|
+
- func: _foreach_clamp_max_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
|
10181
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10182
|
+
variants: function
|
10183
|
+
dispatch:
|
10184
|
+
CPU: foreach_tensor_clamp_max_scalar_kernel_slow_
|
10185
|
+
CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
|
10186
|
+
autogen: _foreach_clamp_max.Scalar_out
|
9903
10187
|
|
9904
10188
|
- func: _foreach_clamp_max.List(Tensor[] self, Tensor[] other) -> Tensor[]
|
9905
10189
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
@@ -9916,143 +10200,143 @@
|
|
9916
10200
|
CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
|
9917
10201
|
autogen: _foreach_clamp_max.List_out
|
9918
10202
|
|
9919
|
-
|
9920
|
-
- func: _foreach_maximum.List(Tensor[] self, Tensor[] other) -> Tensor[]
|
10203
|
+
- func: _foreach_clamp_max.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
9921
10204
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9922
10205
|
variants: function
|
9923
10206
|
dispatch:
|
9924
|
-
CPU:
|
9925
|
-
CUDA:
|
10207
|
+
CPU: foreach_tensor_clamp_max_scalarlist_kernel_slow
|
10208
|
+
CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda
|
9926
10209
|
|
9927
|
-
- func:
|
10210
|
+
- func: _foreach_clamp_max_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
|
9928
10211
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9929
10212
|
variants: function
|
9930
10213
|
dispatch:
|
9931
|
-
CPU:
|
9932
|
-
CUDA:
|
9933
|
-
autogen:
|
10214
|
+
CPU: foreach_tensor_clamp_max_scalarlist_kernel_slow_
|
10215
|
+
CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
|
10216
|
+
autogen: _foreach_clamp_max.ScalarList_out
|
9934
10217
|
|
9935
|
-
- func:
|
10218
|
+
- func: _foreach_clamp_min.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
|
9936
10219
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9937
10220
|
variants: function
|
9938
10221
|
dispatch:
|
9939
|
-
CPU:
|
9940
|
-
CUDA:
|
10222
|
+
CPU: foreach_tensor_clamp_min_scalar_kernel_slow
|
10223
|
+
CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda
|
9941
10224
|
|
9942
|
-
- func:
|
10225
|
+
- func: _foreach_clamp_min_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
|
9943
10226
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9944
10227
|
variants: function
|
9945
10228
|
dispatch:
|
9946
|
-
CPU:
|
9947
|
-
CUDA:
|
9948
|
-
autogen:
|
9949
|
-
|
10229
|
+
CPU: foreach_tensor_clamp_min_scalar_kernel_slow_
|
10230
|
+
CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
|
10231
|
+
autogen: _foreach_clamp_min.Scalar_out
|
9950
10232
|
|
9951
|
-
- func:
|
10233
|
+
- func: _foreach_clamp_min.List(Tensor[] self, Tensor[] other) -> Tensor[]
|
9952
10234
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9953
10235
|
variants: function
|
9954
10236
|
dispatch:
|
9955
|
-
CPU:
|
9956
|
-
CUDA:
|
10237
|
+
CPU: foreach_tensor_clamp_min_list_kernel_slow
|
10238
|
+
CUDA: foreach_tensor_clamp_min_list_kernel_cuda
|
9957
10239
|
|
9958
|
-
- func:
|
10240
|
+
- func: _foreach_clamp_min_.List(Tensor(a!)[] self, Tensor[] other) -> ()
|
9959
10241
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9960
10242
|
variants: function
|
9961
10243
|
dispatch:
|
9962
|
-
CPU:
|
9963
|
-
CUDA:
|
9964
|
-
autogen:
|
10244
|
+
CPU: foreach_tensor_clamp_min_list_kernel_slow_
|
10245
|
+
CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
|
10246
|
+
autogen: _foreach_clamp_min.List_out
|
9965
10247
|
|
9966
|
-
- func:
|
10248
|
+
- func: _foreach_clamp_min.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
9967
10249
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9968
10250
|
variants: function
|
9969
10251
|
dispatch:
|
9970
|
-
CPU:
|
9971
|
-
CUDA:
|
10252
|
+
CPU: foreach_tensor_clamp_min_scalarlist_kernel_slow
|
10253
|
+
CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda
|
9972
10254
|
|
9973
|
-
- func:
|
10255
|
+
- func: _foreach_clamp_min_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
|
9974
10256
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9975
10257
|
variants: function
|
9976
10258
|
dispatch:
|
9977
|
-
CPU:
|
9978
|
-
CUDA:
|
9979
|
-
autogen:
|
10259
|
+
CPU: foreach_tensor_clamp_min_scalarlist_kernel_slow_
|
10260
|
+
CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
|
10261
|
+
autogen: _foreach_clamp_min.ScalarList_out
|
9980
10262
|
|
9981
|
-
|
10263
|
+
# foreach_minimum/maximum dispatches to clamp_max/min
|
10264
|
+
- func: _foreach_maximum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
|
9982
10265
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9983
10266
|
variants: function
|
9984
10267
|
dispatch:
|
9985
|
-
CPU:
|
9986
|
-
CUDA:
|
10268
|
+
CPU: foreach_tensor_clamp_min_scalar_kernel_slow
|
10269
|
+
CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda
|
9987
10270
|
|
9988
|
-
- func:
|
10271
|
+
- func: _foreach_maximum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
|
9989
10272
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9990
10273
|
variants: function
|
9991
10274
|
dispatch:
|
9992
|
-
CPU:
|
9993
|
-
CUDA:
|
9994
|
-
autogen:
|
10275
|
+
CPU: foreach_tensor_clamp_min_scalar_kernel_slow_
|
10276
|
+
CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
|
10277
|
+
autogen: _foreach_maximum.Scalar_out
|
9995
10278
|
|
9996
|
-
|
10279
|
+
# foreach_minimum/maximum dispatches to clamp_max/min
|
10280
|
+
- func: _foreach_maximum.List(Tensor[] self, Tensor[] other) -> Tensor[]
|
9997
10281
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
9998
10282
|
variants: function
|
9999
10283
|
dispatch:
|
10000
|
-
CPU:
|
10001
|
-
CUDA:
|
10284
|
+
CPU: foreach_tensor_clamp_min_list_kernel_slow
|
10285
|
+
CUDA: foreach_tensor_clamp_min_list_kernel_cuda
|
10002
10286
|
|
10003
|
-
- func:
|
10287
|
+
- func: _foreach_maximum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
|
10004
10288
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10005
10289
|
variants: function
|
10006
10290
|
dispatch:
|
10007
|
-
CPU:
|
10008
|
-
CUDA:
|
10009
|
-
autogen:
|
10291
|
+
CPU: foreach_tensor_clamp_min_list_kernel_slow_
|
10292
|
+
CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
|
10293
|
+
autogen: _foreach_maximum.List_out
|
10010
10294
|
|
10011
|
-
|
10295
|
+
# foreach_minimum/maximum dispatches to clamp_max/min
|
10296
|
+
- func: _foreach_maximum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
10012
10297
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10013
10298
|
variants: function
|
10014
10299
|
dispatch:
|
10015
10300
|
CPU: foreach_tensor_clamp_min_scalarlist_kernel_slow
|
10016
10301
|
CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda
|
10017
10302
|
|
10018
|
-
- func:
|
10303
|
+
- func: _foreach_maximum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
|
10019
10304
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10020
10305
|
variants: function
|
10021
10306
|
dispatch:
|
10022
10307
|
CPU: foreach_tensor_clamp_min_scalarlist_kernel_slow_
|
10023
10308
|
CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
|
10024
|
-
autogen:
|
10309
|
+
autogen: _foreach_maximum.ScalarList_out
|
10025
10310
|
|
10026
|
-
- func:
|
10311
|
+
- func: _foreach_minimum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
|
10027
10312
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10028
10313
|
variants: function
|
10029
10314
|
dispatch:
|
10030
|
-
CPU:
|
10031
|
-
CUDA:
|
10315
|
+
CPU: foreach_tensor_clamp_max_scalar_kernel_slow
|
10316
|
+
CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda
|
10032
10317
|
|
10033
|
-
- func:
|
10318
|
+
- func: _foreach_minimum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
|
10034
10319
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10035
10320
|
variants: function
|
10036
10321
|
dispatch:
|
10037
|
-
CPU:
|
10038
|
-
CUDA:
|
10039
|
-
autogen:
|
10322
|
+
CPU: foreach_tensor_clamp_max_scalar_kernel_slow_
|
10323
|
+
CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
|
10324
|
+
autogen: _foreach_minimum.Scalar_out
|
10040
10325
|
|
10041
|
-
|
10042
|
-
- func: _foreach_maximum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
10326
|
+
- func: _foreach_minimum.List(Tensor[] self, Tensor[] other) -> Tensor[]
|
10043
10327
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10044
10328
|
variants: function
|
10045
10329
|
dispatch:
|
10046
|
-
CPU:
|
10047
|
-
CUDA:
|
10330
|
+
CPU: foreach_tensor_clamp_max_list_kernel_slow
|
10331
|
+
CUDA: foreach_tensor_clamp_max_list_kernel_cuda
|
10048
10332
|
|
10049
|
-
- func:
|
10333
|
+
- func: _foreach_minimum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
|
10050
10334
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10051
10335
|
variants: function
|
10052
10336
|
dispatch:
|
10053
|
-
CPU:
|
10054
|
-
CUDA:
|
10055
|
-
autogen:
|
10337
|
+
CPU: foreach_tensor_clamp_max_list_kernel_slow_
|
10338
|
+
CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
|
10339
|
+
autogen: _foreach_minimum.List_out
|
10056
10340
|
|
10057
10341
|
- func: _foreach_minimum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
|
10058
10342
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
@@ -10069,43 +10353,95 @@
|
|
10069
10353
|
CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
|
10070
10354
|
autogen: _foreach_minimum.ScalarList_out
|
10071
10355
|
|
10072
|
-
- func:
|
10356
|
+
- func: _foreach_addcdiv.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
|
10073
10357
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10074
10358
|
variants: function
|
10075
10359
|
dispatch:
|
10076
|
-
CPU:
|
10077
|
-
CUDA:
|
10360
|
+
CPU: foreach_tensor_addcdiv_scalar_slow
|
10361
|
+
CUDA: foreach_tensor_addcdiv_scalar_cuda
|
10078
10362
|
|
10079
|
-
- func:
|
10363
|
+
- func: _foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
|
10080
10364
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10081
10365
|
variants: function
|
10082
10366
|
dispatch:
|
10083
|
-
CPU:
|
10084
|
-
CUDA:
|
10085
|
-
autogen: _foreach_zero, _foreach_zero.out
|
10367
|
+
CPU: foreach_tensor_addcdiv_scalarlist_slow
|
10368
|
+
CUDA: foreach_tensor_addcdiv_scalarlist_cuda
|
10086
10369
|
|
10087
|
-
- func:
|
10370
|
+
- func: _foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
|
10088
10371
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10089
10372
|
variants: function
|
10090
10373
|
dispatch:
|
10091
|
-
CPU:
|
10092
|
-
CUDA:
|
10093
|
-
autogen: _foreach_exp.out
|
10374
|
+
CPU: foreach_tensor_addcdiv_tensor_slow
|
10375
|
+
CUDA: foreach_tensor_addcdiv_tensor_cuda
|
10094
10376
|
|
10095
|
-
- func:
|
10377
|
+
- func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
|
10096
10378
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10097
10379
|
variants: function
|
10098
10380
|
dispatch:
|
10099
|
-
CPU:
|
10100
|
-
CUDA:
|
10381
|
+
CPU: foreach_tensor_addcdiv_scalar_slow_
|
10382
|
+
CUDA: foreach_tensor_addcdiv_scalar_cuda_
|
10383
|
+
autogen: _foreach_addcdiv.Scalar_out
|
10101
10384
|
|
10102
|
-
- func:
|
10385
|
+
- func: _foreach_addcdiv_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
|
10103
10386
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10104
10387
|
variants: function
|
10105
10388
|
dispatch:
|
10106
|
-
CPU:
|
10107
|
-
CUDA:
|
10108
|
-
autogen:
|
10389
|
+
CPU: foreach_tensor_addcdiv_scalarlist_slow_
|
10390
|
+
CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
|
10391
|
+
autogen: _foreach_addcdiv.ScalarList_out
|
10392
|
+
|
10393
|
+
- func: _foreach_addcdiv_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()
|
10394
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10395
|
+
variants: function
|
10396
|
+
dispatch:
|
10397
|
+
CPU: foreach_tensor_addcdiv_tensor_slow_
|
10398
|
+
CUDA: foreach_tensor_addcdiv_tensor_cuda_
|
10399
|
+
autogen: _foreach_addcdiv.Tensor_out
|
10400
|
+
|
10401
|
+
- func: _foreach_addcmul.Scalar(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
|
10402
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10403
|
+
variants: function
|
10404
|
+
dispatch:
|
10405
|
+
CPU: foreach_tensor_addcmul_scalar_slow
|
10406
|
+
CUDA: foreach_tensor_addcmul_scalar_cuda
|
10407
|
+
|
10408
|
+
- func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
|
10409
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10410
|
+
variants: function
|
10411
|
+
dispatch:
|
10412
|
+
CPU: foreach_tensor_addcmul_scalarlist_slow
|
10413
|
+
CUDA: foreach_tensor_addcmul_scalarlist_cuda
|
10414
|
+
|
10415
|
+
- func: _foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
|
10416
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10417
|
+
variants: function
|
10418
|
+
dispatch:
|
10419
|
+
CPU: foreach_tensor_addcmul_tensor_slow
|
10420
|
+
CUDA: foreach_tensor_addcmul_tensor_cuda
|
10421
|
+
|
10422
|
+
- func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
|
10423
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10424
|
+
variants: function
|
10425
|
+
dispatch:
|
10426
|
+
CPU: foreach_tensor_addcmul_scalar_slow_
|
10427
|
+
CUDA: foreach_tensor_addcmul_scalar_cuda_
|
10428
|
+
autogen: _foreach_addcmul.Scalar_out
|
10429
|
+
|
10430
|
+
- func: _foreach_addcmul_.ScalarList(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> ()
|
10431
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10432
|
+
variants: function
|
10433
|
+
dispatch:
|
10434
|
+
CPU: foreach_tensor_addcmul_scalarlist_slow_
|
10435
|
+
CUDA: foreach_tensor_addcmul_scalarlist_cuda_
|
10436
|
+
autogen: _foreach_addcmul.ScalarList_out
|
10437
|
+
|
10438
|
+
- func: _foreach_addcmul_.Tensor(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> ()
|
10439
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10440
|
+
variants: function
|
10441
|
+
dispatch:
|
10442
|
+
CPU: foreach_tensor_addcmul_tensor_slow_
|
10443
|
+
CUDA: foreach_tensor_addcmul_tensor_cuda_
|
10444
|
+
autogen: _foreach_addcmul.Tensor_out
|
10109
10445
|
|
10110
10446
|
- func: _foreach_abs(Tensor[] self) -> Tensor[]
|
10111
10447
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
@@ -10242,6 +10578,21 @@
|
|
10242
10578
|
CUDA: foreach_tensor_erfc_cuda_
|
10243
10579
|
autogen: _foreach_erfc.out
|
10244
10580
|
|
10581
|
+
- func: _foreach_exp(Tensor[] self) -> Tensor[]
|
10582
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10583
|
+
variants: function
|
10584
|
+
dispatch:
|
10585
|
+
CPU: foreach_tensor_exp_slow
|
10586
|
+
CUDA: foreach_tensor_exp_cuda
|
10587
|
+
|
10588
|
+
- func: _foreach_exp_(Tensor(a!)[] self) -> ()
|
10589
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10590
|
+
variants: function
|
10591
|
+
dispatch:
|
10592
|
+
CPU: foreach_tensor_exp_slow_
|
10593
|
+
CUDA: foreach_tensor_exp_cuda_
|
10594
|
+
autogen: _foreach_exp.out
|
10595
|
+
|
10245
10596
|
- func: _foreach_expm1(Tensor[] self) -> Tensor[]
|
10246
10597
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10247
10598
|
variants: function
|
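`_foreach_exp` / `_foreach_exp_` added above follow the usual unary foreach pattern: a functional form returning a new list and an in-place form mutating its inputs. A hedged sketch via the PyTorch Python frontend:

```python
import torch

xs = [torch.randn(4) for _ in range(3)]

ys = torch._foreach_exp(xs)   # new list: elementwise exp of each tensor
torch._foreach_exp_(xs)       # same computation, applied in place
```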
@@ -10272,6 +10623,68 @@
|
|
10272
10623
|
CUDA: foreach_tensor_floor_cuda_
|
10273
10624
|
autogen: _foreach_floor.out
|
10274
10625
|
|
10626
|
+
- func: _foreach_frac(Tensor[] self) -> Tensor[]
|
10627
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10628
|
+
variants: function
|
10629
|
+
dispatch:
|
10630
|
+
CPU: foreach_tensor_frac_slow
|
10631
|
+
CUDA: foreach_tensor_frac_cuda
|
10632
|
+
|
10633
|
+
- func: _foreach_frac_(Tensor(a!)[] self) -> ()
|
10634
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10635
|
+
variants: function
|
10636
|
+
dispatch:
|
10637
|
+
CPU: foreach_tensor_frac_slow_
|
10638
|
+
CUDA: foreach_tensor_frac_cuda_
|
10639
|
+
autogen: _foreach_frac.out
|
10640
|
+
|
10641
|
+
- func: _foreach_lerp.List(Tensor[] self, Tensor[] tensors1, Tensor[] weights) -> Tensor[]
|
10642
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10643
|
+
variants: function
|
10644
|
+
dispatch:
|
10645
|
+
CPU: foreach_tensor_ternary_lerp_slow
|
10646
|
+
CUDA: foreach_tensor_lerp_ternary_cuda
|
10647
|
+
autogen: _foreach_lerp.List_out
|
10648
|
+
|
10649
|
+
- func: _foreach_lerp_.List(Tensor(a!)[] self, Tensor[] tensors1, Tensor[] weights) -> ()
|
10650
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10651
|
+
variants: function
|
10652
|
+
dispatch:
|
10653
|
+
CPU: foreach_tensor_ternary_lerp_slow_
|
10654
|
+
CUDA: foreach_tensor_lerp_ternary_cuda_
|
10655
|
+
autogen: _foreach_lerp.List_out
|
10656
|
+
|
10657
|
+
- func: _foreach_lerp.Scalar(Tensor[] self, Tensor[] tensors1, Scalar weight) -> Tensor[]
|
10658
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10659
|
+
variants: function
|
10660
|
+
dispatch:
|
10661
|
+
CPU: foreach_tensor_lerp_list_kernel_slow
|
10662
|
+
CUDA: foreach_tensor_lerp_list_cuda
|
10663
|
+
autogen: _foreach_lerp.Scalar_out
|
10664
|
+
|
10665
|
+
- func: _foreach_lerp_.Scalar(Tensor(a!)[] self, Tensor[] tensors1, Scalar weight) -> ()
|
10666
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10667
|
+
variants: function
|
10668
|
+
dispatch:
|
10669
|
+
CPU: foreach_tensor_lerp_list_kernel_slow_
|
10670
|
+
CUDA: foreach_tensor_lerp_list_cuda_
|
10671
|
+
autogen: _foreach_lerp.Scalar_out
|
10672
|
+
|
10673
|
+
- func: _foreach_lgamma(Tensor[] self) -> Tensor[]
|
10674
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10675
|
+
variants: function
|
10676
|
+
dispatch:
|
10677
|
+
CPU: foreach_tensor_lgamma_slow
|
10678
|
+
CUDA: foreach_tensor_lgamma_cuda
|
10679
|
+
|
10680
|
+
- func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
|
10681
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10682
|
+
variants: function
|
10683
|
+
dispatch:
|
10684
|
+
CPU: foreach_tensor_lgamma_slow_
|
10685
|
+
CUDA: foreach_tensor_lgamma_cuda_
|
10686
|
+
autogen: _foreach_lgamma.out
|
10687
|
+
|
10275
10688
|
- func: _foreach_log(Tensor[] self) -> Tensor[]
|
10276
10689
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10277
10690
|
variants: function
|
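The new `_foreach_lerp` entries mirror `Tensor.lerp`: a `.List` overload taking one weight tensor per input and a `.Scalar` overload sharing a single weight. A small illustration (PyTorch Python frontend, assumed here only to demonstrate the schema):

```python
import torch

starts  = [torch.zeros(4) for _ in range(3)]
ends    = [torch.ones(4) for _ in range(3)]
weights = [torch.full((4,), 0.25) for _ in range(3)]

mid = torch._foreach_lerp(starts, ends, 0.5)   # Scalar weight overload
torch._foreach_lerp_(starts, ends, weights)    # List overload, in place
```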
@@ -10347,110 +10760,65 @@
|
|
10347
10760
|
CUDA: foreach_tensor_neg_cuda_
|
10348
10761
|
autogen: _foreach_neg.out
|
10349
10762
|
|
10350
|
-
- func:
|
10351
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10352
|
-
variants: function
|
10353
|
-
dispatch:
|
10354
|
-
CPU: foreach_tensor_tan_slow
|
10355
|
-
CUDA: foreach_tensor_tan_cuda
|
10356
|
-
|
10357
|
-
- func: _foreach_tan_(Tensor(a!)[] self) -> ()
|
10358
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10359
|
-
variants: function
|
10360
|
-
dispatch:
|
10361
|
-
CPU: foreach_tensor_tan_slow_
|
10362
|
-
CUDA: foreach_tensor_tan_cuda_
|
10363
|
-
autogen: _foreach_tan.out
|
10364
|
-
|
10365
|
-
- func: _foreach_tanh(Tensor[] self) -> Tensor[]
|
10366
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10367
|
-
variants: function
|
10368
|
-
dispatch:
|
10369
|
-
CPU: foreach_tensor_tanh_slow
|
10370
|
-
CUDA: foreach_tensor_tanh_cuda
|
10371
|
-
|
10372
|
-
- func: _foreach_tanh_(Tensor(a!)[] self) -> ()
|
10373
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10374
|
-
variants: function
|
10375
|
-
dispatch:
|
10376
|
-
CPU: foreach_tensor_tanh_slow_
|
10377
|
-
CUDA: foreach_tensor_tanh_cuda_
|
10378
|
-
autogen: _foreach_tanh.out
|
10379
|
-
|
10380
|
-
- func: _foreach_sin(Tensor[] self) -> Tensor[]
|
10381
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10382
|
-
variants: function
|
10383
|
-
dispatch:
|
10384
|
-
CPU: foreach_tensor_sin_slow
|
10385
|
-
CUDA: foreach_tensor_sin_cuda
|
10386
|
-
|
10387
|
-
- func: _foreach_sin_(Tensor(a!)[] self) -> ()
|
10388
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10389
|
-
variants: function
|
10390
|
-
dispatch:
|
10391
|
-
CPU: foreach_tensor_sin_slow_
|
10392
|
-
CUDA: foreach_tensor_sin_cuda_
|
10393
|
-
autogen: _foreach_sin.out
|
10394
|
-
|
10395
|
-
- func: _foreach_sinh(Tensor[] self) -> Tensor[]
|
10763
|
+
- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]
|
10396
10764
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10397
10765
|
variants: function
|
10398
10766
|
dispatch:
|
10399
|
-
CPU:
|
10400
|
-
CUDA:
|
10767
|
+
CPU: foreach_tensor_norm_slow
|
10768
|
+
CUDA: foreach_tensor_norm_cuda
|
10769
|
+
autogen: _foreach_norm.Scalar_out
|
10401
10770
|
|
10402
|
-
- func:
|
10771
|
+
- func: _foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]
|
10403
10772
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10404
10773
|
variants: function
|
10405
10774
|
dispatch:
|
10406
|
-
CPU:
|
10407
|
-
CUDA:
|
10408
|
-
autogen: _foreach_sinh.out
|
10775
|
+
CPU: foreach_tensor_pow_list_kernel_slow
|
10776
|
+
CUDA: foreach_tensor_pow_list_kernel_cuda
|
10409
10777
|
|
10410
|
-
- func:
|
10778
|
+
- func: _foreach_pow.Scalar(Tensor[] self, Scalar exponent) -> Tensor[]
|
10411
10779
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10412
10780
|
variants: function
|
10413
10781
|
dispatch:
|
10414
|
-
CPU:
|
10415
|
-
CUDA:
|
10782
|
+
CPU: foreach_tensor_pow_scalar_kernel_slow
|
10783
|
+
CUDA: foreach_tensor_pow_scalar_kernel_cuda
|
10416
10784
|
|
10417
|
-
- func:
|
10785
|
+
- func: _foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]
|
10418
10786
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10419
10787
|
variants: function
|
10420
10788
|
dispatch:
|
10421
|
-
CPU:
|
10422
|
-
CUDA:
|
10423
|
-
autogen: _foreach_round.out
|
10789
|
+
CPU: foreach_tensor_pow_scalarlist_kernel_slow
|
10790
|
+
CUDA: foreach_tensor_pow_scalarlist_kernel_cuda
|
10424
10791
|
|
10425
|
-
- func:
|
10792
|
+
- func: _foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]
|
10426
10793
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10427
10794
|
variants: function
|
10428
10795
|
dispatch:
|
10429
|
-
CPU:
|
10430
|
-
CUDA:
|
10796
|
+
CPU: foreach_scalar_pow_list_kernel_slow
|
10797
|
+
CUDA: foreach_scalar_pow_list_kernel_cuda
|
10431
10798
|
|
10432
|
-
- func:
|
10433
|
-
device_check: NoCheck
|
10799
|
+
- func: _foreach_pow_.List(Tensor(a!)[] self, Tensor[] exponent) -> ()
|
10800
|
+
device_check: NoCheck
|
10434
10801
|
variants: function
|
10435
10802
|
dispatch:
|
10436
|
-
CPU:
|
10437
|
-
CUDA:
|
10438
|
-
autogen:
|
10803
|
+
CPU: foreach_tensor_pow_list_kernel_slow_
|
10804
|
+
CUDA: foreach_tensor_pow_list_kernel_cuda_
|
10805
|
+
autogen: _foreach_pow.List_out
|
10439
10806
|
|
10440
|
-
- func:
|
10441
|
-
device_check: NoCheck
|
10807
|
+
- func: _foreach_pow_.Scalar(Tensor(a!)[] self, Scalar exponent) -> ()
|
10808
|
+
device_check: NoCheck
|
10442
10809
|
variants: function
|
10443
10810
|
dispatch:
|
10444
|
-
CPU:
|
10445
|
-
CUDA:
|
10811
|
+
CPU: foreach_tensor_pow_scalar_kernel_slow_
|
10812
|
+
CUDA: foreach_tensor_pow_scalar_kernel_cuda_
|
10813
|
+
autogen: _foreach_pow.Scalar_out
|
10446
10814
|
|
10447
|
-
- func:
|
10448
|
-
device_check: NoCheck
|
10815
|
+
- func: _foreach_pow_.ScalarList(Tensor(a!)[] self, Scalar[] exponent) -> ()
|
10816
|
+
device_check: NoCheck
|
10449
10817
|
variants: function
|
10450
10818
|
dispatch:
|
10451
|
-
CPU:
|
10452
|
-
CUDA:
|
10453
|
-
autogen:
|
10819
|
+
CPU: foreach_tensor_pow_scalarlist_kernel_slow_
|
10820
|
+
CUDA: foreach_tensor_pow_scalarlist_kernel_cuda_
|
10821
|
+
autogen: _foreach_pow.ScalarList_out
|
10454
10822
|
|
10455
10823
|
- func: _foreach_reciprocal(Tensor[] self) -> Tensor[]
|
10456
10824
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
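`_foreach_norm` and the `_foreach_pow` overloads in this hunk are batched building blocks used, for example, by foreach-based gradient-norm clipping. A sketch of the calling convention (again via the PyTorch Python frontend, purely illustrative):

```python
import torch

grads = [torch.randn(10) for _ in range(4)]

norms = torch._foreach_norm(grads, 2)              # one 0-dim L2 norm per tensor
sq    = torch._foreach_pow(grads, 2)               # Scalar exponent
torch._foreach_pow_(grads, [1.0, 2.0, 3.0, 4.0])   # ScalarList exponent, in place
```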
@@ -10467,6 +10835,21 @@
|
|
10467
10835
|
CUDA: foreach_tensor_reciprocal_cuda_
|
10468
10836
|
autogen: _foreach_reciprocal.out
|
10469
10837
|
|
10838
|
+
- func: _foreach_round(Tensor[] self) -> Tensor[]
|
10839
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10840
|
+
variants: function
|
10841
|
+
dispatch:
|
10842
|
+
CPU: foreach_tensor_round_slow
|
10843
|
+
CUDA: foreach_tensor_round_cuda
|
10844
|
+
|
10845
|
+
- func: _foreach_round_(Tensor(a!)[] self) -> ()
|
10846
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10847
|
+
variants: function
|
10848
|
+
dispatch:
|
10849
|
+
CPU: foreach_tensor_round_slow_
|
10850
|
+
CUDA: foreach_tensor_round_cuda_
|
10851
|
+
autogen: _foreach_round.out
|
10852
|
+
|
10470
10853
|
- func: _foreach_sigmoid(Tensor[] self) -> Tensor[]
|
10471
10854
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10472
10855
|
variants: function
|
@@ -10482,150 +10865,126 @@
|
|
10482
10865
|
CUDA: foreach_tensor_sigmoid_cuda_
|
10483
10866
|
autogen: _foreach_sigmoid.out
|
10484
10867
|
|
10485
|
-
- func:
|
10868
|
+
- func: _foreach_sign(Tensor[] self) -> Tensor[]
|
10486
10869
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10487
10870
|
variants: function
|
10488
10871
|
dispatch:
|
10489
|
-
CPU:
|
10490
|
-
CUDA:
|
10872
|
+
CPU: foreach_tensor_sign_slow
|
10873
|
+
CUDA: foreach_tensor_sign_cuda
|
10491
10874
|
|
10492
|
-
- func:
|
10875
|
+
- func: _foreach_sign_(Tensor(a!)[] self) -> ()
|
10493
10876
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10494
10877
|
variants: function
|
10495
10878
|
dispatch:
|
10496
|
-
CPU:
|
10497
|
-
CUDA:
|
10498
|
-
autogen:
|
10879
|
+
CPU: foreach_tensor_sign_slow_
|
10880
|
+
CUDA: foreach_tensor_sign_cuda_
|
10881
|
+
autogen: _foreach_sign.out
|
10499
10882
|
|
10500
|
-
- func:
|
10883
|
+
- func: _foreach_sin(Tensor[] self) -> Tensor[]
|
10501
10884
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10502
10885
|
variants: function
|
10503
10886
|
dispatch:
|
10504
|
-
CPU:
|
10505
|
-
CUDA:
|
10506
|
-
autogen: _foreach_addcdiv.Scalar_out
|
10887
|
+
CPU: foreach_tensor_sin_slow
|
10888
|
+
CUDA: foreach_tensor_sin_cuda
|
10507
10889
|
|
10508
|
-
- func:
|
10890
|
+
- func: _foreach_sin_(Tensor(a!)[] self) -> ()
|
10509
10891
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10510
10892
|
variants: function
|
10511
10893
|
dispatch:
|
10512
|
-
CPU:
|
10513
|
-
CUDA:
|
10514
|
-
autogen:
|
10894
|
+
CPU: foreach_tensor_sin_slow_
|
10895
|
+
CUDA: foreach_tensor_sin_cuda_
|
10896
|
+
autogen: _foreach_sin.out
|
10515
10897
|
|
10516
|
-
- func:
|
10898
|
+
- func: _foreach_sinh(Tensor[] self) -> Tensor[]
|
10517
10899
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10518
10900
|
variants: function
|
10519
10901
|
dispatch:
|
10520
|
-
CPU:
|
10521
|
-
CUDA:
|
10522
|
-
autogen: _foreach_addcdiv.ScalarList_out
|
10902
|
+
CPU: foreach_tensor_sinh_slow
|
10903
|
+
CUDA: foreach_tensor_sinh_cuda
|
10523
10904
|
|
10524
|
-
- func:
|
10905
|
+
- func: _foreach_sinh_(Tensor(a!)[] self) -> ()
|
10525
10906
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10526
10907
|
variants: function
|
10527
10908
|
dispatch:
|
10528
|
-
CPU:
|
10529
|
-
CUDA:
|
10530
|
-
autogen:
|
10909
|
+
CPU: foreach_tensor_sinh_slow_
|
10910
|
+
CUDA: foreach_tensor_sinh_cuda_
|
10911
|
+
autogen: _foreach_sinh.out
|
10531
10912
|
|
10532
|
-
- func:
|
10913
|
+
- func: _foreach_sqrt(Tensor[] self) -> Tensor[]
|
10533
10914
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10534
10915
|
variants: function
|
10535
10916
|
dispatch:
|
10536
|
-
CPU:
|
10537
|
-
CUDA:
|
10538
|
-
autogen: _foreach_addcmul.ScalarList_out
|
10917
|
+
CPU: foreach_tensor_sqrt_slow
|
10918
|
+
CUDA: foreach_tensor_sqrt_cuda
|
10539
10919
|
|
10540
|
-
- func:
|
10920
|
+
- func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
|
10541
10921
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10542
10922
|
variants: function
|
10543
10923
|
dispatch:
|
10544
|
-
CPU:
|
10545
|
-
CUDA:
|
10546
|
-
autogen:
|
10924
|
+
CPU: foreach_tensor_sqrt_slow_
|
10925
|
+
CUDA: foreach_tensor_sqrt_cuda_
|
10926
|
+
autogen: _foreach_sqrt.out
|
10547
10927
|
|
10548
|
-
- func:
|
10928
|
+
- func: _foreach_tan(Tensor[] self) -> Tensor[]
|
10549
10929
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10550
10930
|
variants: function
|
10551
10931
|
dispatch:
|
10552
|
-
CPU:
|
10553
|
-
CUDA:
|
10932
|
+
CPU: foreach_tensor_tan_slow
|
10933
|
+
CUDA: foreach_tensor_tan_cuda
|
10554
10934
|
|
10555
|
-
- func:
|
10935
|
+
- func: _foreach_tan_(Tensor(a!)[] self) -> ()
|
10556
10936
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10557
10937
|
variants: function
|
10558
10938
|
dispatch:
|
10559
|
-
CPU:
|
10560
|
-
CUDA:
|
10939
|
+
CPU: foreach_tensor_tan_slow_
|
10940
|
+
CUDA: foreach_tensor_tan_cuda_
|
10941
|
+
autogen: _foreach_tan.out
|
10561
10942
|
|
10562
|
-
- func:
|
10943
|
+
- func: _foreach_tanh(Tensor[] self) -> Tensor[]
|
10563
10944
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10564
10945
|
variants: function
|
10565
10946
|
dispatch:
|
10566
|
-
CPU:
|
10567
|
-
CUDA:
|
10947
|
+
CPU: foreach_tensor_tanh_slow
|
10948
|
+
CUDA: foreach_tensor_tanh_cuda
|
10568
10949
|
|
10569
|
-
- func:
|
10950
|
+
- func: _foreach_tanh_(Tensor(a!)[] self) -> ()
|
10570
10951
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10571
10952
|
variants: function
|
10572
10953
|
dispatch:
|
10573
|
-
CPU:
|
10574
|
-
CUDA:
|
10954
|
+
CPU: foreach_tensor_tanh_slow_
|
10955
|
+
CUDA: foreach_tensor_tanh_cuda_
|
10956
|
+
autogen: _foreach_tanh.out
|
10575
10957
|
|
10576
|
-
- func:
|
10958
|
+
- func: _foreach_trunc(Tensor[] self) -> Tensor[]
|
10577
10959
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10578
10960
|
variants: function
|
10579
10961
|
dispatch:
|
10580
|
-
CPU:
|
10581
|
-
CUDA:
|
10962
|
+
CPU: foreach_tensor_trunc_slow
|
10963
|
+
CUDA: foreach_tensor_trunc_cuda
|
10582
10964
|
|
10583
|
-
- func:
|
10965
|
+
- func: _foreach_trunc_(Tensor(a!)[] self) -> ()
|
10584
10966
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10585
10967
|
variants: function
|
10586
10968
|
dispatch:
|
10587
|
-
CPU:
|
10588
|
-
CUDA:
|
10969
|
+
CPU: foreach_tensor_trunc_slow_
|
10970
|
+
CUDA: foreach_tensor_trunc_cuda_
|
10971
|
+
autogen: _foreach_trunc.out
|
10589
10972
|
|
10590
|
-
- func:
|
10973
|
+
- func: _foreach_zero_(Tensor(a!)[] self) -> ()
|
10591
10974
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10592
10975
|
variants: function
|
10593
10976
|
dispatch:
|
10594
|
-
CPU:
|
10595
|
-
CUDA:
|
10596
|
-
autogen:
|
10597
|
-
|
10598
|
-
- func: _foreach_lerp.List(Tensor[] self, Tensor[] tensors1, Tensor[] weights) -> Tensor[]
|
10599
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10600
|
-
variants: function
|
10601
|
-
dispatch:
|
10602
|
-
CPU: foreach_tensor_ternary_lerp_slow
|
10603
|
-
CUDA: foreach_tensor_lerp_ternary_cuda
|
10604
|
-
autogen: _foreach_lerp.List_out
|
10605
|
-
|
10606
|
-
- func: _foreach_lerp_.List(Tensor(a!)[] self, Tensor[] tensors1, Tensor[] weights) -> ()
|
10607
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10608
|
-
variants: function
|
10609
|
-
dispatch:
|
10610
|
-
CPU: foreach_tensor_ternary_lerp_slow_
|
10611
|
-
CUDA: foreach_tensor_lerp_ternary_cuda_
|
10612
|
-
autogen: _foreach_lerp.List_out
|
10613
|
-
|
10614
|
-
- func: _foreach_lerp.Scalar(Tensor[] self, Tensor[] tensors1, Scalar weight) -> Tensor[]
|
10615
|
-
device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
|
10616
|
-
variants: function
|
10617
|
-
dispatch:
|
10618
|
-
CPU: foreach_tensor_lerp_list_kernel_slow
|
10619
|
-
CUDA: foreach_tensor_lerp_list_cuda
|
10620
|
-
autogen: _foreach_lerp.Scalar_out
|
10977
|
+
CPU: foreach_tensor_zero_slow_
|
10978
|
+
CUDA: foreach_tensor_zero_cuda_
|
10979
|
+
autogen: _foreach_zero, _foreach_zero.out
|
10621
10980
|
|
10622
|
-
- func:
|
10623
|
-
device_check: NoCheck # foreach kernels fall back to slow path when
|
10981
|
+
- func: _foreach_copy_(Tensor(a!)[] self, Tensor[] src, bool non_blocking=False) -> ()
|
10982
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10624
10983
|
variants: function
|
10625
10984
|
dispatch:
|
10626
|
-
CPU:
|
10627
|
-
CUDA:
|
10628
|
-
autogen:
|
10985
|
+
CPU: foreach_tensor_copy_list_kernel_slow_
|
10986
|
+
CUDA: foreach_tensor_copy_list_kernel_cuda_
|
10987
|
+
autogen: _foreach_copy, _foreach_copy.out
|
10629
10988
|
|
10630
10989
|
- func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
|
10631
10990
|
dispatch:
|
@@ -10657,7 +11016,11 @@
|
|
10657
11016
|
dispatch:
|
10658
11017
|
CPU: searchsorted_cpu
|
10659
11018
|
CUDA: searchsorted_cuda
|
10660
|
-
|
11019
|
+
|
11020
|
+
- func: searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
|
11021
|
+
dispatch:
|
11022
|
+
CPU: searchsorted_out_cpu
|
11023
|
+
CUDA: searchsorted_out_cuda
|
10661
11024
|
|
10662
11025
|
- func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
|
10663
11026
|
structured_delegate: _convert_indices_from_coo_to_csr.out
|
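The new `searchsorted.Scalar_out` entry adds an out-variant for the existing scalar overload. Roughly, from the PyTorch Python frontend (the `out=` form is the part this release adds):

```python
import torch

boundaries = torch.tensor([1.0, 3.0, 5.0, 7.0])

idx = torch.searchsorted(boundaries, 4.0)      # scalar overload -> 0-dim int64 tensor
out = torch.empty((), dtype=torch.int64)
torch.searchsorted(boundaries, 4.0, out=out)   # newly added out= variant
```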
@@ -10981,6 +11344,7 @@
|
|
10981
11344
|
python_module: nn
|
10982
11345
|
dispatch:
|
10983
11346
|
CPU, CUDA: hardsigmoid_out
|
11347
|
+
MPS: hardsigmoid_out_mps
|
10984
11348
|
QuantizedCPU: hardsigmoid_out_quantized_cpu
|
10985
11349
|
|
10986
11350
|
- func: hardsigmoid(Tensor self) -> Tensor
|
@@ -11001,6 +11365,7 @@
|
|
11001
11365
|
python_module: nn
|
11002
11366
|
dispatch:
|
11003
11367
|
CPU, CUDA: hardsigmoid_backward_out
|
11368
|
+
MPS: hardsigmoid_backward_out_mps
|
11004
11369
|
|
11005
11370
|
- func: hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor
|
11006
11371
|
structured_delegate: hardsigmoid_backward.grad_input
|
@@ -11119,6 +11484,7 @@
|
|
11119
11484
|
dispatch:
|
11120
11485
|
CPU: log_sigmoid_forward_out_cpu
|
11121
11486
|
CUDA: log_sigmoid_forward_out_cuda
|
11487
|
+
MPS: log_sigmoid_forward_out_mps
|
11122
11488
|
|
11123
11489
|
- func: log_sigmoid_forward(Tensor self) -> (Tensor output, Tensor buffer)
|
11124
11490
|
device_check: NoCheck # TensorIterator
|
@@ -11126,18 +11492,21 @@
|
|
11126
11492
|
dispatch:
|
11127
11493
|
CPU: log_sigmoid_forward_cpu
|
11128
11494
|
CUDA: log_sigmoid_forward_cuda
|
11495
|
+
MPS: log_sigmoid_forward_mps
|
11129
11496
|
|
11130
11497
|
- func: log_sigmoid_backward.grad_input(Tensor grad_output, Tensor self, Tensor buffer, *, Tensor(a!) grad_input) -> Tensor(a!)
|
11131
11498
|
python_module: nn
|
11132
11499
|
dispatch:
|
11133
11500
|
CPU: log_sigmoid_backward_cpu_out
|
11134
11501
|
CUDA: log_sigmoid_backward_cuda_out
|
11502
|
+
MPS: log_sigmoid_backward_mps_out
|
11135
11503
|
|
11136
11504
|
- func: log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor
|
11137
11505
|
python_module: nn
|
11138
11506
|
dispatch:
|
11139
11507
|
CPU: log_sigmoid_backward_cpu
|
11140
11508
|
CUDA: log_sigmoid_backward_cuda
|
11509
|
+
MPS: log_sigmoid_backward_mps
|
11141
11510
|
|
11142
11511
|
- func: rrelu_with_noise.out(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None, *, Tensor(a!) out) -> Tensor(a!)
|
11143
11512
|
python_module: nn
|
@@ -11279,6 +11648,7 @@
|
|
11279
11648
|
CUDA: adaptive_avg_pool3d_cuda
|
11280
11649
|
QuantizedCPU: adaptive_avg_pool3d_quantized_cpu
|
11281
11650
|
autogen: _adaptive_avg_pool3d.out
|
11651
|
+
tags: core
|
11282
11652
|
|
11283
11653
|
- func: adaptive_avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
|
11284
11654
|
python_module: nn
|
@@ -11394,6 +11764,7 @@
|
|
11394
11764
|
dispatch:
|
11395
11765
|
MkldnnCPU: mkldnn_avg_pool3d
|
11396
11766
|
QuantizedCPU: avg_pool3d_quantized_cpu
|
11767
|
+
tags: core
|
11397
11768
|
|
11398
11769
|
- func: avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override, *, Tensor(a!) grad_input) -> Tensor(a!)
|
11399
11770
|
python_module: nn
|
@@ -11517,25 +11888,25 @@
|
|
11517
11888
|
CPU: max_pool3d_with_indices_backward_cpu
|
11518
11889
|
CUDA: max_pool3d_with_indices_backward_cuda
|
11519
11890
|
|
11520
|
-
- func: max_unpool2d.out(Tensor self, Tensor indices,
|
11891
|
+
- func: max_unpool2d.out(Tensor self, Tensor indices, SymInt[2] output_size, *, Tensor(a!) out) -> Tensor(a!)
|
11521
11892
|
python_module: nn
|
11522
11893
|
dispatch:
|
11523
11894
|
CPU: max_unpooling2d_forward_out_cpu
|
11524
11895
|
CUDA: max_unpooling2d_forward_out_cuda
|
11525
11896
|
|
11526
|
-
- func: max_unpool2d(Tensor self, Tensor indices,
|
11897
|
+
- func: max_unpool2d(Tensor self, Tensor indices, SymInt[2] output_size) -> Tensor
|
11527
11898
|
python_module: nn
|
11528
11899
|
dispatch:
|
11529
11900
|
CPU: max_unpooling2d_forward_cpu
|
11530
11901
|
CUDA: max_unpooling2d_forward_cuda
|
11531
11902
|
|
11532
|
-
- func: max_unpool3d.out(Tensor self, Tensor indices,
|
11903
|
+
- func: max_unpool3d.out(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding, *, Tensor(a!) out) -> Tensor(a!)
|
11533
11904
|
python_module: nn
|
11534
11905
|
dispatch:
|
11535
11906
|
CPU: max_unpooling3d_forward_out_cpu
|
11536
11907
|
CUDA: max_unpooling3d_forward_out_cuda
|
11537
11908
|
|
11538
|
-
- func: max_unpool3d(Tensor self, Tensor indices,
|
11909
|
+
- func: max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor
|
11539
11910
|
python_module: nn
|
11540
11911
|
dispatch:
|
11541
11912
|
CPU: max_unpooling3d_forward_cpu
|
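The `max_unpool2d` / `max_unpool3d` schemas now take `SymInt[]` output sizes; the user-facing call is unchanged. A short sketch using the PyTorch Python frontend:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 1, 4, 4)
pooled, indices = F.max_pool2d(x, kernel_size=2, return_indices=True)
# unpool restores the 1x1x4x4 shape, placing values at the argmax positions
restored = F.max_unpool2d(pooled, indices, kernel_size=2)
```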
@@ -11553,6 +11924,7 @@
|
|
11553
11924
|
- func: reflection_pad1d(Tensor self, SymInt[2] padding) -> Tensor
|
11554
11925
|
python_module: nn
|
11555
11926
|
structured_delegate: reflection_pad1d.out
|
11927
|
+
tags: core
|
11556
11928
|
|
11557
11929
|
- func: reflection_pad1d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
|
11558
11930
|
python_module: nn
|
@@ -11607,6 +11979,7 @@
|
|
11607
11979
|
- func: reflection_pad3d(Tensor self, SymInt[6] padding) -> Tensor
|
11608
11980
|
python_module: nn
|
11609
11981
|
structured_delegate: reflection_pad3d.out
|
11982
|
+
tags: core
|
11610
11983
|
|
11611
11984
|
- func: reflection_pad3d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[6] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
|
11612
11985
|
python_module: nn
|
@@ -12069,6 +12442,7 @@
|
|
12069
12442
|
structured_inherits: TensorIteratorBase
|
12070
12443
|
dispatch:
|
12071
12444
|
CPU, CUDA: logit_backward_out
|
12445
|
+
MPS: logit_backward_out_mps
|
12072
12446
|
tags: pointwise
|
12073
12447
|
|
12074
12448
|
- func: logit_backward(Tensor grad_output, Tensor self, float? eps=None) -> Tensor
|
@@ -12715,157 +13089,229 @@
|
|
12715
13089
|
|
12716
13090
|
# torch.fft.fft
|
12717
13091
|
# NOTE: NOT an alias for torch.fft, which has different semantics
|
12718
|
-
- func: fft_fft(Tensor self,
|
13092
|
+
- func: fft_fft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
|
12719
13093
|
python_module: fft
|
12720
13094
|
variants: function
|
13095
|
+
dispatch:
|
13096
|
+
CompositeImplicitAutograd: fft_fft_symint
|
12721
13097
|
|
12722
|
-
- func: fft_fft.out(Tensor self,
|
13098
|
+
- func: fft_fft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12723
13099
|
python_module: fft
|
12724
13100
|
variants: function
|
13101
|
+
dispatch:
|
13102
|
+
CompositeImplicitAutograd: fft_fft_symint_out
|
12725
13103
|
|
12726
|
-
- func: fft_ifft(Tensor self,
|
13104
|
+
- func: fft_ifft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
|
12727
13105
|
python_module: fft
|
12728
13106
|
variants: function
|
13107
|
+
dispatch:
|
13108
|
+
CompositeImplicitAutograd: fft_ifft_symint
|
12729
13109
|
|
12730
|
-
- func: fft_ifft.out(Tensor self,
|
13110
|
+
- func: fft_ifft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12731
13111
|
python_module: fft
|
12732
13112
|
variants: function
|
13113
|
+
dispatch:
|
13114
|
+
CompositeImplicitAutograd: fft_ifft_symint_out
|
12733
13115
|
|
12734
|
-
- func: fft_rfft(Tensor self,
|
13116
|
+
- func: fft_rfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
|
12735
13117
|
python_module: fft
|
12736
13118
|
variants: function
|
13119
|
+
dispatch:
|
13120
|
+
CompositeImplicitAutograd: fft_rfft_symint
|
12737
13121
|
|
12738
|
-
- func: fft_rfft.out(Tensor self,
|
13122
|
+
- func: fft_rfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12739
13123
|
python_module: fft
|
12740
13124
|
variants: function
|
13125
|
+
dispatch:
|
13126
|
+
CompositeImplicitAutograd: fft_rfft_symint_out
|
12741
13127
|
|
12742
|
-
- func: fft_irfft(Tensor self,
|
13128
|
+
- func: fft_irfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
|
12743
13129
|
python_module: fft
|
12744
13130
|
variants: function
|
13131
|
+
dispatch:
|
13132
|
+
CompositeImplicitAutograd: fft_irfft_symint
|
12745
13133
|
|
12746
|
-
- func: fft_irfft.out(Tensor self,
|
13134
|
+
- func: fft_irfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12747
13135
|
python_module: fft
|
12748
13136
|
variants: function
|
13137
|
+
dispatch:
|
13138
|
+
CompositeImplicitAutograd: fft_irfft_symint_out
|
12749
13139
|
|
12750
|
-
- func: fft_hfft(Tensor self,
|
13140
|
+
- func: fft_hfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
|
12751
13141
|
python_module: fft
|
12752
13142
|
variants: function
|
13143
|
+
dispatch:
|
13144
|
+
CompositeImplicitAutograd: fft_hfft_symint
|
12753
13145
|
|
12754
|
-
- func: fft_hfft.out(Tensor self,
|
13146
|
+
- func: fft_hfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12755
13147
|
python_module: fft
|
12756
13148
|
variants: function
|
13149
|
+
dispatch:
|
13150
|
+
CompositeImplicitAutograd: fft_hfft_symint_out
|
12757
13151
|
|
12758
|
-
- func: fft_ihfft(Tensor self,
|
13152
|
+
- func: fft_ihfft(Tensor self, SymInt? n=None, int dim=-1, str? norm=None) -> Tensor
|
12759
13153
|
python_module: fft
|
12760
13154
|
variants: function
|
13155
|
+
dispatch:
|
13156
|
+
CompositeImplicitAutograd: fft_ihfft_symint
|
12761
13157
|
|
12762
|
-
- func: fft_ihfft.out(Tensor self,
|
13158
|
+
- func: fft_ihfft.out(Tensor self, SymInt? n=None, int dim=-1, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12763
13159
|
python_module: fft
|
12764
13160
|
variants: function
|
13161
|
+
dispatch:
|
13162
|
+
CompositeImplicitAutograd: fft_ihfft_symint_out
|
12765
13163
|
|
12766
|
-
- func: fft_fft2(Tensor self,
|
13164
|
+
- func: fft_fft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
|
12767
13165
|
python_module: fft
|
12768
13166
|
variants: function
|
13167
|
+
dispatch:
|
13168
|
+
CompositeImplicitAutograd: fft_fft2_symint
|
12769
13169
|
|
12770
|
-
- func: fft_fft2.out(Tensor self,
|
13170
|
+
- func: fft_fft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12771
13171
|
python_module: fft
|
12772
13172
|
variants: function
|
13173
|
+
dispatch:
|
13174
|
+
CompositeImplicitAutograd: fft_fft2_symint_out
|
12773
13175
|
|
12774
|
-
- func: fft_ifft2(Tensor self,
|
13176
|
+
- func: fft_ifft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
|
12775
13177
|
python_module: fft
|
12776
13178
|
variants: function
|
13179
|
+
dispatch:
|
13180
|
+
CompositeImplicitAutograd: fft_ifft2_symint
|
12777
13181
|
|
12778
|
-
- func: fft_ifft2.out(Tensor self,
|
13182
|
+
- func: fft_ifft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12779
13183
|
python_module: fft
|
12780
13184
|
variants: function
|
13185
|
+
dispatch:
|
13186
|
+
CompositeImplicitAutograd: fft_ifft2_symint_out
|
12781
13187
|
|
12782
|
-
- func: fft_rfft2(Tensor self,
|
13188
|
+
- func: fft_rfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
|
12783
13189
|
python_module: fft
|
12784
13190
|
variants: function
|
13191
|
+
dispatch:
|
13192
|
+
CompositeImplicitAutograd: fft_rfft2_symint
|
12785
13193
|
|
12786
|
-
- func: fft_rfft2.out(Tensor self,
|
13194
|
+
- func: fft_rfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12787
13195
|
python_module: fft
|
12788
13196
|
variants: function
|
13197
|
+
dispatch:
|
13198
|
+
CompositeImplicitAutograd: fft_rfft2_symint_out
|
12789
13199
|
|
12790
|
-
- func: fft_irfft2(Tensor self,
|
13200
|
+
- func: fft_irfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
|
12791
13201
|
python_module: fft
|
12792
13202
|
variants: function
|
13203
|
+
dispatch:
|
13204
|
+
CompositeImplicitAutograd: fft_irfft2_symint
|
12793
13205
|
|
12794
|
-
- func: fft_irfft2.out(Tensor self,
|
13206
|
+
- func: fft_irfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12795
13207
|
python_module: fft
|
12796
13208
|
variants: function
|
13209
|
+
dispatch:
|
13210
|
+
CompositeImplicitAutograd: fft_irfft2_symint_out
|
12797
13211
|
|
12798
|
-
- func: fft_hfft2(Tensor self,
|
13212
|
+
- func: fft_hfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
|
12799
13213
|
use_const_ref_for_mutable_tensors: True
|
12800
13214
|
python_module: fft
|
12801
13215
|
variants: function
|
13216
|
+
dispatch:
|
13217
|
+
CompositeImplicitAutograd: fft_hfft2_symint
|
12802
13218
|
|
12803
|
-
- func: fft_hfft2.out(Tensor self,
|
13219
|
+
- func: fft_hfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12804
13220
|
use_const_ref_for_mutable_tensors: True
|
12805
13221
|
python_module: fft
|
12806
13222
|
variants: function
|
13223
|
+
dispatch:
|
13224
|
+
CompositeImplicitAutograd: fft_hfft2_symint_out
|
12807
13225
|
|
12808
|
-
- func: fft_ihfft2(Tensor self,
|
13226
|
+
- func: fft_ihfft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
|
12809
13227
|
use_const_ref_for_mutable_tensors: True
|
12810
13228
|
python_module: fft
|
12811
13229
|
variants: function
|
13230
|
+
dispatch:
|
13231
|
+
CompositeImplicitAutograd: fft_ihfft2_symint
|
12812
13232
|
|
12813
|
-
- func: fft_ihfft2.out(Tensor self,
|
13233
|
+
- func: fft_ihfft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12814
13234
|
use_const_ref_for_mutable_tensors: True
|
12815
13235
|
python_module: fft
|
12816
13236
|
variants: function
|
13237
|
+
dispatch:
|
13238
|
+
CompositeImplicitAutograd: fft_ihfft2_symint_out
|
12817
13239
|
|
12818
|
-
- func: fft_fftn(Tensor self,
|
13240
|
+
- func: fft_fftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
|
12819
13241
|
python_module: fft
|
12820
13242
|
variants: function
|
13243
|
+
dispatch:
|
13244
|
+
CompositeImplicitAutograd: fft_fftn_symint
|
12821
13245
|
|
12822
|
-
- func: fft_fftn.out(Tensor self,
|
13246
|
+
- func: fft_fftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12823
13247
|
python_module: fft
|
12824
13248
|
variants: function
|
13249
|
+
dispatch:
|
13250
|
+
CompositeImplicitAutograd: fft_fftn_symint_out
|
12825
13251
|
|
12826
|
-
- func: fft_ifftn(Tensor self,
|
13252
|
+
- func: fft_ifftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
|
12827
13253
|
python_module: fft
|
12828
13254
|
variants: function
|
13255
|
+
dispatch:
|
13256
|
+
CompositeImplicitAutograd: fft_ifftn_symint
|
12829
13257
|
|
12830
|
-
- func: fft_ifftn.out(Tensor self,
|
13258
|
+
- func: fft_ifftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12831
13259
|
python_module: fft
|
12832
13260
|
variants: function
|
13261
|
+
dispatch:
|
13262
|
+
CompositeImplicitAutograd: fft_ifftn_symint_out
|
12833
13263
|
|
12834
|
-
- func: fft_rfftn(Tensor self,
|
13264
|
+
- func: fft_rfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
|
12835
13265
|
python_module: fft
|
12836
13266
|
variants: function
|
13267
|
+
dispatch:
|
13268
|
+
CompositeImplicitAutograd: fft_rfftn_symint
|
12837
13269
|
|
12838
|
-
- func: fft_rfftn.out(Tensor self,
|
13270
|
+
- func: fft_rfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12839
13271
|
python_module: fft
|
12840
13272
|
variants: function
|
13273
|
+
dispatch:
|
13274
|
+
CompositeImplicitAutograd: fft_rfftn_symint_out
|
12841
13275
|
|
12842
|
-
- func: fft_irfftn(Tensor self,
|
13276
|
+
- func: fft_irfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
|
12843
13277
|
python_module: fft
|
12844
13278
|
variants: function
|
13279
|
+
dispatch:
|
13280
|
+
CompositeImplicitAutograd: fft_irfftn_symint
|
12845
13281
|
|
12846
|
-
- func: fft_irfftn.out(Tensor self,
|
13282
|
+
- func: fft_irfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12847
13283
|
python_module: fft
|
12848
13284
|
variants: function
|
13285
|
+
dispatch:
|
13286
|
+
CompositeImplicitAutograd: fft_irfftn_symint_out
|
12849
13287
|
|
12850
|
-
- func: fft_hfftn(Tensor self,
|
13288
|
+
- func: fft_hfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
|
12851
13289
|
use_const_ref_for_mutable_tensors: True
|
12852
13290
|
python_module: fft
|
12853
13291
|
variants: function
|
13292
|
+
dispatch:
|
13293
|
+
CompositeImplicitAutograd: fft_hfftn_symint
|
12854
13294
|
|
12855
|
-
- func: fft_hfftn.out(Tensor self,
|
13295
|
+
- func: fft_hfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12856
13296
|
use_const_ref_for_mutable_tensors: True
|
12857
13297
|
python_module: fft
|
12858
13298
|
variants: function
|
13299
|
+
dispatch:
|
13300
|
+
CompositeImplicitAutograd: fft_hfftn_symint_out
|
12859
13301
|
|
12860
|
-
- func: fft_ihfftn(Tensor self,
|
13302
|
+
- func: fft_ihfftn(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
|
12861
13303
|
use_const_ref_for_mutable_tensors: True
|
12862
13304
|
python_module: fft
|
12863
13305
|
variants: function
|
13306
|
+
dispatch:
|
13307
|
+
CompositeImplicitAutograd: fft_ihfftn_symint
|
12864
13308
|
|
12865
|
-
- func: fft_ihfftn.out(Tensor self,
|
13309
|
+
- func: fft_ihfftn.out(Tensor self, SymInt[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
|
12866
13310
|
use_const_ref_for_mutable_tensors: True
|
12867
13311
|
python_module: fft
|
12868
13312
|
variants: function
|
13313
|
+
dispatch:
|
13314
|
+
CompositeImplicitAutograd: fft_ihfftn_symint_out
|
12869
13315
|
|
12870
13316
|
- func: fft_fftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
|
12871
13317
|
python_module: fft
|
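All `fft_*` schemas in this hunk switch their length arguments (`n`, `s`) to `SymInt` and gain explicit `CompositeImplicitAutograd` dispatch entries; the calls themselves are unchanged. For example, via the PyTorch Python frontend:

```python
import torch

x = torch.randn(100)
X = torch.fft.fft(x, n=128)                     # zero-padded 128-point FFT
y = torch.fft.irfft(torch.fft.rfft(x), n=100)   # round-trip through the real FFT
```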
@@ -13210,6 +13656,7 @@
|
|
13210
13656
|
structured: True
|
13211
13657
|
dispatch:
|
13212
13658
|
CPU, CUDA: linalg_vector_norm_out
|
13659
|
+
MPS: linalg_vector_norm_out_mps
|
13213
13660
|
|
13214
13661
|
- func: linalg_matrix_norm(Tensor self, Scalar ord, int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
|
13215
13662
|
python_module: linalg
|
@@ -13788,6 +14235,7 @@
|
|
13788
14235
|
dispatch:
|
13789
14236
|
NestedTensorCPU: NestedTensor_softmax_dropout
|
13790
14237
|
NestedTensorCUDA: NestedTensor_softmax_dropout_cuda
|
14238
|
+
tags: nondeterministic_seeded
|
13791
14239
|
|
13792
14240
|
# Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
|
13793
14241
|
- func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
|
@@ -13803,67 +14251,71 @@
|
|
13803
14251
|
CUDA, NestedTensorCUDA: native_multi_head_attention_cuda
|
13804
14252
|
autogen: _native_multi_head_attention.out
|
13805
14253
|
|
13806
|
-
- func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False) -> Tensor
|
14254
|
+
- func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> Tensor
|
13807
14255
|
python_module: nn
|
13808
14256
|
variants: function
|
13809
14257
|
autogen: scaled_dot_product_attention.out
|
13810
|
-
|
13811
|
-
# TODO: THIS NEEDS TO BE REMOVED BUT PEOPLE HAVE TRAINED THEIR MODELS WITH THIS OP BUILTIN
|
13812
|
-
- func: _scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool need_attn_weights=False, bool is_causal=False) -> (Tensor, Tensor)
|
13813
|
-
python_module: nn
|
13814
|
-
variants: function
|
13815
|
-
autogen: _scaled_dot_product_attention.out
|
14258
|
+
tags: nondeterministic_seeded
|
13816
14259
|
|
13817
14260
|
# This aten function is kept so that we can test the choice function from Python
|
13818
|
-
- func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False) -> int
|
14261
|
+
- func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int
|
13819
14262
|
dispatch:
|
13820
14263
|
Meta: _fused_sdp_choice_meta
|
13821
14264
|
CPU, NestedTensorCPU: _fused_sdp_choice_cpp
|
13822
14265
|
CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
|
14266
|
+
tags: nondeterministic_seeded
|
13823
14267
|
|
13824
|
-
- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None) -> (Tensor, Tensor)
|
14268
|
+
- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
|
13825
14269
|
variants: function
|
14270
|
+
tags: nondeterministic_seeded
|
13826
14271
|
|
13827
|
-
- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False) -> (Tensor ouput, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k,
|
14272
|
+
- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor ouput, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
|
13828
14273
|
dispatch:
|
14274
|
+
CPU: _scaled_dot_product_flash_attention_cpu
|
13829
14275
|
CUDA: _scaled_dot_product_flash_attention_cuda
|
13830
14276
|
NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
|
14277
|
+
tags: nondeterministic_seeded
|
13831
14278
|
|
13832
|
-
- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal,
|
14279
|
+
- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
|
14280
|
+
device_check: NoCheck
|
13833
14281
|
variants: function
|
13834
14282
|
dispatch:
|
14283
|
+
CPU: _scaled_dot_product_flash_attention_backward_cpu
|
13835
14284
|
CUDA: _scaled_dot_product_flash_attention_backward_cuda
|
13836
14285
|
|
13837
|
-
- func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, bool compute_log_sumexp, bool is_causal=False) -> (Tensor, Tensor)
|
14286
|
+
- func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
|
13838
14287
|
dispatch:
|
13839
14288
|
CUDA: _scaled_dot_product_efficient_attention_cuda
|
13840
14289
|
NestedTensorCUDA: _scaled_dot_product_efficient_attention_nestedtensor_cuda
|
14290
|
+
tags: nondeterministic_seeded
|
13841
14291
|
|
13842
|
-
- func: _scaled_dot_product_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, bool is_causal=False,
|
14292
|
+
- func: _scaled_dot_product_efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor attn_bias, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, float dropout_p, bool[4] grad_input_mask, bool is_causal=False, *, float? scale=None) -> (Tensor, Tensor, Tensor, Tensor)
|
14293
|
+
device_check: NoCheck
|
13843
14294
|
dispatch:
|
13844
14295
|
CUDA: _scaled_dot_product_efficient_attention_backward_cuda
|
14296
|
+
tags: nondeterministic_seeded
|
13845
14297
|
|
13846
|
-
- func:
|
13847
|
-
dispatch:
|
13848
|
-
CUDA: _chunk_grad_outputs_efficient_attention
|
13849
|
-
|
13850
|
-
- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, bool return_debug_mask) -> (Tensor output, Tensor softmax_logsumexp, int philox_seed, int philox_offset, Tensor debug_attn_mask)
|
14298
|
+
- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
|
13851
14299
|
variants: function
|
13852
14300
|
dispatch:
|
13853
14301
|
CUDA: _flash_attention_forward
|
14302
|
+
tags: nondeterministic_seeded
|
13854
14303
|
|
13855
|
-
- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal,
|
14304
|
+
- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
|
14305
|
+
device_check: NoCheck
|
13856
14306
|
variants: function
|
13857
14307
|
dispatch:
|
13858
14308
|
CUDA: _flash_attention_backward
|
13859
14309
|
|
13860
14310
|
# Returns ouput, logsumexp if compute_logsumexp
|
13861
|
-
- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, bool compute_log_sumexp=False,
|
14311
|
+
- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
|
13862
14312
|
variants: function
|
13863
14313
|
dispatch:
|
13864
14314
|
CUDA: _efficient_attention_forward
|
14315
|
+
tags: nondeterministic_seeded
|
13865
14316
|
|
13866
|
-
- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, bool
|
14317
|
+
- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int max_seqlen_k, int max_seqlen_q, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
|
14318
|
+
device_check: NoCheck
|
13867
14319
|
variants: function
|
13868
14320
|
dispatch:
|
13869
14321
|
CUDA: _efficient_attention_backward
|
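The attention schemas above gain a keyword-only `scale` argument (overriding the default 1/sqrt(head_dim)) and `nondeterministic_seeded` tags. A minimal sketch of the public entry point, shown with the PyTorch Python frontend:

```python
import torch
import torch.nn.functional as F

q = torch.randn(2, 8, 16, 64)
k = torch.randn(2, 8, 16, 64)
v = torch.randn(2, 8, 16, 64)

# explicitly pass the default softmax scaling for a 64-dim head
out = F.scaled_dot_product_attention(q, k, v, scale=1.0 / 64**0.5)
```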
@@ -13872,8 +14324,15 @@
|
|
13872
14324
|
variants: function
|
13873
14325
|
dispatch:
|
13874
14326
|
CUDA: triton_scaled_dot_attention
|
14327
|
+
tags: nondeterministic_seeded
|
13875
14328
|
autogen: _triton_scaled_dot_attention.out
|
13876
14329
|
|
14330
|
+
- func: _fill_mem_eff_dropout_mask_(Tensor(a!) self, float dropout_p, int seed, int offset) -> Tensor(a!)
|
14331
|
+
variants: function
|
14332
|
+
dispatch:
|
14333
|
+
CUDA: _fill_mem_eff_dropout_mask_
|
14334
|
+
tags: nondeterministic_seeded
|
14335
|
+
|
13877
14336
|
- func: _triton_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
|
13878
14337
|
variants: function
|
13879
14338
|
dispatch:
|
@@ -13895,18 +14354,6 @@
|
|
13895
14354
|
variants: function
|
13896
14355
|
tags: pointwise
|
13897
14356
|
|
13898
|
-
- func: _transformer_decoder_only_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, Tensor? incr_key=None, Tensor? incr_value=None) -> (Tensor, Tensor, Tensor)
|
13899
|
-
variants: function
|
13900
|
-
dispatch:
|
13901
|
-
CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_decoder_only_layer_forward
|
13902
|
-
autogen: _transformer_decoder_only_layer_fwd.out
|
13903
|
-
|
13904
|
-
- func: _native_decoder_only_multi_head_attention(Tensor query, Tensor key, Tensor value, int embed_dim, int num_head, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None, Tensor? incr_key=None, Tensor? incr_value=None, bool need_weights=True, bool average_attn_weights=True) -> (Tensor, Tensor, Tensor, Tensor)
|
13905
|
-
variants: function
|
13906
|
-
dispatch:
|
13907
|
-
CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: native_decoder_only_multi_head_attention
|
13908
|
-
autogen: _native_decoder_only_multi_head_attention.out
|
13909
|
-
|
13910
14357
|
- func: special_bessel_j0(Tensor self) -> Tensor
|
13911
14358
|
python_module: special
|
13912
14359
|
structured_delegate: special_bessel_j0.out
|
@@ -14603,9 +15050,31 @@
|
|
14603
15050
|
CUDA: _fused_adam_kernel_cuda_
|
14604
15051
|
autogen: _fused_adam, _fused_adam.out
|
14605
15052
|
|
15053
|
+
- func: _fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
|
15054
|
+
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now),
|
15055
|
+
# but still skip the device check as the Tensor LR can be on CPU
|
15056
|
+
device_check: NoCheck
|
15057
|
+
variants: function
|
15058
|
+
dispatch:
|
15059
|
+
CUDA: _fused_adam_kernel_cuda_
|
15060
|
+
autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out
|
15061
|
+
|
14606
15062
|
- func: _fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
|
14607
15063
|
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
|
14608
15064
|
variants: function
|
14609
15065
|
dispatch:
|
14610
15066
|
CUDA: _fused_adamw_kernel_cuda_
|
14611
15067
|
autogen: _fused_adamw, _fused_adamw.out
|
15068
|
+
|
15069
|
+
- func: _fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
|
15070
|
+
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now),
|
15071
|
+
# but still skip the device check as the Tensor LR can be on CPU
|
15072
|
+
device_check: NoCheck
|
15073
|
+
variants: function
|
15074
|
+
dispatch:
|
15075
|
+
CUDA: _fused_adamw_kernel_cuda_
|
15076
|
+
autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
|
15077
|
+
|
15078
|
+
# This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
|
15079
|
+
- func: _propagate_xla_data(Tensor input, Tensor output) -> ()
|
15080
|
+
variants: function
|