torch-rb 0.16.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +2 -1
- data/codegen/generate_functions.rb +6 -6
- data/codegen/native_functions.yaml +269 -161
- data/ext/torch/fft_functions.h +6 -0
- data/ext/torch/linalg_functions.h +6 -0
- data/ext/torch/nn_functions.h +6 -0
- data/ext/torch/sparse_functions.h +6 -0
- data/ext/torch/special_functions.h +6 -0
- data/ext/torch/tensor_functions.h +6 -0
- data/ext/torch/torch_functions.h +6 -0
- data/ext/torch/utils.h +1 -1
- data/lib/torch/nn/functional.rb +11 -1
- data/lib/torch/nn/functional_attention.rb +5 -5
- data/lib/torch/nn/module.rb +24 -4
- data/lib/torch/tensor.rb +10 -4
- data/lib/torch/version.rb +1 -1
- metadata +11 -4
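
Most of the hunks below come from `data/codegen/native_functions.yaml`, the PyTorch operator-schema file that `data/codegen/generate_functions.rb` consumes when generating the gem's native bindings (the `data/ext/torch/*_functions.h` headers listed above belong to the same binding layer). Each entry pairs an operator schema (`func:`) with metadata such as `variants:` and per-backend `dispatch:` keys, so the hunks mostly add new operators or new backend keys rather than alter existing schemas. For orientation, here are two representative changes excerpted from the hunks below (illustrative excerpt only, not the full entries):

    # new operator schema added in the 0.17.x copy of native_functions.yaml
    - func: rms_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor

    # new CPU dispatch key added to an existing operator
    - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
      dispatch:
        CPU: _int_mm_cpu
        CUDA: _int_mm_cuda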
data/codegen/native_functions.yaml

@@ -549,8 +549,8 @@
   structured_delegate: add.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: add_sparse
-    SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
+    SparseCPU, SparseCUDA, SparseMeta: add_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
     MkldnnCPU: mkldnn_add
     ZeroTensor: add_zerotensor
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
@@ -561,8 +561,8 @@
   variants: method
   structured_delegate: add.out
   dispatch:
-    SparseCPU, SparseCUDA: add_sparse_
-    SparseCsrCPU, SparseCsrCUDA: add_sparse_csr_
+    SparseCPU, SparseCUDA, SparseMeta: add_sparse_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
     MkldnnCPU: mkldnn_add_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
   tags: pointwise
@@ -575,9 +575,9 @@
     Generic: add (AllAndComplex, BFloat16, Half, ComplexHalf)
     ScalarOnly: add (Bool)
   dispatch:
-    SparseCPU: add_out_sparse_cpu
+    SparseCPU, SparseMeta: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCPU, SparseCsrMeta: add_out_sparse_compressed_cpu
     SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
@@ -1750,6 +1750,7 @@
 - func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
   variants: function
   dispatch:
+    Meta: copy_meta
     CompositeExplicitAutogradNonFunctional: copy
   tags: core

@@ -3127,6 +3128,7 @@
   structured: True
   dispatch:
     CPU, CUDA: isin_Tensor_Tensor_out
+    MPS: isin_Tensor_Tensor_out_mps

 - func: isin.Tensor_Tensor(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor
   variants: function
@@ -3268,6 +3270,8 @@
   autogen: native_layer_norm_backward.out
   tags: core

+- func: rms_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
   dispatch:
@@ -3340,10 +3344,31 @@
   dispatch:
     CUDA: _cslt_sparse_mm_search

+- func: _sparse_semi_structured_tile(Tensor input, str algorithm="", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CUDA: _sparse_semi_structured_tile
+
+- func: _sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)
+  dispatch:
+    CUDA: _sparse_semi_structured_apply
+
+- func: _sparse_semi_structured_apply_dense(Tensor input, Tensor thread_masks) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_apply_dense
+
+# DEPRECATED: Use torch.__sparse_semi_structured_mm/torch._sparse_semi_structured_addmm instead
 - func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear

+- func: _sparse_semi_structured_mm(Tensor mat1, Tensor mat1_meta, Tensor mat2, *, ScalarType? out_dtype=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_mm
+
+- func: _sparse_semi_structured_addmm(Tensor input, Tensor mat1, Tensor mat1_meta, Tensor mat2, *, Scalar alpha=1, Scalar beta=1, ScalarType? out_dtype=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_addmm
+
 - func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
   dispatch:
     CUDA: _mixed_dtypes_linear
@@ -4084,10 +4109,12 @@

 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
+    CPU: _int_mm_cpu
     CUDA: _int_mm_cuda

 - func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
+    CPU: _int_mm_out_cpu
     CUDA: _int_mm_out_cuda

 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
@@ -4098,11 +4125,13 @@
 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
     CPU: _weight_int4pack_mm_cpu
+    MPS: _weight_int4pack_mm_mps
     CUDA: _weight_int4pack_mm_cuda

 - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
   dispatch:
     CPU: _weight_int8pack_mm_cpu
+    MPS: _weight_int8pack_mm_mps

 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
@@ -5397,7 +5426,7 @@
   autogen: slice_backward.out

 # NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
-# slice.Tensor, split_with_sizes, et
+# slice.Tensor, split_with_sizes, et al.). Currently, these are only used during fake-ification
 # of PT2 graph input subclass instances that are views. This means:
 # * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
 # * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
@@ -5620,10 +5649,12 @@
 - func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _chunk_cat
+    CUDA: _chunk_cat_cuda

 - func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: _chunk_cat_out
+    CUDA: _chunk_cat_out_cuda

 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
@@ -5689,8 +5720,8 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: sum
-    SparseCPU, SparseCUDA: sum_coo
-    SparseCsrCPU, SparseCsrCUDA: sum_csr
+    SparseCPU, SparseCUDA, SparseMeta: sum_coo
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_csr
   autogen: sum.out

 - func: sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -6200,6 +6231,12 @@
   category_override: dummy
   dispatch: {}

+- func: _nested_compute_contiguous_strides_offsets(Tensor nested_size) -> (Tensor, Tensor)
+  variants: function
+  device_check: NoCheck
+  dispatch:
+    CPU, CUDA: _nested_compute_contiguous_strides_offsets
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
@@ -6465,7 +6502,7 @@
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
     MPS: _efficientzerotensor_mps
-    Meta:
+    Meta: _efficientzerotensor_meta_symint
   autogen: _efficientzerotensor.out

 - func: zeros(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6542,6 +6579,32 @@
     SparseCPU, SparseCUDA: norm_sparse
   autogen: native_norm.ScalarOpt_dim_dtype_out

+- func: _batch_norm_with_update(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CPU: _batch_norm_with_update_cpu
+    CUDA: _batch_norm_with_update_cuda
+    MPS: _batch_norm_with_update_mps
+    MkldnnCPU: _batch_norm_with_update_mkldnn
+  autogen: _batch_norm_with_update_functional
+
+- func: _batch_norm_with_update.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd, Tensor(g!) reserve) -> (Tensor(d!), Tensor(e!), Tensor(f!), Tensor(g!))
+  dispatch:
+    CPU: _batch_norm_with_update_cpu_out
+    CUDA: _batch_norm_with_update_cuda_out
+    MPS: _batch_norm_with_update_mps_out
+
+- func: _batch_norm_no_update(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CompositeExplicitAutograd: _batch_norm_no_update
+  autogen: _batch_norm_no_update.out
+
+- func: batch_norm_backward(Tensor grad_out, Tensor input, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, bool update, float eps, bool[3] output_mask, Tensor reserve) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CPU: _new_batch_norm_backward_cpu
+    CUDA: _new_batch_norm_backward_cuda
+    MPS: _new_batch_norm_backward_mps
+    MkldnnCPU: _new_batch_norm_backward_mkldnn
+
 # TODO: reduce signatures down to one when optional args is available
 - func: _sparse_sum(Tensor self) -> Tensor

@@ -7042,6 +7105,10 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

+- func: _sparse_compressed_tensor_with_dims(int nnz, int dense_dim, int[] size, int[] blocksize, ScalarType index_dtype, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: sparse_compressed_tensor_with_dims
+
 - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor
@@ -7146,9 +7213,9 @@
 - func: sparse_dim(Tensor self) -> int
   variants: method
   dispatch:
-    CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
+    CompositeExplicitAutograd: sparse_dim_default
   device_check: NoCheck
   device_guard: False

@@ -7163,9 +7230,9 @@
 - func: dense_dim(Tensor self) -> int
   variants: method
   dispatch:
-    CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
+    CompositeExplicitAutograd: dense_dim_default
   device_check: NoCheck
   device_guard: False

@@ -7296,7 +7363,7 @@
   device_check: NoCheck # Allows copy into different device
   variants: function
   dispatch:
-    SparseCPU, SparseCUDA: copy_sparse_
+    SparseCPU, SparseCUDA, SparseMeta: copy_sparse_
   autogen: copy_sparse_to_sparse, copy_sparse_to_sparse.out

 # By adding the AutogradNestedTensor this makes this function CompositeImplicit-like for nested tensors
@@ -7399,7 +7466,7 @@
     MkldnnCPU: mkldnn_reorder_conv2d_weight
   autogen: mkldnn_reorder_conv2d_weight.out

-- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
+- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
   variants: function
   python_module: nn
   dispatch:
@@ -7647,7 +7714,7 @@

 - func: result_type.Scalar_Scalar(Scalar scalar1, Scalar scalar2) -> ScalarType

-- func: can_cast(ScalarType
+- func: can_cast(ScalarType from_, ScalarType to) -> bool
   variants: function

 - func: promote_types(ScalarType type1, ScalarType type2) -> ScalarType
@@ -10222,6 +10289,7 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: alias
+    NestedTensorCPU, NestedTensorCUDA: alias_nested
   tags: core

 - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
@@ -10255,14 +10323,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow
     CUDA: foreach_tensor_add_scalar_kernel_cuda

 - func: _foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow_
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
   autogen: _foreach_add.Scalar_out

@@ -10270,14 +10338,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow
     CUDA: foreach_tensor_add_list_kernel_cuda

 - func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow_
     CUDA: foreach_tensor_add_list_kernel_cuda_
   autogen: _foreach_add.List_out

@@ -10285,14 +10353,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalarlist_kernel_slow
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda

 - func: _foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalarlist_kernel_slow_
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
   autogen: _foreach_add.ScalarList_out

@@ -10300,14 +10368,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow
     CUDA: foreach_tensor_add_tensor_kernel_cuda

 - func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow_
     CUDA: foreach_tensor_add_tensor_kernel_cuda_
   autogen: _foreach_add.Tensor_out

@@ -10315,14 +10383,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalar_kernel_slow
     CUDA: foreach_tensor_sub_scalar_kernel_cuda

 - func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalar_kernel_slow_
     CUDA: foreach_tensor_sub_scalar_kernel_cuda_
   autogen: _foreach_sub.Scalar_out

@@ -10330,14 +10398,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_list_kernel_slow
     CUDA: foreach_tensor_sub_list_kernel_cuda

 - func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_list_kernel_slow_
     CUDA: foreach_tensor_sub_list_kernel_cuda_
   autogen: _foreach_sub.List_out

@@ -10345,14 +10413,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalarlist_kernel_slow
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda

 - func: _foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalarlist_kernel_slow_
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
   autogen: _foreach_sub.ScalarList_out

@@ -10360,14 +10428,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow
     CUDA: foreach_tensor_mul_scalar_kernel_cuda

 - func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow_
     CUDA: foreach_tensor_mul_scalar_kernel_cuda_
   autogen: _foreach_mul.Scalar_out

@@ -10375,14 +10443,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow
     CUDA: foreach_tensor_mul_list_kernel_cuda

 - func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow_
     CUDA: foreach_tensor_mul_list_kernel_cuda_
   autogen: _foreach_mul.List_out

@@ -10390,14 +10458,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalarlist_kernel_slow
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda

 - func: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalarlist_kernel_slow_
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
   autogen: _foreach_mul.ScalarList_out

@@ -10405,14 +10473,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow
     CUDA: foreach_tensor_mul_tensor_kernel_cuda

 - func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow_
     CUDA: foreach_tensor_mul_tensor_kernel_cuda_
   autogen: _foreach_mul.Tensor_out

@@ -10420,14 +10488,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalar_kernel_slow
     CUDA: foreach_tensor_div_scalar_kernel_cuda

 - func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalar_kernel_slow_
     CUDA: foreach_tensor_div_scalar_kernel_cuda_
   autogen: _foreach_div.Scalar_out

@@ -10435,14 +10503,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow
     CUDA: foreach_tensor_div_list_kernel_cuda

 - func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow_
     CUDA: foreach_tensor_div_list_kernel_cuda_
   autogen: _foreach_div.List_out

@@ -10450,14 +10518,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalarlist_kernel_slow
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda

 - func: _foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalarlist_kernel_slow_
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
   autogen: _foreach_div.ScalarList_out

@@ -10465,14 +10533,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow
     CUDA: foreach_tensor_div_tensor_kernel_cuda

 - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow_
     CUDA: foreach_tensor_div_tensor_kernel_cuda_
   autogen: _foreach_div.Tensor_out

@@ -10480,14 +10548,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda

 - func: _foreach_clamp_max_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
   autogen: _foreach_clamp_max.Scalar_out

@@ -10495,14 +10563,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda

 - func: _foreach_clamp_max_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow_
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_clamp_max.List_out

@@ -10510,14 +10578,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda

 - func: _foreach_clamp_max_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_clamp_max.ScalarList_out

@@ -10525,14 +10593,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda

 - func: _foreach_clamp_min_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
   autogen: _foreach_clamp_min.Scalar_out

@@ -10540,14 +10608,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda

 - func: _foreach_clamp_min_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow_
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
   autogen: _foreach_clamp_min.List_out

@@ -10555,14 +10623,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda

 - func: _foreach_clamp_min_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
   autogen: _foreach_clamp_min.ScalarList_out

@@ -10571,14 +10639,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda

 - func: _foreach_maximum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
   autogen: _foreach_maximum.Scalar_out

@@ -10587,14 +10655,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda

 - func: _foreach_maximum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow_
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
   autogen: _foreach_maximum.List_out

@@ -10603,14 +10671,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda

 - func: _foreach_maximum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
   autogen: _foreach_maximum.ScalarList_out

@@ -10618,14 +10686,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda

 - func: _foreach_minimum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
   autogen: _foreach_minimum.Scalar_out

@@ -10633,14 +10701,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda

 - func: _foreach_minimum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow_
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_minimum.List_out

@@ -10648,14 +10716,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda

 - func: _foreach_minimum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_minimum.ScalarList_out

@@ -10663,28 +10731,28 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalar_slow
     CUDA: foreach_tensor_addcdiv_scalar_cuda

 - func: _foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalarlist_slow
     CUDA: foreach_tensor_addcdiv_scalarlist_cuda

 - func: _foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_tensor_slow
     CUDA: foreach_tensor_addcdiv_tensor_cuda

 - func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalar_slow_
     CUDA: foreach_tensor_addcdiv_scalar_cuda_
   autogen: _foreach_addcdiv.Scalar_out

@@ -10692,7 +10760,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalarlist_slow_
     CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
   autogen: _foreach_addcdiv.ScalarList_out

@@ -10700,7 +10768,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_tensor_slow_
     CUDA: foreach_tensor_addcdiv_tensor_cuda_
   autogen: _foreach_addcdiv.Tensor_out

@@ -10708,28 +10776,28 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow
     CUDA: foreach_tensor_addcmul_scalar_cuda

 - func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalarlist_slow
     CUDA: foreach_tensor_addcmul_scalarlist_cuda

 - func: _foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_tensor_slow
     CUDA: foreach_tensor_addcmul_tensor_cuda

 - func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow_
     CUDA: foreach_tensor_addcmul_scalar_cuda_
   autogen: _foreach_addcmul.Scalar_out

@@ -10737,7 +10805,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalarlist_slow_
     CUDA: foreach_tensor_addcmul_scalarlist_cuda_
   autogen: _foreach_addcmul.ScalarList_out

@@ -10745,7 +10813,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_tensor_slow_
     CUDA: foreach_tensor_addcmul_tensor_cuda_
   autogen: _foreach_addcmul.Tensor_out

@@ -10753,14 +10821,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_abs_slow
     CUDA: foreach_tensor_abs_cuda

 - func: _foreach_abs_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_abs_slow_
     CUDA: foreach_tensor_abs_cuda_
   autogen: _foreach_abs.out

@@ -10768,14 +10836,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_acos_slow
     CUDA: foreach_tensor_acos_cuda

 - func: _foreach_acos_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_acos_slow_
     CUDA: foreach_tensor_acos_cuda_
   autogen: _foreach_acos.out

@@ -10783,14 +10851,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_asin_slow
     CUDA: foreach_tensor_asin_cuda

 - func: _foreach_asin_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_asin_slow_
     CUDA: foreach_tensor_asin_cuda_
   autogen: _foreach_asin.out

@@ -10798,14 +10866,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_atan_slow
     CUDA: foreach_tensor_atan_cuda

 - func: _foreach_atan_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_atan_slow_
     CUDA: foreach_tensor_atan_cuda_
   autogen: _foreach_atan.out

@@ -10813,14 +10881,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ceil_slow
     CUDA: foreach_tensor_ceil_cuda

 - func: _foreach_ceil_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ceil_slow_
     CUDA: foreach_tensor_ceil_cuda_
   autogen: _foreach_ceil.out

@@ -10828,14 +10896,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cos_slow
     CUDA: foreach_tensor_cos_cuda

 - func: _foreach_cos_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cos_slow_
     CUDA: foreach_tensor_cos_cuda_
   autogen: _foreach_cos.out

@@ -10843,14 +10911,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cosh_slow
     CUDA: foreach_tensor_cosh_cuda

 - func: _foreach_cosh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cosh_slow_
     CUDA: foreach_tensor_cosh_cuda_
   autogen: _foreach_cosh.out

@@ -10858,14 +10926,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erf_slow
     CUDA: foreach_tensor_erf_cuda

 - func: _foreach_erf_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erf_slow_
     CUDA: foreach_tensor_erf_cuda_
   autogen: _foreach_erf.out

@@ -10873,14 +10941,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erfc_slow
     CUDA: foreach_tensor_erfc_cuda

 - func: _foreach_erfc_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erfc_slow_
     CUDA: foreach_tensor_erfc_cuda_
   autogen: _foreach_erfc.out

@@ -10888,14 +10956,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_exp_slow
     CUDA: foreach_tensor_exp_cuda

 - func: _foreach_exp_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_exp_slow_
     CUDA: foreach_tensor_exp_cuda_
   autogen: _foreach_exp.out

@@ -10903,14 +10971,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_expm1_slow
     CUDA: foreach_tensor_expm1_cuda

 - func: _foreach_expm1_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_expm1_slow_
     CUDA: foreach_tensor_expm1_cuda_
   autogen: _foreach_expm1.out

@@ -10918,14 +10986,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_floor_slow
     CUDA: foreach_tensor_floor_cuda

 - func: _foreach_floor_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_floor_slow_
     CUDA: foreach_tensor_floor_cuda_
   autogen: _foreach_floor.out

@@ -10933,14 +11001,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_frac_slow
     CUDA: foreach_tensor_frac_cuda

 - func: _foreach_frac_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_frac_slow_
     CUDA: foreach_tensor_frac_cuda_
   autogen: _foreach_frac.out

@@ -10948,7 +11016,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ternary_lerp_slow
     CUDA: foreach_tensor_lerp_ternary_cuda
   autogen: _foreach_lerp.List_out

@@ -10956,7 +11024,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ternary_lerp_slow_
     CUDA: foreach_tensor_lerp_ternary_cuda_
   autogen: _foreach_lerp.List_out

@@ -10964,7 +11032,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lerp_list_kernel_slow
     CUDA: foreach_tensor_lerp_list_cuda
   autogen: _foreach_lerp.Scalar_out

@@ -10972,7 +11040,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lerp_list_kernel_slow_
     CUDA: foreach_tensor_lerp_list_cuda_
   autogen: _foreach_lerp.Scalar_out

@@ -10980,14 +11048,14 @@
|
|
10980
11048
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10981
11049
|
variants: function
|
10982
11050
|
dispatch:
|
10983
|
-
|
11051
|
+
CompositeExplicitAutograd: foreach_tensor_lgamma_slow
|
10984
11052
|
CUDA: foreach_tensor_lgamma_cuda
|
10985
11053
|
|
10986
11054
|
- func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
|
10987
11055
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10988
11056
|
variants: function
|
10989
11057
|
dispatch:
|
10990
|
-
|
11058
|
+
CompositeExplicitAutograd: foreach_tensor_lgamma_slow_
|
10991
11059
|
CUDA: foreach_tensor_lgamma_cuda_
|
10992
11060
|
autogen: _foreach_lgamma.out
|
10993
11061
|
|
@@ -10995,14 +11063,14 @@
|
|
10995
11063
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
10996
11064
|
variants: function
|
10997
11065
|
dispatch:
|
10998
|
-
|
11066
|
+
CompositeExplicitAutograd: foreach_tensor_log_slow
|
10999
11067
|
CUDA: foreach_tensor_log_cuda
|
11000
11068
|
|
11001
11069
|
- func: _foreach_log_(Tensor(a!)[] self) -> ()
|
11002
11070
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11003
11071
|
variants: function
|
11004
11072
|
dispatch:
|
11005
|
-
|
11073
|
+
CompositeExplicitAutograd: foreach_tensor_log_slow_
|
11006
11074
|
CUDA: foreach_tensor_log_cuda_
|
11007
11075
|
autogen: _foreach_log.out
|
11008
11076
|
|
@@ -11010,14 +11078,14 @@
|
|
11010
11078
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11011
11079
|
variants: function
|
11012
11080
|
dispatch:
|
11013
|
-
|
11081
|
+
CompositeExplicitAutograd: foreach_tensor_log10_slow
|
11014
11082
|
CUDA: foreach_tensor_log10_cuda
|
11015
11083
|
|
11016
11084
|
- func: _foreach_log10_(Tensor(a!)[] self) -> ()
|
11017
11085
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11018
11086
|
variants: function
|
11019
11087
|
dispatch:
|
11020
|
-
|
11088
|
+
CompositeExplicitAutograd: foreach_tensor_log10_slow_
|
11021
11089
|
CUDA: foreach_tensor_log10_cuda_
|
11022
11090
|
autogen: _foreach_log10.out
|
11023
11091
|
|
@@ -11025,14 +11093,14 @@
|
|
11025
11093
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11026
11094
|
variants: function
|
11027
11095
|
dispatch:
|
11028
|
-
|
11096
|
+
CompositeExplicitAutograd: foreach_tensor_log1p_slow
|
11029
11097
|
CUDA: foreach_tensor_log1p_cuda
|
11030
11098
|
|
11031
11099
|
- func: _foreach_log1p_(Tensor(a!)[] self) -> ()
|
11032
11100
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11033
11101
|
variants: function
|
11034
11102
|
dispatch:
|
11035
|
-
|
11103
|
+
CompositeExplicitAutograd: foreach_tensor_log1p_slow_
|
11036
11104
|
CUDA: foreach_tensor_log1p_cuda_
|
11037
11105
|
autogen: _foreach_log1p.out
|
11038
11106
|
|
@@ -11040,37 +11108,45 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log2_slow
     CUDA: foreach_tensor_log2_cuda
 
 - func: _foreach_log2_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log2_slow_
     CUDA: foreach_tensor_log2_cuda_
   autogen: _foreach_log2.out
 
+- func: _foreach_max(Tensor[] self) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: foreach_tensor_max_slow
+    CUDA: foreach_tensor_max_cuda
+  autogen: _foreach_max.out
+
 - func: _foreach_neg(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_neg_slow
     CUDA: foreach_tensor_neg_cuda
 
 - func: _foreach_neg_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_neg_slow_
     CUDA: foreach_tensor_neg_cuda_
   autogen: _foreach_neg.out
 
-- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]
+- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2, ScalarType? dtype=None) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_norm_slow
     CUDA: foreach_tensor_norm_cuda
   autogen: _foreach_norm.Scalar_out
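The `_foreach_*` entries above declare list-of-tensors kernels; this release adds `_foreach_max` and a `dtype` argument to `_foreach_norm.Scalar`. As a rough illustration only, assuming nothing beyond `Torch.tensor` and `Tensor#max` from torch-rb, the semantics of a foreach reduction are equivalent to mapping the per-tensor op over a Ruby array:

```ruby
# Illustrative sketch, not the fused kernel: _foreach_max conceptually
# returns the per-tensor maximum for each tensor in a list.
require "torch"

tensors = [Torch.tensor([1.0, 3.0, 2.0]), Torch.tensor([4.0, 0.5])]
maxes   = tensors.map(&:max)  # one scalar tensor per input tensor
```

The fused CUDA path registered by `CUDA: foreach_tensor_max_cuda` performs the same reduction across the whole list in a single kernel launch instead of one call per tensor.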
@@ -11078,35 +11154,35 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_list_kernel_slow
     CUDA: foreach_tensor_pow_list_kernel_cuda
 
 - func: _foreach_pow.Scalar(Tensor[] self, Scalar exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalar_kernel_slow
     CUDA: foreach_tensor_pow_scalar_kernel_cuda
 
 - func: _foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalarlist_kernel_slow
     CUDA: foreach_tensor_pow_scalarlist_kernel_cuda
 
 - func: _foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_scalar_pow_list_kernel_slow
     CUDA: foreach_scalar_pow_list_kernel_cuda
 
 - func: _foreach_pow_.List(Tensor(a!)[] self, Tensor[] exponent) -> ()
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_list_kernel_slow_
     CUDA: foreach_tensor_pow_list_kernel_cuda_
   autogen: _foreach_pow.List_out
 
@@ -11114,7 +11190,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalar_kernel_slow_
     CUDA: foreach_tensor_pow_scalar_kernel_cuda_
   autogen: _foreach_pow.Scalar_out
 
@@ -11122,7 +11198,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalarlist_kernel_slow_
     CUDA: foreach_tensor_pow_scalarlist_kernel_cuda_
   autogen: _foreach_pow.ScalarList_out
 
@@ -11130,14 +11206,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_reciprocal_slow
     CUDA: foreach_tensor_reciprocal_cuda
 
 - func: _foreach_reciprocal_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_reciprocal_slow_
     CUDA: foreach_tensor_reciprocal_cuda_
   autogen: _foreach_reciprocal.out
 
@@ -11145,14 +11221,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_round_slow
     CUDA: foreach_tensor_round_cuda
 
 - func: _foreach_round_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_round_slow_
     CUDA: foreach_tensor_round_cuda_
   autogen: _foreach_round.out
 
@@ -11160,14 +11236,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sigmoid_slow
     CUDA: foreach_tensor_sigmoid_cuda
 
 - func: _foreach_sigmoid_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sigmoid_slow_
     CUDA: foreach_tensor_sigmoid_cuda_
   autogen: _foreach_sigmoid.out
 
@@ -11175,14 +11251,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sign_slow
     CUDA: foreach_tensor_sign_cuda
 
 - func: _foreach_sign_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sign_slow_
     CUDA: foreach_tensor_sign_cuda_
   autogen: _foreach_sign.out
 
@@ -11190,14 +11266,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sin_slow
     CUDA: foreach_tensor_sin_cuda
 
 - func: _foreach_sin_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sin_slow_
     CUDA: foreach_tensor_sin_cuda_
   autogen: _foreach_sin.out
 
@@ -11205,14 +11281,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sinh_slow
     CUDA: foreach_tensor_sinh_cuda
 
 - func: _foreach_sinh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sinh_slow_
     CUDA: foreach_tensor_sinh_cuda_
   autogen: _foreach_sinh.out
 
@@ -11220,14 +11296,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sqrt_slow
     CUDA: foreach_tensor_sqrt_cuda
 
 - func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sqrt_slow_
     CUDA: foreach_tensor_sqrt_cuda_
   autogen: _foreach_sqrt.out
 
@@ -11235,14 +11311,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tan_slow
     CUDA: foreach_tensor_tan_cuda
 
 - func: _foreach_tan_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tan_slow_
     CUDA: foreach_tensor_tan_cuda_
   autogen: _foreach_tan.out
 
@@ -11250,14 +11326,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tanh_slow
     CUDA: foreach_tensor_tanh_cuda
 
 - func: _foreach_tanh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tanh_slow_
     CUDA: foreach_tensor_tanh_cuda_
   autogen: _foreach_tanh.out
 
@@ -11265,14 +11341,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_trunc_slow
     CUDA: foreach_tensor_trunc_cuda
 
 - func: _foreach_trunc_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_trunc_slow_
     CUDA: foreach_tensor_trunc_cuda_
   autogen: _foreach_trunc.out
 
@@ -11280,7 +11356,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_zero_slow_
     CUDA: foreach_tensor_zero_cuda_
   autogen: _foreach_zero, _foreach_zero.out
 
@@ -11288,9 +11364,15 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_copy_list_kernel_slow_
     CUDA: foreach_tensor_copy_list_kernel_cuda_
-  autogen: _foreach_copy
+  autogen: _foreach_copy.out
+
+- func: _foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _foreach_copy
 
 - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
   dispatch:
@@ -14562,6 +14644,16 @@
     NestedTensorCUDA: NestedTensor_to_padded_tensor_cuda
   autogen: to_padded_tensor.out
 
+- func: _jagged_to_padded_dense_forward(Tensor values, Tensor[] offsets, SymInt[] max_lengths, float padding_value=0.0) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _fbgemm_jagged_to_padded_dense_forward
+
+- func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _fbgemm_dense_to_jagged_forward_symint
+
 - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
   dispatch:
     NestedTensorCPU: NestedTensor_softmax_dropout
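The new `_jagged_to_padded_dense_forward` and `_padded_dense_to_jagged_forward` entries register CUDA-only kernels (dispatched to fbgemm implementations) for converting between jagged, variable-length batches and dense padded tensors. As a rough CPU-side sketch of what "jagged to padded dense" means, assuming only `Torch.tensor` from torch-rb rather than the kernels themselves:

```ruby
# Conceptual sketch only: pad variable-length rows to a dense
# [batch, max_length] tensor, filling with a padding value.
require "torch"

rows          = [[1.0, 2.0, 3.0], [4.0]]   # jagged input rows
padding_value = 0.0
max_length    = rows.map(&:length).max
padded        = rows.map { |r| r + [padding_value] * (max_length - r.length) }
dense         = Torch.tensor(padded)       # shape [2, 3]
```

The reverse declaration (`_padded_dense_to_jagged_forward`) describes the opposite conversion, dropping the padding again based on per-row offsets.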
@@ -14636,31 +14728,36 @@
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
   tags: nondeterministic_seeded
 
-- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
     CUDA: _scaled_dot_product_cudnn_attention_cuda
   tags: nondeterministic_seeded
 
-- func:
+- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
+  tags: nondeterministic_seeded
+
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
     CUDA: _flash_attention_forward
   tags: nondeterministic_seeded
 
-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
     CUDA: _flash_attention_backward
 
 # Returns output, logsumexp if compute_logsumexp
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k,
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt? max_seqlen_q, SymInt? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? seqlen_k=None, int? window_size=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
   tags: nondeterministic_seeded
 
-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None, int? window_size=None, bool shared_storage_dqdkdv=False) -> (Tensor, Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
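These hunks widen the fused attention signatures: the cuDNN attention entry now returns the extra sequence-bookkeeping tensors, and the flash/efficient attention entries gain windowed-attention arguments. For orientation only, the computation all of these kernels accelerate is plain scaled dot-product attention; a minimal eager sketch in torch-rb, assuming single-head `[seq, dim]` inputs and using only `Torch.matmul`, `Tensor#transpose`, and `Tensor#softmax`, would be:

```ruby
# Minimal eager sketch of scaled dot-product attention (no dropout, no
# masking); the native entries above declare fused/flash variants of this.
require "torch"

q = Torch.randn(4, 8)
k = Torch.randn(4, 8)
v = Torch.randn(4, 8)

scale   = 1.0 / Math.sqrt(q.size(-1))
scores  = Torch.matmul(q, k.transpose(0, 1)) * scale
weights = scores.softmax(-1)
output  = Torch.matmul(weights, v)
```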
@@ -15460,11 +15557,11 @@
     CPU: foobar
   autogen: _foobar.out
 
-# Fused Optimizer CUDA kernels.
 - func: _fused_adam_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
   variants: function
   dispatch:
+    CPU: _fused_adam_kernel_cpu_
     CUDA: _fused_adam_kernel_cuda_
   autogen: _fused_adam, _fused_adam.out
 
@@ -15474,6 +15571,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
+    CPU: _fused_adam_kernel_cpu_
     CUDA: _fused_adam_kernel_cuda_
   autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out
 
@@ -15481,6 +15579,7 @@
   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
   variants: function
   dispatch:
+    CPU: _fused_adamw_kernel_cpu_
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw, _fused_adamw.out
 
@@ -15490,6 +15589,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
+    CPU: _fused_adamw_kernel_cpu_
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
 
@@ -15497,6 +15597,7 @@
   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
   variants: function
   dispatch:
+    CPU: _fused_sgd_kernel_cpu_
     CUDA: _fused_sgd_kernel_cuda_
   autogen: _fused_sgd, _fused_sgd.out
 
@@ -15506,9 +15607,16 @@
   device_check: NoCheck
   variants: function
   dispatch:
+    CPU: _fused_sgd_kernel_cpu_
     CUDA: _fused_sgd_kernel_cuda_
   autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
 
+- func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  variants: function
+  dispatch:
+    CPU: _fused_adagrad_kernel_cpu_
+  autogen: _fused_adagrad, _fused_adagrad.out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function
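The optimizer hunks register CPU dispatch entries for the fused Adam/AdamW/SGD steps and add a fused Adagrad declaration, so these fused step kernels are no longer CUDA-only. They are low-level dispatch registrations; from the torch-rb side, training code keeps using the high-level optimizer classes as usual. A standard loop is sketched below purely for context; whether a fused native kernel runs underneath is an implementation detail of the underlying library, not something this snippet selects.

```ruby
# Standard torch-rb training step with a built-in optimizer; shown only to
# place the fused-optimizer dispatch entries above in context.
require "torch"

model     = Torch::NN::Linear.new(10, 1)
optimizer = Torch::Optim::SGD.new(model.parameters, lr: 0.01)
loss_fn   = Torch::NN::MSELoss.new

x = Torch.randn(32, 10)
y = Torch.randn(32, 1)

optimizer.zero_grad
loss = loss_fn.call(model.call(x), y)
loss.backward
optimizer.step
```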