torch-rb 0.16.0 → 0.17.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +2 -1
- data/codegen/generate_functions.rb +6 -6
- data/codegen/native_functions.yaml +269 -161
- data/ext/torch/fft_functions.h +6 -0
- data/ext/torch/linalg_functions.h +6 -0
- data/ext/torch/nn_functions.h +6 -0
- data/ext/torch/sparse_functions.h +6 -0
- data/ext/torch/special_functions.h +6 -0
- data/ext/torch/tensor_functions.h +6 -0
- data/ext/torch/torch_functions.h +6 -0
- data/ext/torch/utils.h +1 -1
- data/lib/torch/nn/functional.rb +11 -1
- data/lib/torch/nn/functional_attention.rb +5 -5
- data/lib/torch/nn/module.rb +24 -4
- data/lib/torch/tensor.rb +10 -4
- data/lib/torch/version.rb +1 -1
- metadata +11 -4
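
Most of the churn in this release is the regenerated data/codegen/native_functions.yaml (diffed below), which data/codegen/generate_functions.rb consumes to emit the C++ binding headers listed above (fft_functions.h, linalg_functions.h, and so on). As a rough illustration of how that schema surfaces on the Ruby side, here is a minimal sketch; it assumes the regenerated bindings expose newly declared ops such as rms_norm under the usual Torch module, which the file list alone does not guarantee.

  require "torch"

  x = Torch.rand(2, 3)
  y = Torch.rand(2, 3)

  # add.Tensor: this release only widens its sparse dispatch table
  # (SparseMeta, SparseCsrMeta); the dense path used here is unchanged.
  z = Torch.add(x, y)

  # rms_norm is newly declared in native_functions.yaml; whether this release
  # wires it up as Torch.rms_norm is an assumption, so the call is guarded.
  r = Torch.rms_norm(x, [3]) if Torch.respond_to?(:rms_norm)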
data/codegen/native_functions.yaml (+269 -161):

@@ -549,8 +549,8 @@
   structured_delegate: add.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: add_sparse
-    SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
+    SparseCPU, SparseCUDA, SparseMeta: add_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
     MkldnnCPU: mkldnn_add
     ZeroTensor: add_zerotensor
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
@@ -561,8 +561,8 @@
   variants: method
   structured_delegate: add.out
   dispatch:
-    SparseCPU, SparseCUDA: add_sparse_
-    SparseCsrCPU, SparseCsrCUDA: add_sparse_csr_
+    SparseCPU, SparseCUDA, SparseMeta: add_sparse_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
     MkldnnCPU: mkldnn_add_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
   tags: pointwise
@@ -575,9 +575,9 @@
     Generic: add (AllAndComplex, BFloat16, Half, ComplexHalf)
     ScalarOnly: add (Bool)
   dispatch:
-    SparseCPU: add_out_sparse_cpu
+    SparseCPU, SparseMeta: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU: add_out_sparse_compressed_cpu
+    SparseCsrCPU, SparseCsrMeta: add_out_sparse_compressed_cpu
     SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
@@ -1750,6 +1750,7 @@
 - func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
   variants: function
   dispatch:
+    Meta: copy_meta
     CompositeExplicitAutogradNonFunctional: copy
   tags: core

@@ -3127,6 +3128,7 @@
   structured: True
   dispatch:
     CPU, CUDA: isin_Tensor_Tensor_out
+    MPS: isin_Tensor_Tensor_out_mps

 - func: isin.Tensor_Tensor(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor
   variants: function
@@ -3268,6 +3270,8 @@
   autogen: native_layer_norm_backward.out
   tags: core

+- func: rms_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
   dispatch:
@@ -3340,10 +3344,31 @@
   dispatch:
     CUDA: _cslt_sparse_mm_search

+- func: _sparse_semi_structured_tile(Tensor input, str algorithm="", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CUDA: _sparse_semi_structured_tile
+
+- func: _sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)
+  dispatch:
+    CUDA: _sparse_semi_structured_apply
+
+- func: _sparse_semi_structured_apply_dense(Tensor input, Tensor thread_masks) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_apply_dense
+
+# DEPRECATED: Use torch.__sparse_semi_structured_mm/torch._sparse_semi_structured_addmm instead
 - func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear

+- func: _sparse_semi_structured_mm(Tensor mat1, Tensor mat1_meta, Tensor mat2, *, ScalarType? out_dtype=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_mm
+
+- func: _sparse_semi_structured_addmm(Tensor input, Tensor mat1, Tensor mat1_meta, Tensor mat2, *, Scalar alpha=1, Scalar beta=1, ScalarType? out_dtype=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_addmm
+
 - func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
   dispatch:
     CUDA: _mixed_dtypes_linear
@@ -4084,10 +4109,12 @@

 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
+    CPU: _int_mm_cpu
     CUDA: _int_mm_cuda

 - func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
+    CPU: _int_mm_out_cpu
     CUDA: _int_mm_out_cuda

 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
@@ -4098,11 +4125,13 @@
 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
     CPU: _weight_int4pack_mm_cpu
+    MPS: _weight_int4pack_mm_mps
     CUDA: _weight_int4pack_mm_cuda

 - func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
   dispatch:
     CPU: _weight_int8pack_mm_cpu
+    MPS: _weight_int8pack_mm_mps

 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse
@@ -5397,7 +5426,7 @@
   autogen: slice_backward.out

 # NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
-# slice.Tensor, split_with_sizes, et
+# slice.Tensor, split_with_sizes, et al.). Currently, these are only used during fake-ification
 # of PT2 graph input subclass instances that are views. This means:
 # * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
 # * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
@@ -5620,10 +5649,12 @@
 - func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
   dispatch:
     CompositeExplicitAutograd: _chunk_cat
+    CUDA: _chunk_cat_cuda

 - func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: _chunk_cat_out
+    CUDA: _chunk_cat_out_cuda

 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
@@ -5689,8 +5720,8 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: sum
-    SparseCPU, SparseCUDA: sum_coo
-    SparseCsrCPU, SparseCsrCUDA: sum_csr
+    SparseCPU, SparseCUDA, SparseMeta: sum_coo
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_csr
   autogen: sum.out

 - func: sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -6200,6 +6231,12 @@
   category_override: dummy
   dispatch: {}

+- func: _nested_compute_contiguous_strides_offsets(Tensor nested_size) -> (Tensor, Tensor)
+  variants: function
+  device_check: NoCheck
+  dispatch:
+    CPU, CUDA: _nested_compute_contiguous_strides_offsets
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
@@ -6465,7 +6502,7 @@
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
     MPS: _efficientzerotensor_mps
-    Meta:
+    Meta: _efficientzerotensor_meta_symint
   autogen: _efficientzerotensor.out

 - func: zeros(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6542,6 +6579,32 @@
     SparseCPU, SparseCUDA: norm_sparse
   autogen: native_norm.ScalarOpt_dim_dtype_out

+- func: _batch_norm_with_update(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CPU: _batch_norm_with_update_cpu
+    CUDA: _batch_norm_with_update_cuda
+    MPS: _batch_norm_with_update_mps
+    MkldnnCPU: _batch_norm_with_update_mkldnn
+  autogen: _batch_norm_with_update_functional
+
+- func: _batch_norm_with_update.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd, Tensor(g!) reserve) -> (Tensor(d!), Tensor(e!), Tensor(f!), Tensor(g!))
+  dispatch:
+    CPU: _batch_norm_with_update_cpu_out
+    CUDA: _batch_norm_with_update_cuda_out
+    MPS: _batch_norm_with_update_mps_out
+
+- func: _batch_norm_no_update(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CompositeExplicitAutograd: _batch_norm_no_update
+  autogen: _batch_norm_no_update.out
+
+- func: batch_norm_backward(Tensor grad_out, Tensor input, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, bool update, float eps, bool[3] output_mask, Tensor reserve) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CPU: _new_batch_norm_backward_cpu
+    CUDA: _new_batch_norm_backward_cuda
+    MPS: _new_batch_norm_backward_mps
+    MkldnnCPU: _new_batch_norm_backward_mkldnn
+
 # TODO: reduce signatures down to one when optional args is available
 - func: _sparse_sum(Tensor self) -> Tensor

@@ -7042,6 +7105,10 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

+- func: _sparse_compressed_tensor_with_dims(int nnz, int dense_dim, int[] size, int[] blocksize, ScalarType index_dtype, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: sparse_compressed_tensor_with_dims
+
 - func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor
@@ -7146,9 +7213,9 @@
 - func: sparse_dim(Tensor self) -> int
   variants: method
   dispatch:
-    CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
+    CompositeExplicitAutograd: sparse_dim_default
   device_check: NoCheck
   device_guard: False

@@ -7163,9 +7230,9 @@
 - func: dense_dim(Tensor self) -> int
   variants: method
   dispatch:
-    CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
+    CompositeExplicitAutograd: dense_dim_default
   device_check: NoCheck
   device_guard: False

@@ -7296,7 +7363,7 @@
   device_check: NoCheck # Allows copy into different device
   variants: function
   dispatch:
-    SparseCPU, SparseCUDA: copy_sparse_
+    SparseCPU, SparseCUDA, SparseMeta: copy_sparse_
   autogen: copy_sparse_to_sparse, copy_sparse_to_sparse.out

 # By adding the AutogradNestedTensor this makes this function CompositeImplicit-like for nested tensors
@@ -7399,7 +7466,7 @@
     MkldnnCPU: mkldnn_reorder_conv2d_weight
   autogen: mkldnn_reorder_conv2d_weight.out

-- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
+- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
   variants: function
   python_module: nn
   dispatch:
@@ -7647,7 +7714,7 @@

 - func: result_type.Scalar_Scalar(Scalar scalar1, Scalar scalar2) -> ScalarType

-- func: can_cast(ScalarType
+- func: can_cast(ScalarType from_, ScalarType to) -> bool
   variants: function

 - func: promote_types(ScalarType type1, ScalarType type2) -> ScalarType
@@ -10222,6 +10289,7 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: alias
+    NestedTensorCPU, NestedTensorCUDA: alias_nested
   tags: core

 - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
@@ -10255,14 +10323,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow
     CUDA: foreach_tensor_add_scalar_kernel_cuda

 - func: _foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow_
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
   autogen: _foreach_add.Scalar_out

@@ -10270,14 +10338,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow
     CUDA: foreach_tensor_add_list_kernel_cuda

 - func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow_
     CUDA: foreach_tensor_add_list_kernel_cuda_
   autogen: _foreach_add.List_out

@@ -10285,14 +10353,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalarlist_kernel_slow
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda

 - func: _foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalarlist_kernel_slow_
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
   autogen: _foreach_add.ScalarList_out

@@ -10300,14 +10368,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow
     CUDA: foreach_tensor_add_tensor_kernel_cuda

 - func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow_
     CUDA: foreach_tensor_add_tensor_kernel_cuda_
   autogen: _foreach_add.Tensor_out

@@ -10315,14 +10383,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalar_kernel_slow
     CUDA: foreach_tensor_sub_scalar_kernel_cuda

 - func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalar_kernel_slow_
     CUDA: foreach_tensor_sub_scalar_kernel_cuda_
   autogen: _foreach_sub.Scalar_out

@@ -10330,14 +10398,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_list_kernel_slow
     CUDA: foreach_tensor_sub_list_kernel_cuda

 - func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_list_kernel_slow_
     CUDA: foreach_tensor_sub_list_kernel_cuda_
   autogen: _foreach_sub.List_out

@@ -10345,14 +10413,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalarlist_kernel_slow
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda

 - func: _foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalarlist_kernel_slow_
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
   autogen: _foreach_sub.ScalarList_out

@@ -10360,14 +10428,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow
     CUDA: foreach_tensor_mul_scalar_kernel_cuda

 - func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow_
     CUDA: foreach_tensor_mul_scalar_kernel_cuda_
   autogen: _foreach_mul.Scalar_out

@@ -10375,14 +10443,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow
     CUDA: foreach_tensor_mul_list_kernel_cuda

 - func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow_
     CUDA: foreach_tensor_mul_list_kernel_cuda_
   autogen: _foreach_mul.List_out

@@ -10390,14 +10458,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalarlist_kernel_slow
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda

 - func: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalarlist_kernel_slow_
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
   autogen: _foreach_mul.ScalarList_out

@@ -10405,14 +10473,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow
     CUDA: foreach_tensor_mul_tensor_kernel_cuda

 - func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow_
     CUDA: foreach_tensor_mul_tensor_kernel_cuda_
   autogen: _foreach_mul.Tensor_out

@@ -10420,14 +10488,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalar_kernel_slow
     CUDA: foreach_tensor_div_scalar_kernel_cuda

 - func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalar_kernel_slow_
     CUDA: foreach_tensor_div_scalar_kernel_cuda_
   autogen: _foreach_div.Scalar_out

@@ -10435,14 +10503,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow
     CUDA: foreach_tensor_div_list_kernel_cuda

 - func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow_
     CUDA: foreach_tensor_div_list_kernel_cuda_
   autogen: _foreach_div.List_out

@@ -10450,14 +10518,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalarlist_kernel_slow
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda

 - func: _foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalarlist_kernel_slow_
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
   autogen: _foreach_div.ScalarList_out

@@ -10465,14 +10533,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow
     CUDA: foreach_tensor_div_tensor_kernel_cuda

 - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow_
     CUDA: foreach_tensor_div_tensor_kernel_cuda_
   autogen: _foreach_div.Tensor_out

@@ -10480,14 +10548,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda

 - func: _foreach_clamp_max_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
   autogen: _foreach_clamp_max.Scalar_out

@@ -10495,14 +10563,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda

 - func: _foreach_clamp_max_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow_
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_clamp_max.List_out

@@ -10510,14 +10578,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda

 - func: _foreach_clamp_max_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_clamp_max.ScalarList_out

@@ -10525,14 +10593,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda

 - func: _foreach_clamp_min_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
   autogen: _foreach_clamp_min.Scalar_out

@@ -10540,14 +10608,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda

 - func: _foreach_clamp_min_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow_
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
   autogen: _foreach_clamp_min.List_out

@@ -10555,14 +10623,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda

 - func: _foreach_clamp_min_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
   autogen: _foreach_clamp_min.ScalarList_out

@@ -10571,14 +10639,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda

 - func: _foreach_maximum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
   autogen: _foreach_maximum.Scalar_out

@@ -10587,14 +10655,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda

 - func: _foreach_maximum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow_
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
   autogen: _foreach_maximum.List_out

@@ -10603,14 +10671,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda

 - func: _foreach_maximum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
   autogen: _foreach_maximum.ScalarList_out

@@ -10618,14 +10686,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda

 - func: _foreach_minimum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
   autogen: _foreach_minimum.Scalar_out

@@ -10633,14 +10701,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda

 - func: _foreach_minimum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow_
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_minimum.List_out

@@ -10648,14 +10716,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda

 - func: _foreach_minimum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_minimum.ScalarList_out

@@ -10663,28 +10731,28 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalar_slow
     CUDA: foreach_tensor_addcdiv_scalar_cuda

 - func: _foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalarlist_slow
     CUDA: foreach_tensor_addcdiv_scalarlist_cuda

 - func: _foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_tensor_slow
     CUDA: foreach_tensor_addcdiv_tensor_cuda

 - func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalar_slow_
     CUDA: foreach_tensor_addcdiv_scalar_cuda_
   autogen: _foreach_addcdiv.Scalar_out

@@ -10692,7 +10760,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalarlist_slow_
     CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
   autogen: _foreach_addcdiv.ScalarList_out

@@ -10700,7 +10768,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_tensor_slow_
     CUDA: foreach_tensor_addcdiv_tensor_cuda_
   autogen: _foreach_addcdiv.Tensor_out

@@ -10708,28 +10776,28 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow
     CUDA: foreach_tensor_addcmul_scalar_cuda

 - func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalarlist_slow
     CUDA: foreach_tensor_addcmul_scalarlist_cuda

 - func: _foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_tensor_slow
     CUDA: foreach_tensor_addcmul_tensor_cuda

 - func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow_
     CUDA: foreach_tensor_addcmul_scalar_cuda_
   autogen: _foreach_addcmul.Scalar_out

@@ -10737,7 +10805,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalarlist_slow_
     CUDA: foreach_tensor_addcmul_scalarlist_cuda_
   autogen: _foreach_addcmul.ScalarList_out

@@ -10745,7 +10813,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_tensor_slow_
     CUDA: foreach_tensor_addcmul_tensor_cuda_
   autogen: _foreach_addcmul.Tensor_out

@@ -10753,14 +10821,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_abs_slow
     CUDA: foreach_tensor_abs_cuda

 - func: _foreach_abs_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_abs_slow_
     CUDA: foreach_tensor_abs_cuda_
   autogen: _foreach_abs.out

@@ -10768,14 +10836,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_acos_slow
     CUDA: foreach_tensor_acos_cuda

 - func: _foreach_acos_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_acos_slow_
     CUDA: foreach_tensor_acos_cuda_
   autogen: _foreach_acos.out

@@ -10783,14 +10851,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_asin_slow
     CUDA: foreach_tensor_asin_cuda

 - func: _foreach_asin_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_asin_slow_
     CUDA: foreach_tensor_asin_cuda_
   autogen: _foreach_asin.out

@@ -10798,14 +10866,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_atan_slow
     CUDA: foreach_tensor_atan_cuda

 - func: _foreach_atan_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_atan_slow_
     CUDA: foreach_tensor_atan_cuda_
   autogen: _foreach_atan.out

@@ -10813,14 +10881,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ceil_slow
     CUDA: foreach_tensor_ceil_cuda

 - func: _foreach_ceil_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ceil_slow_
     CUDA: foreach_tensor_ceil_cuda_
   autogen: _foreach_ceil.out

@@ -10828,14 +10896,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cos_slow
     CUDA: foreach_tensor_cos_cuda

 - func: _foreach_cos_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cos_slow_
     CUDA: foreach_tensor_cos_cuda_
   autogen: _foreach_cos.out

@@ -10843,14 +10911,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cosh_slow
     CUDA: foreach_tensor_cosh_cuda

 - func: _foreach_cosh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cosh_slow_
     CUDA: foreach_tensor_cosh_cuda_
   autogen: _foreach_cosh.out

@@ -10858,14 +10926,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erf_slow
     CUDA: foreach_tensor_erf_cuda

 - func: _foreach_erf_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erf_slow_
     CUDA: foreach_tensor_erf_cuda_
   autogen: _foreach_erf.out

@@ -10873,14 +10941,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erfc_slow
     CUDA: foreach_tensor_erfc_cuda

 - func: _foreach_erfc_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erfc_slow_
     CUDA: foreach_tensor_erfc_cuda_
   autogen: _foreach_erfc.out

@@ -10888,14 +10956,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_exp_slow
     CUDA: foreach_tensor_exp_cuda

 - func: _foreach_exp_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_exp_slow_
     CUDA: foreach_tensor_exp_cuda_
   autogen: _foreach_exp.out

@@ -10903,14 +10971,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_expm1_slow
     CUDA: foreach_tensor_expm1_cuda

 - func: _foreach_expm1_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_expm1_slow_
     CUDA: foreach_tensor_expm1_cuda_
   autogen: _foreach_expm1.out

@@ -10918,14 +10986,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_floor_slow
     CUDA: foreach_tensor_floor_cuda

 - func: _foreach_floor_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_floor_slow_
     CUDA: foreach_tensor_floor_cuda_
   autogen: _foreach_floor.out

@@ -10933,14 +11001,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_frac_slow
     CUDA: foreach_tensor_frac_cuda

 - func: _foreach_frac_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_frac_slow_
     CUDA: foreach_tensor_frac_cuda_
   autogen: _foreach_frac.out

@@ -10948,7 +11016,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ternary_lerp_slow
     CUDA: foreach_tensor_lerp_ternary_cuda
   autogen: _foreach_lerp.List_out

@@ -10956,7 +11024,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ternary_lerp_slow_
     CUDA: foreach_tensor_lerp_ternary_cuda_
   autogen: _foreach_lerp.List_out

@@ -10964,7 +11032,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lerp_list_kernel_slow
     CUDA: foreach_tensor_lerp_list_cuda
   autogen: _foreach_lerp.Scalar_out

@@ -10972,7 +11040,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lerp_list_kernel_slow_
     CUDA: foreach_tensor_lerp_list_cuda_
   autogen: _foreach_lerp.Scalar_out

@@ -10980,14 +11048,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lgamma_slow
     CUDA: foreach_tensor_lgamma_cuda

 - func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lgamma_slow_
     CUDA: foreach_tensor_lgamma_cuda_
   autogen: _foreach_lgamma.out

@@ -10995,14 +11063,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log_slow
     CUDA: foreach_tensor_log_cuda

 - func: _foreach_log_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log_slow_
     CUDA: foreach_tensor_log_cuda_
   autogen: _foreach_log.out

@@ -11010,14 +11078,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log10_slow
     CUDA: foreach_tensor_log10_cuda

 - func: _foreach_log10_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log10_slow_
     CUDA: foreach_tensor_log10_cuda_
   autogen: _foreach_log10.out

@@ -11025,14 +11093,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log1p_slow
     CUDA: foreach_tensor_log1p_cuda

 - func: _foreach_log1p_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log1p_slow_
     CUDA: foreach_tensor_log1p_cuda_
   autogen: _foreach_log1p.out

@@ -11040,37 +11108,45 @@
|
|
11040
11108
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11041
11109
|
variants: function
|
11042
11110
|
dispatch:
|
11043
|
-
|
11111
|
+
CompositeExplicitAutograd: foreach_tensor_log2_slow
|
11044
11112
|
CUDA: foreach_tensor_log2_cuda
|
11045
11113
|
|
11046
11114
|
- func: _foreach_log2_(Tensor(a!)[] self) -> ()
|
11047
11115
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11048
11116
|
variants: function
|
11049
11117
|
dispatch:
|
11050
|
-
|
11118
|
+
CompositeExplicitAutograd: foreach_tensor_log2_slow_
|
11051
11119
|
CUDA: foreach_tensor_log2_cuda_
|
11052
11120
|
autogen: _foreach_log2.out
|
11053
11121
|
|
11122
|
+
- func: _foreach_max(Tensor[] self) -> Tensor[]
|
11123
|
+
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11124
|
+
variants: function
|
11125
|
+
dispatch:
|
11126
|
+
CompositeExplicitAutograd: foreach_tensor_max_slow
|
11127
|
+
CUDA: foreach_tensor_max_cuda
|
11128
|
+
autogen: _foreach_max.out
|
11129
|
+
|
11054
11130
|
- func: _foreach_neg(Tensor[] self) -> Tensor[]
|
11055
11131
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11056
11132
|
variants: function
|
11057
11133
|
dispatch:
|
11058
|
-
|
11134
|
+
CompositeExplicitAutograd: foreach_tensor_neg_slow
|
11059
11135
|
CUDA: foreach_tensor_neg_cuda
|
11060
11136
|
|
11061
11137
|
- func: _foreach_neg_(Tensor(a!)[] self) -> ()
|
11062
11138
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11063
11139
|
variants: function
|
11064
11140
|
dispatch:
|
11065
|
-
|
11141
|
+
CompositeExplicitAutograd: foreach_tensor_neg_slow_
|
11066
11142
|
CUDA: foreach_tensor_neg_cuda_
|
11067
11143
|
autogen: _foreach_neg.out
|
11068
11144
|
|
11069
|
-
- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]
|
11145
|
+
- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2, ScalarType? dtype=None) -> Tensor[]
|
11070
11146
|
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
|
11071
11147
|
variants: function
|
11072
11148
|
dispatch:
|
11073
|
-
|
11149
|
+
CompositeExplicitAutograd: foreach_tensor_norm_slow
|
11074
11150
|
CUDA: foreach_tensor_norm_cuda
|
11075
11151
|
autogen: _foreach_norm.Scalar_out
|
11076
11152
|
|
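For orientation, the newly added _foreach_max entry above registers a list-wise maximum: one result tensor per input tensor. A minimal torch-rb sketch of the same per-tensor result using plain tensor calls (this is not the internal kernel, just the loop it batches):

require "torch"

# Per-tensor maximum over a list, one tensor at a time; the CUDA
# foreach_tensor_max_cuda kernel fuses this loop into a single launch.
tensors = [Torch.tensor([1.0, 5.0]), Torch.tensor([3.0, 2.0])]
maxes = tensors.map(&:max) # => [tensor(5.), tensor(3.)]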
@@ -11078,35 +11154,35 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_pow_list_kernel_slow
 CUDA: foreach_tensor_pow_list_kernel_cuda

 - func: _foreach_pow.Scalar(Tensor[] self, Scalar exponent) -> Tensor[]
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_pow_scalar_kernel_slow
 CUDA: foreach_tensor_pow_scalar_kernel_cuda

 - func: _foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_pow_scalarlist_kernel_slow
 CUDA: foreach_tensor_pow_scalarlist_kernel_cuda

 - func: _foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_scalar_pow_list_kernel_slow
 CUDA: foreach_scalar_pow_list_kernel_cuda

 - func: _foreach_pow_.List(Tensor(a!)[] self, Tensor[] exponent) -> ()
 device_check: NoCheck
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_pow_list_kernel_slow_
 CUDA: foreach_tensor_pow_list_kernel_cuda_
 autogen: _foreach_pow.List_out

@@ -11114,7 +11190,7 @@
 device_check: NoCheck
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_pow_scalar_kernel_slow_
 CUDA: foreach_tensor_pow_scalar_kernel_cuda_
 autogen: _foreach_pow.Scalar_out

@@ -11122,7 +11198,7 @@
 device_check: NoCheck
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_pow_scalarlist_kernel_slow_
 CUDA: foreach_tensor_pow_scalarlist_kernel_cuda_
 autogen: _foreach_pow.ScalarList_out

@@ -11130,14 +11206,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_reciprocal_slow
 CUDA: foreach_tensor_reciprocal_cuda

 - func: _foreach_reciprocal_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_reciprocal_slow_
 CUDA: foreach_tensor_reciprocal_cuda_
 autogen: _foreach_reciprocal.out

@@ -11145,14 +11221,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_round_slow
 CUDA: foreach_tensor_round_cuda

 - func: _foreach_round_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_round_slow_
 CUDA: foreach_tensor_round_cuda_
 autogen: _foreach_round.out

@@ -11160,14 +11236,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sigmoid_slow
 CUDA: foreach_tensor_sigmoid_cuda

 - func: _foreach_sigmoid_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sigmoid_slow_
 CUDA: foreach_tensor_sigmoid_cuda_
 autogen: _foreach_sigmoid.out

@@ -11175,14 +11251,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sign_slow
 CUDA: foreach_tensor_sign_cuda

 - func: _foreach_sign_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sign_slow_
 CUDA: foreach_tensor_sign_cuda_
 autogen: _foreach_sign.out

@@ -11190,14 +11266,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sin_slow
 CUDA: foreach_tensor_sin_cuda

 - func: _foreach_sin_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sin_slow_
 CUDA: foreach_tensor_sin_cuda_
 autogen: _foreach_sin.out

@@ -11205,14 +11281,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sinh_slow
 CUDA: foreach_tensor_sinh_cuda

 - func: _foreach_sinh_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sinh_slow_
 CUDA: foreach_tensor_sinh_cuda_
 autogen: _foreach_sinh.out

@@ -11220,14 +11296,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sqrt_slow
 CUDA: foreach_tensor_sqrt_cuda

 - func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_sqrt_slow_
 CUDA: foreach_tensor_sqrt_cuda_
 autogen: _foreach_sqrt.out

@@ -11235,14 +11311,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_tan_slow
 CUDA: foreach_tensor_tan_cuda

 - func: _foreach_tan_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_tan_slow_
 CUDA: foreach_tensor_tan_cuda_
 autogen: _foreach_tan.out

@@ -11250,14 +11326,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_tanh_slow
 CUDA: foreach_tensor_tanh_cuda

 - func: _foreach_tanh_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_tanh_slow_
 CUDA: foreach_tensor_tanh_cuda_
 autogen: _foreach_tanh.out

@@ -11265,14 +11341,14 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_trunc_slow
 CUDA: foreach_tensor_trunc_cuda

 - func: _foreach_trunc_(Tensor(a!)[] self) -> ()
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_trunc_slow_
 CUDA: foreach_tensor_trunc_cuda_
 autogen: _foreach_trunc.out

@@ -11280,7 +11356,7 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_zero_slow_
 CUDA: foreach_tensor_zero_cuda_
 autogen: _foreach_zero, _foreach_zero.out

@@ -11288,9 +11364,15 @@
 device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
 variants: function
 dispatch:
-
+CompositeExplicitAutograd: foreach_tensor_copy_list_kernel_slow_
 CUDA: foreach_tensor_copy_list_kernel_cuda_
-autogen: _foreach_copy
+autogen: _foreach_copy.out
+
+- func: _foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out
+device_check: NoCheck
+variants: function
+dispatch:
+CompositeExplicitAutograd: _foreach_copy

 - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
 dispatch:
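The hunk above also introduces a functional _foreach_copy overload alongside the existing in-place one. A rough torch-rb equivalent of the work it batches, assuming the gem's usual bang-method naming for in-place ops (copy! here is an assumption, not confirmed by this diff):

require "torch"

# Copy each source tensor into the matching destination, pair by pair;
# the foreach kernel performs the same copies across the whole list at once.
dst = [Torch.zeros(2), Torch.zeros(3)]
src = [Torch.tensor([1.0, 2.0]), Torch.tensor([3.0, 4.0, 5.0])]
dst.zip(src) { |d, s| d.copy!(s) } # copy! assumed to mirror copy_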
@@ -14562,6 +14644,16 @@
 NestedTensorCUDA: NestedTensor_to_padded_tensor_cuda
 autogen: to_padded_tensor.out

+- func: _jagged_to_padded_dense_forward(Tensor values, Tensor[] offsets, SymInt[] max_lengths, float padding_value=0.0) -> Tensor
+variants: function
+dispatch:
+CUDA: _fbgemm_jagged_to_padded_dense_forward
+
+- func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
+variants: function
+dispatch:
+CUDA: _fbgemm_dense_to_jagged_forward_symint
+
 - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
 dispatch:
 NestedTensorCPU: NestedTensor_softmax_dropout
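The _jagged_to_padded_dense_forward and _padded_dense_to_jagged_forward entries added above are CUDA-only, fbgemm-backed kernels for converting between a flat list of variable-length rows and a zero-padded dense batch. A small torch-rb sketch of the padding direction with ordinary ops (illustrative only, not the kernel itself):

require "torch"

# Pad variable-length rows with zeros to a common length, then stack them
# into one dense batch; the new kernel does this directly from values/offsets.
rows = [Torch.tensor([1.0, 2.0, 3.0]), Torch.tensor([4.0])]
max_len = rows.map { |r| r.shape[0] }.max
padded = Torch.stack(rows.map { |r| Torch.cat([r, Torch.zeros(max_len - r.shape[0])]) })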
@@ -14636,31 +14728,36 @@
 CUDA: _scaled_dot_product_efficient_attention_backward_cuda
 tags: nondeterministic_seeded

-- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
 dispatch:
 CUDA: _scaled_dot_product_cudnn_attention_cuda
 tags: nondeterministic_seeded

-- func:
+- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+dispatch:
+CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
+tags: nondeterministic_seeded
+
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
 variants: function
 dispatch:
 CUDA: _flash_attention_forward
 tags: nondeterministic_seeded

-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
 device_check: NoCheck
 variants: function
 dispatch:
 CUDA: _flash_attention_backward

 # Returns output, logsumexp if compute_logsumexp
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k,
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt? max_seqlen_q, SymInt? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? seqlen_k=None, int? window_size=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
 variants: function
 dispatch:
 CUDA: _efficient_attention_forward
 tags: nondeterministic_seeded

-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None, int? window_size=None, bool shared_storage_dqdkdv=False) -> (Tensor, Tensor, Tensor, Tensor)
 device_check: NoCheck
 variants: function
 dispatch:
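The attention entries above (cudnn, flash, memory-efficient) are specialized kernels for the same underlying computation, softmax(Q K^T / sqrt(d)) V, with the new parameters covering sliding-window and backward-pass details. A plain torch-rb sketch of that baseline math with ordinary tensor ops (no fused kernel, no windowing; purely illustrative):

require "torch"

# Scaled dot-product attention spelled out with basic tensor ops.
q = Torch.randn(2, 4, 8) # (batch, seq, dim)
k = Torch.randn(2, 4, 8)
v = Torch.randn(2, 4, 8)
scores = q.matmul(k.transpose(1, 2)) / Math.sqrt(8)
attn = scores.softmax(-1).matmul(v)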
@@ -15460,11 +15557,11 @@
 CPU: foobar
 autogen: _foobar.out

-# Fused Optimizer CUDA kernels.
 - func: _fused_adam_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
 # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
 variants: function
 dispatch:
+CPU: _fused_adam_kernel_cpu_
 CUDA: _fused_adam_kernel_cuda_
 autogen: _fused_adam, _fused_adam.out

@@ -15474,6 +15571,7 @@
 device_check: NoCheck
 variants: function
 dispatch:
+CPU: _fused_adam_kernel_cpu_
 CUDA: _fused_adam_kernel_cuda_
 autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out

@@ -15481,6 +15579,7 @@
 # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
 variants: function
 dispatch:
+CPU: _fused_adamw_kernel_cpu_
 CUDA: _fused_adamw_kernel_cuda_
 autogen: _fused_adamw, _fused_adamw.out

@@ -15490,6 +15589,7 @@
 device_check: NoCheck
 variants: function
 dispatch:
+CPU: _fused_adamw_kernel_cpu_
 CUDA: _fused_adamw_kernel_cuda_
 autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

@@ -15497,6 +15597,7 @@
 # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
 variants: function
 dispatch:
+CPU: _fused_sgd_kernel_cpu_
 CUDA: _fused_sgd_kernel_cuda_
 autogen: _fused_sgd, _fused_sgd.out

@@ -15506,9 +15607,16 @@
 device_check: NoCheck
 variants: function
 dispatch:
+CPU: _fused_sgd_kernel_cpu_
 CUDA: _fused_sgd_kernel_cuda_
 autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out

+- func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+variants: function
+dispatch:
+CPU: _fused_adagrad_kernel_cpu_
+autogen: _fused_adagrad, _fused_adagrad.out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
 variants: function
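Finally, the last hunks add CPU dispatch entries for the fused Adam, AdamW, and SGD kernels and a new _fused_adagrad_ entry. These are internal libtorch kernels; ordinary torch-rb optimizer code is unchanged, and whether the gem exposes a fused toggle is not shown by this diff. A standard training step for reference:

require "torch"

# Ordinary torch-rb training step with Adam; the CPU entries above are the
# fused kernels libtorch can now dispatch to on CPU.
model = Torch::NN::Linear.new(4, 2)
optimizer = Torch::Optim::Adam.new(model.parameters, lr: 0.01)
optimizer.zero_grad
loss = model.call(Torch.randn(8, 4)).sum
loss.backward
optimizer.step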