torch-rb 0.15.0 → 0.17.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -1
- data/codegen/generate_functions.rb +6 -6
- data/codegen/native_functions.yaml +462 -190
- data/ext/torch/utils.h +1 -1
- data/lib/torch/nn/elu.rb +20 -0
- data/lib/torch/nn/functional.rb +23 -1
- data/lib/torch/nn/functional_attention.rb +5 -5
- data/lib/torch/nn/gelu.rb +18 -0
- data/lib/torch/nn/leaky_relu.rb +1 -1
- data/lib/torch/tensor.rb +3 -1
- data/lib/torch/version.rb +1 -1
- data/lib/torch.rb +2 -0
- metadata +6 -4
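The Ruby-facing additions in the file list above (new data/lib/torch/nn/elu.rb and data/lib/torch/nn/gelu.rb, plus an expanded data/lib/torch/nn/functional.rb) suggest ELU and GELU activation modules were added in this release. The sketch below is not taken from the diff itself; it assumes the new classes follow the gem's existing Torch::NN module conventions (a constructor plus a call method), and the alpha keyword shown for ELU is an assumption.

  require "torch"

  x = Torch.randn(2, 3)

  # Assumed API, mirroring the gem's other activation modules
  gelu = Torch::NN::GELU.new
  elu  = Torch::NN::ELU.new(alpha: 1.0) # alpha keyword is an assumption

  y1 = gelu.call(x)
  y2 = elu.call(x)

The remainder of this page is the generated diff of data/codegen/native_functions.yaml between the two versions.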
data/codegen/native_functions.yaml

@@ -134,7 +134,7 @@
   autogen: _new_zeros_with_same_feature_meta.out

 # This function compares the storage numel of self with that of other, where
-# storage numel is
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
 # We create this function for composite compliance purposes. The batching rule
 # always returns true because vmapped as_strided does not support accessing
 # storage locations not indexable by the input tensor.
@@ -175,12 +175,24 @@
     CPU: _assert_async_msg_cpu
     CUDA: _assert_async_msg_cuda

+- func: _assert_scalar(Scalar self, str assert_msg) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _assert_scalar
+
+- func: _functional_assert_scalar(Scalar self, str assert_msg, Tensor dep_token) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _functional_assert_scalar
+
 - func: _functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor
   dispatch:
     CPU: _functional_assert_async_msg_cpu

 - func: _assert_tensor_metadata(Tensor a, SymInt[]? size=None, SymInt[]? stride=None, ScalarType? dtype=None) -> ()

+- func: _print(str s) -> ()
+  dispatch:
+    CompositeExplicitAutograd: _print
+
 - func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
   dispatch:
     CompositeExplicitAutograd: sym_constrain_range
@@ -470,6 +482,7 @@
 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
+    MPS: conj_physical_out_mps
     SparseCPU, SparseCUDA: conj_physical_out_sparse
     SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
   tags: pointwise
@@ -536,8 +549,8 @@
   structured_delegate: add.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA: add_sparse
-    SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
+    SparseCPU, SparseCUDA, SparseMeta: add_sparse
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr
     MkldnnCPU: mkldnn_add
     ZeroTensor: add_zerotensor
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_add_Tensor
@@ -548,8 +561,8 @@
   variants: method
   structured_delegate: add.out
   dispatch:
-    SparseCPU, SparseCUDA: add_sparse_
-    SparseCsrCPU, SparseCsrCUDA: add_sparse_csr_
+    SparseCPU, SparseCUDA, SparseMeta: add_sparse_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: add_sparse_csr_
     MkldnnCPU: mkldnn_add_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_add__Tensor
   tags: pointwise
@@ -562,10 +575,10 @@
     Generic: add (AllAndComplex, BFloat16, Half, ComplexHalf)
     ScalarOnly: add (Bool)
   dispatch:
-    SparseCPU: add_out_sparse_cpu
+    SparseCPU, SparseMeta: add_out_sparse_cpu
     SparseCUDA: add_out_sparse_cuda
-    SparseCsrCPU:
-    SparseCsrCUDA:
+    SparseCsrCPU, SparseCsrMeta: add_out_sparse_compressed_cpu
+    SparseCsrCUDA: add_out_sparse_compressed_cuda
     MkldnnCPU: mkldnn_add_out
     MPS: add_out_mps
   tags: pointwise
@@ -763,7 +776,7 @@
   dispatch:
     CompositeExplicitAutograd: arange

-# This operator should be named `
+# This operator should be named `arange.start_out` if following the naming convention. However that
 # name is already taken. Disabled because of CI job failures.
 # FIXME: enable this
 #- func: arange.start_out_(Scalar start, Scalar end, *, Tensor(a!) out) -> Tensor(a!)
@@ -1220,6 +1233,13 @@
     CompositeExplicitAutograd: copysign_out
   tags: pointwise

+- func: _lazy_clone(Tensor self) -> Tensor
+  # Like clone, but the copy takes place lazily, only if either the
+  # input or the output are written.
+  variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: _lazy_clone
+
 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -1621,6 +1641,7 @@
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: complex_out
+    MPS: complex_out_mps

 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
@@ -1729,6 +1750,7 @@
 - func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
   variants: function
   dispatch:
+    Meta: copy_meta
     CompositeExplicitAutogradNonFunctional: copy
   tags: core

@@ -1847,7 +1869,10 @@
 - func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution
-
+
+- func: cudnn_convolution.out(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CUDA: cudnn_convolution_out

 - func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
@@ -2346,7 +2371,7 @@
     Meta: empty_meta_symint
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA, SparseMeta: empty_sparse
-    SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
     QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
   tags: core

@@ -2452,7 +2477,7 @@
     CompositeExplicitAutograd: empty_like
     QuantizedCPU, QuantizedCUDA: empty_like_quantized
     SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo
-    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: empty_like_nested
   autogen: empty_like.out

@@ -2954,12 +2979,14 @@
   dispatch:
     CPU: _fft_r2c_mkl
     CUDA: _fft_r2c_cufft
+    MPS: _fft_r2c_mps

 - func: _fft_r2c.out(Tensor self, int[] dim, int normalization, bool onesided, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_r2c_mkl_out
     CUDA: _fft_r2c_cufft_out
+    MPS: _fft_r2c_mps_out

 # Complex to real inverse FFT
 - func: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
@@ -2967,12 +2994,14 @@
   dispatch:
     CPU: _fft_c2r_mkl
     CUDA: _fft_c2r_cufft
+    MPS: _fft_c2r_mps

 - func: _fft_c2r.out(Tensor self, int[] dim, int normalization, SymInt last_dim_size, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2r_mkl_out
     CUDA: _fft_c2r_cufft_out
+    MPS: _fft_c2r_mps_out

 # Standard complex to complex FFT (forward or backward)
 - func: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
@@ -2980,12 +3009,14 @@
   dispatch:
     CPU: _fft_c2c_mkl
     CUDA: _fft_c2c_cufft
+    MPS: _fft_c2c_mps

 - func: _fft_c2c.out(Tensor self, SymInt[] dim, int normalization, bool forward, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
   dispatch:
     CPU: _fft_c2c_mkl_out
     CUDA: _fft_c2c_cufft_out
+    MPS: _fft_c2c_mps_out

 - func: _validate_compressed_sparse_indices(bool is_crow, Tensor compressed_idx, Tensor plain_idx, int cdim, int dim, int nnz) -> ()
   device_check: NoCheck
@@ -3097,6 +3128,7 @@
   structured: True
   dispatch:
     CPU, CUDA: isin_Tensor_Tensor_out
+    MPS: isin_Tensor_Tensor_out_mps

 - func: isin.Tensor_Tensor(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False) -> Tensor
   variants: function
@@ -3238,6 +3270,8 @@
   autogen: native_layer_norm_backward.out
   tags: core

+- func: rms_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, float? eps=None) -> Tensor
+
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
   dispatch:
@@ -3302,14 +3336,39 @@
   dispatch:
     CUDA: _cslt_compress

-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False, int alg_id=0) -> Tensor
   dispatch:
     CUDA: _cslt_sparse_mm

-- func:
+- func: _cslt_sparse_mm_search(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> int
+  dispatch:
+    CUDA: _cslt_sparse_mm_search
+
+- func: _sparse_semi_structured_tile(Tensor input, str algorithm="", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CUDA: _sparse_semi_structured_tile
+
+- func: _sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)
+  dispatch:
+    CUDA: _sparse_semi_structured_apply
+
+- func: _sparse_semi_structured_apply_dense(Tensor input, Tensor thread_masks) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_apply_dense
+
+# DEPRECATED: Use torch.__sparse_semi_structured_mm/torch._sparse_semi_structured_addmm instead
+- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None, ScalarType? out_dtype=None) -> Tensor
   dispatch:
     CUDA: _sparse_semi_structured_linear

+- func: _sparse_semi_structured_mm(Tensor mat1, Tensor mat1_meta, Tensor mat2, *, ScalarType? out_dtype=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_mm
+
+- func: _sparse_semi_structured_addmm(Tensor input, Tensor mat1, Tensor mat1_meta, Tensor mat2, *, Scalar alpha=1, Scalar beta=1, ScalarType? out_dtype=None) -> Tensor
+  dispatch:
+    CUDA: _sparse_semi_structured_addmm
+
 - func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
   dispatch:
     CUDA: _mixed_dtypes_linear
@@ -4050,20 +4109,30 @@

 - func: _int_mm(Tensor self, Tensor mat2) -> Tensor
   dispatch:
+    CPU: _int_mm_cpu
     CUDA: _int_mm_cuda

 - func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
+    CPU: _int_mm_out_cpu
     CUDA: _int_mm_out_cuda

 - func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
   dispatch:
+    CPU: _convert_weight_to_int4pack_cpu
     CUDA: _convert_weight_to_int4pack_cuda

 - func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
   dispatch:
+    CPU: _weight_int4pack_mm_cpu
+    MPS: _weight_int4pack_mm_mps
     CUDA: _weight_int4pack_mm_cuda

+- func: _weight_int8pack_mm(Tensor self, Tensor mat2, Tensor scales) -> Tensor
+  dispatch:
+    CPU: _weight_int8pack_mm_cpu
+    MPS: _weight_int8pack_mm_mps
+
 - func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
   python_module: sparse

@@ -4439,7 +4508,6 @@
     MPS: pixel_shuffle_mps
     CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
   autogen: pixel_shuffle.out
-  tags: core

 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
   dispatch:
@@ -4810,7 +4878,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: reshape_symint
-    CompositeImplicitAutogradNestedTensor:
+    CompositeImplicitAutogradNestedTensor: reshape_nested_symint

 - func: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -4969,6 +5037,7 @@
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
+    QuantizedCPU: gelu_quantized_cpu_
     NestedTensorCPU, NestedTensorCUDA: NestedTensor_gelu_

 - func: gelu(Tensor self, *, str approximate='none') -> Tensor
@@ -5356,6 +5425,21 @@
     CompositeExplicitAutograd: slice_backward
   autogen: slice_backward.out

+# NB: This op exists to back the implementation of reverse view_funcs for various views (chunk,
+# slice.Tensor, split_with_sizes, et al.). Currently, these are only used during fake-ification
+# of PT2 graph input subclass instances that are views. This means:
+# * This op shouldn't really show up in eager mode (so e.g. XLA shouldn't have to implement it)
+# * This op shouldn't show up in a PT2 graph (so a PT2 backend shouldn't have to implement it)
+# * A subclass will have to implement this to work in PT2 if a subclass view is used as a graph
+# input AND the view utilizes this op in its inverse. The idea is that slice_inverse() is
+# easier to implement for a subclass than as_strided()
+- func: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_inverse_symint
+
 - func: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
   variants: function, method
   device_check: NoCheck
@@ -5363,7 +5447,7 @@
   dispatch:
     CompositeExplicitAutogradNonFunctional: slice_scatter
   autogen: slice_scatter.out
-  tags: core
+  tags: [core, view_copy]

 - func: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
   variants: function, method
@@ -5562,6 +5646,16 @@
     SparseCPU: _sspaddmm_out_cpu
     SparseCUDA: _sspaddmm_out_cuda

+- func: _chunk_cat(Tensor[] tensors, int dim, int num_chunks) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat
+    CUDA: _chunk_cat_cuda
+
+- func: _chunk_cat.out(Tensor[] tensors, int dim, int num_chunks, *, Tensor(a!) out) -> Tensor(a!)
+  dispatch:
+    CompositeExplicitAutograd: _chunk_cat_out
+    CUDA: _chunk_cat_out_cuda
+
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: stack
@@ -5626,8 +5720,8 @@
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: sum
-    SparseCPU, SparseCUDA: sum_coo
-    SparseCsrCPU, SparseCsrCUDA: sum_csr
+    SparseCPU, SparseCUDA, SparseMeta: sum_coo
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_csr
   autogen: sum.out

 - func: sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
@@ -5753,6 +5847,7 @@
   variants: function
   dispatch:
     CPU, CUDA: std_mean
+    MPS: std_mean_mps
   autogen: std_mean.correction_out

 - func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6008,7 +6103,6 @@
     CPU, MPS: roll
     CUDA: roll_cuda
   autogen: roll.out
-  tags: core

 # default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args

@@ -6091,6 +6185,58 @@
     CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
   autogen: _nested_view_from_buffer_copy.out

+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
+  autogen: _nested_view_from_jagged_copy.out
+
+- func: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_values_copy(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  tags: view_copy
+  dispatch:
+    CompositeExplicitAutogradNonFunctional: _nested_get_values_copy
+  autogen: _nested_get_values_copy.out
+
+- func: _nested_get_offsets(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+# returns undefined Tensor if no lengths present
+- func: _nested_get_lengths(Tensor self) -> Tensor
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_ragged_idx(Tensor self) -> int
+  variants: function
+  device_check: NoCheck
+  dispatch: {}
+
+- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
+  category_override: dummy
+  dispatch: {}
+
+- func: _nested_compute_contiguous_strides_offsets(Tensor nested_size) -> (Tensor, Tensor)
+  variants: function
+  device_check: NoCheck
+  dispatch:
+    CPU, CUDA: _nested_compute_contiguous_strides_offsets
+
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
     # calls unsqueeze
@@ -6275,6 +6421,7 @@
   variants: function
   dispatch:
     CPU, CUDA: var_mean
+    MPS: var_mean_mps
   autogen: var_mean.correction_out

 - func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
@@ -6295,15 +6442,13 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA: where
-    MPS: where_mps
+    CPU, CUDA, MPS: where
   tags: [core, pointwise]

 - func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
-    CPU, CUDA: where_self_out
-    MPS: where_self_out_mps
+    CPU, CUDA, MPS: where_self_out

 - func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
   variants: function
@@ -6357,7 +6502,7 @@
     CPU: _efficientzerotensor
     CUDA: _efficientzerotensor_cuda
     MPS: _efficientzerotensor_mps
-    Meta:
+    Meta: _efficientzerotensor_meta_symint
   autogen: _efficientzerotensor.out

 - func: zeros(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -6434,6 +6579,32 @@
     SparseCPU, SparseCUDA: norm_sparse
   autogen: native_norm.ScalarOpt_dim_dtype_out

+- func: _batch_norm_with_update(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CPU: _batch_norm_with_update_cpu
+    CUDA: _batch_norm_with_update_cuda
+    MPS: _batch_norm_with_update_mps
+    MkldnnCPU: _batch_norm_with_update_mkldnn
+  autogen: _batch_norm_with_update_functional
+
+- func: _batch_norm_with_update.out(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, float momentum, float eps, *, Tensor(d!) out, Tensor(e!) save_mean, Tensor(f!) save_invstd, Tensor(g!) reserve) -> (Tensor(d!), Tensor(e!), Tensor(f!), Tensor(g!))
+  dispatch:
+    CPU: _batch_norm_with_update_cpu_out
+    CUDA: _batch_norm_with_update_cuda_out
+    MPS: _batch_norm_with_update_mps_out
+
+- func: _batch_norm_no_update(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor, Tensor)
+  dispatch:
+    CompositeExplicitAutograd: _batch_norm_no_update
+  autogen: _batch_norm_no_update.out
+
+- func: batch_norm_backward(Tensor grad_out, Tensor input, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, bool update, float eps, bool[3] output_mask, Tensor reserve) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CPU: _new_batch_norm_backward_cpu
+    CUDA: _new_batch_norm_backward_cuda
+    MPS: _new_batch_norm_backward_mps
+    MkldnnCPU: _new_batch_norm_backward_mkldnn
+
 # TODO: reduce signatures down to one when optional args is available
 - func: _sparse_sum(Tensor self) -> Tensor

@@ -6644,7 +6815,7 @@
     MPS: zero_mps_
     Meta: zero_meta_
     SparseCPU, SparseCUDA, SparseMeta: zero_sparse_
-    SparseCsrCPU, SparseCsrCUDA: zero_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: zero_sparse_csr_
     MkldnnCPU: mkldnn_zero_
     NestedTensorCPU, NestedTensorCUDA: zero_nested_
   autogen: zero, zero.out
@@ -6934,7 +7105,11 @@
 # FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
 # the default would never make sense.

-- func:
+- func: _sparse_compressed_tensor_with_dims(int nnz, int dense_dim, int[] size, int[] blocksize, ScalarType index_dtype, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: sparse_compressed_tensor_with_dims
+
+- func: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
   dispatch:
     CompositeExplicitAutograd: sparse_compressed_tensor

@@ -6951,7 +7126,10 @@
 - func: sparse_bsr_tensor.crow_col_value(Tensor crow_indices, Tensor col_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
 - func: sparse_bsc_tensor.ccol_row_value(Tensor ccol_indices, Tensor row_indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor

-- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values,
+- func: _sparse_compressed_tensor_unsafe(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  dispatch:
+    CompositeImplicitAutograd: _sparse_compressed_tensor_unsafe_symint
+
 - func: _sparse_csr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 - func: _sparse_bsr_tensor_unsafe(Tensor crow_indices, Tensor col_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -7035,9 +7213,9 @@
 - func: sparse_dim(Tensor self) -> int
   variants: method
   dispatch:
-    CPU, CUDA: sparse_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: sparse_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: sparse_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_dim_sparse_csr
+    CompositeExplicitAutograd: sparse_dim_default
   device_check: NoCheck
   device_guard: False

@@ -7052,9 +7230,9 @@
 - func: dense_dim(Tensor self) -> int
   variants: method
   dispatch:
-    CPU, CUDA: dense_dim_strided
     SparseCPU, SparseCUDA, SparseMeta: dense_dim_sparse
-    SparseCsrCPU, SparseCsrCUDA: dense_dim_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: dense_dim_sparse_csr
+    CompositeExplicitAutograd: dense_dim_default
   device_check: NoCheck
   device_guard: False

@@ -7070,7 +7248,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: _nnz_sparse
-    SparseCsrCPU, SparseCsrCUDA: _nnz_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _nnz_sparse_csr
   device_check: NoCheck
   device_guard: False

@@ -7133,7 +7311,7 @@
   variants: method
   dispatch:
     SparseCPU, SparseCUDA, SparseMeta: values_sparse
-    SparseCsrCPU, SparseCsrCUDA: values_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: values_sparse_csr
     NestedTensorCPU, NestedTensorCUDA: values_nested
     CompositeExplicitAutograd: values_default
   device_check: NoCheck
@@ -7142,7 +7320,7 @@
 - func: crow_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: crow_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: crow_indices_sparse_csr
     CompositeExplicitAutograd: crow_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7150,7 +7328,7 @@
 - func: col_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: col_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: col_indices_sparse_csr
     CompositeExplicitAutograd: col_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7158,7 +7336,7 @@
 - func: ccol_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: ccol_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ccol_indices_sparse_csr
     CompositeExplicitAutograd: ccol_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7166,7 +7344,7 @@
 - func: row_indices(Tensor(a) self) -> Tensor(a)
   variants: method
   dispatch:
-    SparseCsrCPU, SparseCsrCUDA: row_indices_sparse_csr
+    SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: row_indices_sparse_csr
     CompositeExplicitAutograd: row_indices_default
   device_check: NoCheck
   device_guard: False
@@ -7185,7 +7363,7 @@
   device_check: NoCheck # Allows copy into different device
   variants: function
   dispatch:
-    SparseCPU, SparseCUDA: copy_sparse_
+    SparseCPU, SparseCUDA, SparseMeta: copy_sparse_
   autogen: copy_sparse_to_sparse, copy_sparse_to_sparse.out

 # By adding the AutogradNestedTensor this makes this function CompositeImplicit-like for nested tensors
@@ -7288,7 +7466,7 @@
     MkldnnCPU: mkldnn_reorder_conv2d_weight
   autogen: mkldnn_reorder_conv2d_weight.out

-- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
+- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
   variants: function
   python_module: nn
   dispatch:
@@ -7536,7 +7714,7 @@

 - func: result_type.Scalar_Scalar(Scalar scalar1, Scalar scalar2) -> ScalarType

-- func: can_cast(ScalarType
+- func: can_cast(ScalarType from_, ScalarType to) -> bool
   variants: function

 - func: promote_types(ScalarType type1, ScalarType type2) -> ScalarType
@@ -7675,6 +7853,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_
   autogen: set.source_Storage, set.source_Storage_out
+  tags: inplace_view

 - func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7687,6 +7866,7 @@
     MPS: set_storage_mps_
     QuantizedCPU, QuantizedCUDA: set_storage_quantized_
   autogen: set.source_Storage_storage_offset, set.source_Storage_storage_offset_out
+  tags: inplace_view

 - func: set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!)
   variants: method
@@ -7694,6 +7874,7 @@
   device_guard: False
   dispatch:
     CompositeImplicitAutograd: set__symint
+  tags: inplace_view

 - func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
   variants: method
@@ -7702,6 +7883,7 @@
   dispatch:
     CPU, CUDA, Meta, MPS: set_tensor_
   autogen: set.source_Tensor, set.source_Tensor_out
+  tags: inplace_view

 - func: set_(Tensor(a!) self) -> Tensor(a!)
   variants: method
@@ -7711,6 +7893,7 @@
     Meta: set_meta_
     MPS: set_mps_
   autogen: set, set.out
+  tags: inplace_view

 # Not making it CompositeImplicitAutograd because lift
 # should be a primitive w.r.t. functorch
@@ -10106,18 +10289,21 @@
   variants: method, function
   dispatch:
     CompositeExplicitAutograd: alias
+    NestedTensorCPU, NestedTensorCUDA: alias_nested
   tags: core

 - func: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
   variants: function
   dispatch:
     CUDA: _amp_foreach_non_finite_check_and_unscale_cuda_
+    CPU: _amp_foreach_non_finite_check_and_unscale_cpu_
   autogen: _amp_foreach_non_finite_check_and_unscale, _amp_foreach_non_finite_check_and_unscale.out

 - func: _amp_update_scale_(Tensor(a!) self, Tensor(b!) growth_tracker, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor(a!)
   variants: function
   dispatch:
     CUDA: _amp_update_scale_cuda_
+    CPU: _amp_update_scale_cpu_
   autogen: _amp_update_scale, _amp_update_scale.out

 #- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
@@ -10137,14 +10323,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow
     CUDA: foreach_tensor_add_scalar_kernel_cuda

 - func: _foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalar_kernel_slow_
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
   autogen: _foreach_add.Scalar_out

@@ -10152,14 +10338,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow
     CUDA: foreach_tensor_add_list_kernel_cuda

 - func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_list_kernel_slow_
     CUDA: foreach_tensor_add_list_kernel_cuda_
   autogen: _foreach_add.List_out

@@ -10167,14 +10353,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalarlist_kernel_slow
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda

 - func: _foreach_add_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_scalarlist_kernel_slow_
     CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
   autogen: _foreach_add.ScalarList_out

@@ -10182,14 +10368,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow
     CUDA: foreach_tensor_add_tensor_kernel_cuda

 - func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_add_tensor_kernel_slow_
     CUDA: foreach_tensor_add_tensor_kernel_cuda_
   autogen: _foreach_add.Tensor_out

@@ -10197,14 +10383,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalar_kernel_slow
     CUDA: foreach_tensor_sub_scalar_kernel_cuda

 - func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalar_kernel_slow_
     CUDA: foreach_tensor_sub_scalar_kernel_cuda_
   autogen: _foreach_sub.Scalar_out

@@ -10212,14 +10398,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_list_kernel_slow
     CUDA: foreach_tensor_sub_list_kernel_cuda

 - func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_list_kernel_slow_
     CUDA: foreach_tensor_sub_list_kernel_cuda_
   autogen: _foreach_sub.List_out

@@ -10227,14 +10413,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalarlist_kernel_slow
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda

 - func: _foreach_sub_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sub_scalarlist_kernel_slow_
     CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
   autogen: _foreach_sub.ScalarList_out

@@ -10242,14 +10428,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow
     CUDA: foreach_tensor_mul_scalar_kernel_cuda

 - func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalar_kernel_slow_
     CUDA: foreach_tensor_mul_scalar_kernel_cuda_
   autogen: _foreach_mul.Scalar_out

@@ -10257,14 +10443,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow
     CUDA: foreach_tensor_mul_list_kernel_cuda

 - func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_list_kernel_slow_
     CUDA: foreach_tensor_mul_list_kernel_cuda_
   autogen: _foreach_mul.List_out

@@ -10272,14 +10458,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalarlist_kernel_slow
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda

 - func: _foreach_mul_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_scalarlist_kernel_slow_
     CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
   autogen: _foreach_mul.ScalarList_out

@@ -10287,14 +10473,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow
     CUDA: foreach_tensor_mul_tensor_kernel_cuda

 - func: _foreach_mul_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_mul_tensor_kernel_slow_
     CUDA: foreach_tensor_mul_tensor_kernel_cuda_
   autogen: _foreach_mul.Tensor_out

@@ -10302,14 +10488,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalar_kernel_slow
     CUDA: foreach_tensor_div_scalar_kernel_cuda

 - func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalar_kernel_slow_
     CUDA: foreach_tensor_div_scalar_kernel_cuda_
   autogen: _foreach_div.Scalar_out

@@ -10317,14 +10503,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow
     CUDA: foreach_tensor_div_list_kernel_cuda

 - func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow_
     CUDA: foreach_tensor_div_list_kernel_cuda_
   autogen: _foreach_div.List_out

@@ -10332,14 +10518,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalarlist_kernel_slow
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda

 - func: _foreach_div_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_scalarlist_kernel_slow_
     CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
   autogen: _foreach_div.ScalarList_out

@@ -10347,14 +10533,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow
     CUDA: foreach_tensor_div_tensor_kernel_cuda

 - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow_
     CUDA: foreach_tensor_div_tensor_kernel_cuda_
   autogen: _foreach_div.Tensor_out

@@ -10362,14 +10548,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda

 - func: _foreach_clamp_max_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
   autogen: _foreach_clamp_max.Scalar_out

@@ -10377,14 +10563,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda

 - func: _foreach_clamp_max_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow_
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_clamp_max.List_out

@@ -10392,14 +10578,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda

 - func: _foreach_clamp_max_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_clamp_max.ScalarList_out

@@ -10407,14 +10593,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda

 - func: _foreach_clamp_min_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
   autogen: _foreach_clamp_min.Scalar_out

@@ -10422,14 +10608,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda

 - func: _foreach_clamp_min_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow_
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
   autogen: _foreach_clamp_min.List_out

@@ -10437,14 +10623,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda

 - func: _foreach_clamp_min_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
   autogen: _foreach_clamp_min.ScalarList_out

@@ -10453,14 +10639,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda

 - func: _foreach_maximum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalar_kernel_cuda_
   autogen: _foreach_maximum.Scalar_out

@@ -10469,14 +10655,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda

 - func: _foreach_maximum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_list_kernel_slow_
     CUDA: foreach_tensor_clamp_min_list_kernel_cuda_
   autogen: _foreach_maximum.List_out

@@ -10485,14 +10671,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda

 - func: _foreach_maximum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_min_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_min_scalarlist_kernel_cuda_
   autogen: _foreach_maximum.ScalarList_out

@@ -10500,14 +10686,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda

 - func: _foreach_minimum_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalar_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalar_kernel_cuda_
   autogen: _foreach_minimum.Scalar_out

@@ -10515,14 +10701,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda

 - func: _foreach_minimum_.List(Tensor(a!)[] self, Tensor[] other) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_list_kernel_slow_
     CUDA: foreach_tensor_clamp_max_list_kernel_cuda_
   autogen: _foreach_minimum.List_out

@@ -10530,14 +10716,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda

 - func: _foreach_minimum_.ScalarList(Tensor(a!)[] self, Scalar[] scalars) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_clamp_max_scalarlist_kernel_slow_
     CUDA: foreach_tensor_clamp_max_scalarlist_kernel_cuda_
   autogen: _foreach_minimum.ScalarList_out

@@ -10545,28 +10731,28 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalar_slow
     CUDA: foreach_tensor_addcdiv_scalar_cuda

 - func: _foreach_addcdiv.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalarlist_slow
     CUDA: foreach_tensor_addcdiv_scalarlist_cuda

 - func: _foreach_addcdiv.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_tensor_slow
     CUDA: foreach_tensor_addcdiv_tensor_cuda

 - func: _foreach_addcdiv_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalar_slow_
     CUDA: foreach_tensor_addcdiv_scalar_cuda_
   autogen: _foreach_addcdiv.Scalar_out

@@ -10574,7 +10760,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_scalarlist_slow_
     CUDA: foreach_tensor_addcdiv_scalarlist_cuda_
   autogen: _foreach_addcdiv.ScalarList_out

@@ -10582,7 +10768,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcdiv_tensor_slow_
     CUDA: foreach_tensor_addcdiv_tensor_cuda_
   autogen: _foreach_addcdiv.Tensor_out

@@ -10590,28 +10776,28 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow
     CUDA: foreach_tensor_addcmul_scalar_cuda

 - func: _foreach_addcmul.ScalarList(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar[] scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalarlist_slow
     CUDA: foreach_tensor_addcmul_scalarlist_cuda

 - func: _foreach_addcmul.Tensor(Tensor[] self, Tensor[] tensor1, Tensor[] tensor2, Tensor scalars) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_tensor_slow
     CUDA: foreach_tensor_addcmul_tensor_cuda

 - func: _foreach_addcmul_.Scalar(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalar_slow_
     CUDA: foreach_tensor_addcmul_scalar_cuda_
   autogen: _foreach_addcmul.Scalar_out

@@ -10619,7 +10805,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_scalarlist_slow_
     CUDA: foreach_tensor_addcmul_scalarlist_cuda_
   autogen: _foreach_addcmul.ScalarList_out

@@ -10627,7 +10813,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_addcmul_tensor_slow_
     CUDA: foreach_tensor_addcmul_tensor_cuda_
   autogen: _foreach_addcmul.Tensor_out

@@ -10635,14 +10821,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_abs_slow
     CUDA: foreach_tensor_abs_cuda

 - func: _foreach_abs_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_abs_slow_
     CUDA: foreach_tensor_abs_cuda_
   autogen: _foreach_abs.out

@@ -10650,14 +10836,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_acos_slow
     CUDA: foreach_tensor_acos_cuda

 - func: _foreach_acos_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_acos_slow_
     CUDA: foreach_tensor_acos_cuda_
   autogen: _foreach_acos.out

@@ -10665,14 +10851,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_asin_slow
     CUDA: foreach_tensor_asin_cuda

 - func: _foreach_asin_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_asin_slow_
     CUDA: foreach_tensor_asin_cuda_
   autogen: _foreach_asin.out

@@ -10680,14 +10866,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_atan_slow
     CUDA: foreach_tensor_atan_cuda

 - func: _foreach_atan_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_atan_slow_
     CUDA: foreach_tensor_atan_cuda_
   autogen: _foreach_atan.out

@@ -10695,14 +10881,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ceil_slow
     CUDA: foreach_tensor_ceil_cuda

 - func: _foreach_ceil_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ceil_slow_
     CUDA: foreach_tensor_ceil_cuda_
   autogen: _foreach_ceil.out

@@ -10710,14 +10896,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cos_slow
     CUDA: foreach_tensor_cos_cuda

 - func: _foreach_cos_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cos_slow_
     CUDA: foreach_tensor_cos_cuda_
   autogen: _foreach_cos.out

@@ -10725,14 +10911,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cosh_slow
     CUDA: foreach_tensor_cosh_cuda

 - func: _foreach_cosh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_cosh_slow_
     CUDA: foreach_tensor_cosh_cuda_
   autogen: _foreach_cosh.out

@@ -10740,14 +10926,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erf_slow
     CUDA: foreach_tensor_erf_cuda

 - func: _foreach_erf_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erf_slow_
     CUDA: foreach_tensor_erf_cuda_
   autogen: _foreach_erf.out

@@ -10755,14 +10941,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erfc_slow
     CUDA: foreach_tensor_erfc_cuda

 - func: _foreach_erfc_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_erfc_slow_
     CUDA: foreach_tensor_erfc_cuda_
   autogen: _foreach_erfc.out

@@ -10770,14 +10956,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_exp_slow
     CUDA: foreach_tensor_exp_cuda

 - func: _foreach_exp_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_exp_slow_
     CUDA: foreach_tensor_exp_cuda_
   autogen: _foreach_exp.out

@@ -10785,14 +10971,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_expm1_slow
     CUDA: foreach_tensor_expm1_cuda

 - func: _foreach_expm1_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_expm1_slow_
     CUDA: foreach_tensor_expm1_cuda_
   autogen: _foreach_expm1.out

@@ -10800,14 +10986,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_floor_slow
     CUDA: foreach_tensor_floor_cuda

 - func: _foreach_floor_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_floor_slow_
     CUDA: foreach_tensor_floor_cuda_
   autogen: _foreach_floor.out

@@ -10815,14 +11001,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_frac_slow
     CUDA: foreach_tensor_frac_cuda

 - func: _foreach_frac_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_frac_slow_
     CUDA: foreach_tensor_frac_cuda_
   autogen: _foreach_frac.out

@@ -10830,7 +11016,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ternary_lerp_slow
     CUDA: foreach_tensor_lerp_ternary_cuda
   autogen: _foreach_lerp.List_out

@@ -10838,7 +11024,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_ternary_lerp_slow_
     CUDA: foreach_tensor_lerp_ternary_cuda_
   autogen: _foreach_lerp.List_out

@@ -10846,7 +11032,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lerp_list_kernel_slow
     CUDA: foreach_tensor_lerp_list_cuda
   autogen: _foreach_lerp.Scalar_out

@@ -10854,7 +11040,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensors are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lerp_list_kernel_slow_
     CUDA: foreach_tensor_lerp_list_cuda_
   autogen: _foreach_lerp.Scalar_out

@@ -10862,14 +11048,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lgamma_slow
     CUDA: foreach_tensor_lgamma_cuda

 - func: _foreach_lgamma_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_lgamma_slow_
     CUDA: foreach_tensor_lgamma_cuda_
   autogen: _foreach_lgamma.out

@@ -10877,14 +11063,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log_slow
     CUDA: foreach_tensor_log_cuda

 - func: _foreach_log_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log_slow_
     CUDA: foreach_tensor_log_cuda_
   autogen: _foreach_log.out

@@ -10892,14 +11078,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log10_slow
     CUDA: foreach_tensor_log10_cuda

 - func: _foreach_log10_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log10_slow_
     CUDA: foreach_tensor_log10_cuda_
   autogen: _foreach_log10.out

@@ -10907,14 +11093,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log1p_slow
     CUDA: foreach_tensor_log1p_cuda

 - func: _foreach_log1p_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log1p_slow_
     CUDA: foreach_tensor_log1p_cuda_
   autogen: _foreach_log1p.out

@@ -10922,37 +11108,45 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log2_slow
     CUDA: foreach_tensor_log2_cuda

 - func: _foreach_log2_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_log2_slow_
     CUDA: foreach_tensor_log2_cuda_
   autogen: _foreach_log2.out

+- func: _foreach_max(Tensor[] self) -> Tensor[]
+  device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: foreach_tensor_max_slow
+    CUDA: foreach_tensor_max_cuda
+  autogen: _foreach_max.out
+
 - func: _foreach_neg(Tensor[] self) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_neg_slow
     CUDA: foreach_tensor_neg_cuda

 - func: _foreach_neg_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_neg_slow_
     CUDA: foreach_tensor_neg_cuda_
   autogen: _foreach_neg.out

-- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]
+- func: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2, ScalarType? dtype=None) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_norm_slow
     CUDA: foreach_tensor_norm_cuda
   autogen: _foreach_norm.Scalar_out

@@ -10960,35 +11154,35 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_list_kernel_slow
     CUDA: foreach_tensor_pow_list_kernel_cuda

 - func: _foreach_pow.Scalar(Tensor[] self, Scalar exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalar_kernel_slow
     CUDA: foreach_tensor_pow_scalar_kernel_cuda

 - func: _foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalarlist_kernel_slow
     CUDA: foreach_tensor_pow_scalarlist_kernel_cuda

 - func: _foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_scalar_pow_list_kernel_slow
     CUDA: foreach_scalar_pow_list_kernel_cuda

 - func: _foreach_pow_.List(Tensor(a!)[] self, Tensor[] exponent) -> ()
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_list_kernel_slow_
     CUDA: foreach_tensor_pow_list_kernel_cuda_
   autogen: _foreach_pow.List_out

@@ -10996,7 +11190,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalar_kernel_slow_
     CUDA: foreach_tensor_pow_scalar_kernel_cuda_
   autogen: _foreach_pow.Scalar_out

@@ -11004,7 +11198,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_pow_scalarlist_kernel_slow_
     CUDA: foreach_tensor_pow_scalarlist_kernel_cuda_
   autogen: _foreach_pow.ScalarList_out

@@ -11012,14 +11206,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_reciprocal_slow
     CUDA: foreach_tensor_reciprocal_cuda

 - func: _foreach_reciprocal_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_reciprocal_slow_
     CUDA: foreach_tensor_reciprocal_cuda_
   autogen: _foreach_reciprocal.out

@@ -11027,14 +11221,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_round_slow
     CUDA: foreach_tensor_round_cuda

 - func: _foreach_round_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_round_slow_
     CUDA: foreach_tensor_round_cuda_
   autogen: _foreach_round.out

@@ -11042,14 +11236,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sigmoid_slow
     CUDA: foreach_tensor_sigmoid_cuda

 - func: _foreach_sigmoid_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sigmoid_slow_
     CUDA: foreach_tensor_sigmoid_cuda_
   autogen: _foreach_sigmoid.out

@@ -11057,14 +11251,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sign_slow
     CUDA: foreach_tensor_sign_cuda

 - func: _foreach_sign_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sign_slow_
     CUDA: foreach_tensor_sign_cuda_
   autogen: _foreach_sign.out

@@ -11072,14 +11266,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sin_slow
     CUDA: foreach_tensor_sin_cuda

 - func: _foreach_sin_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sin_slow_
     CUDA: foreach_tensor_sin_cuda_
   autogen: _foreach_sin.out

@@ -11087,14 +11281,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sinh_slow
     CUDA: foreach_tensor_sinh_cuda

 - func: _foreach_sinh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sinh_slow_
     CUDA: foreach_tensor_sinh_cuda_
   autogen: _foreach_sinh.out

@@ -11102,14 +11296,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sqrt_slow
     CUDA: foreach_tensor_sqrt_cuda

 - func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_sqrt_slow_
     CUDA: foreach_tensor_sqrt_cuda_
   autogen: _foreach_sqrt.out

@@ -11117,14 +11311,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tan_slow
     CUDA: foreach_tensor_tan_cuda

 - func: _foreach_tan_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tan_slow_
     CUDA: foreach_tensor_tan_cuda_
   autogen: _foreach_tan.out

@@ -11132,14 +11326,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tanh_slow
     CUDA: foreach_tensor_tanh_cuda

 - func: _foreach_tanh_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_tanh_slow_
     CUDA: foreach_tensor_tanh_cuda_
   autogen: _foreach_tanh.out

@@ -11147,14 +11341,14 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_trunc_slow
     CUDA: foreach_tensor_trunc_cuda

 - func: _foreach_trunc_(Tensor(a!)[] self) -> ()
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_trunc_slow_
     CUDA: foreach_tensor_trunc_cuda_
   autogen: _foreach_trunc.out

@@ -11162,7 +11356,7 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_zero_slow_
     CUDA: foreach_tensor_zero_cuda_
   autogen: _foreach_zero, _foreach_zero.out

@@ -11170,9 +11364,15 @@
   device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
   variants: function
   dispatch:
-
+    CompositeExplicitAutograd: foreach_tensor_copy_list_kernel_slow_
     CUDA: foreach_tensor_copy_list_kernel_cuda_
-  autogen: _foreach_copy
+  autogen: _foreach_copy.out
+
+- func: _foreach_copy(Tensor[] self, Tensor[] src, bool non_blocking=False) -> Tensor[] self_out
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _foreach_copy

 - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
   dispatch:
@@ -12341,6 +12541,7 @@
   dispatch:
     CPU: upsample_linear1d_out_cpu
     CUDA: upsample_linear1d_out_cuda
+    MPS: upsample_linear1d_out_mps

 - func: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn
@@ -12352,6 +12553,7 @@
   dispatch:
     CPU: upsample_linear1d_backward_out_cpu
     CUDA: upsample_linear1d_backward_out_cuda
+    MPS: upsample_linear1d_backward_out_mps

 - func: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
   python_module: nn
@@ -12824,7 +13026,7 @@
     SparseMeta: isinf_sparse_meta
     SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
   autogen: isinf.out
-  tags: core
+  tags: [core, pointwise]

 - func: record_stream(Tensor(a!) self, Stream s) -> ()
   variants: method
@@ -13750,11 +13952,18 @@
   dispatch:
     CPU, CUDA: linalg_eig_out

+- func: _linalg_eigvals(Tensor self) -> Tensor
+  python_module: linalg
+  dispatch:
+    CPU, CUDA: _linalg_eigvals
+
 - func: linalg_eigvals(Tensor self) -> Tensor
   python_module: linalg

 - func: linalg_eigvals.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
+  dispatch:
+    CPU, CUDA: linalg_eigvals_out

 # This function is exposes the `compute_v` flag, which is then used to implement `linalg.eigh` and
 # `linalg.eigvalsh` as composite functions that call this one
@@ -14058,6 +14267,12 @@
 # It is undocumented and should not be used outside of tests.
 - func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor

+# Note: for testing COW materialization within `at::parallel_for` loop function
+- func: _test_parallel_materialize(Tensor self, int num_parallel, bool skip_first=False) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _test_parallel_materialize
+
 # Note: this function is only for testing.
 - func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
   python_module: nn
@@ -14392,6 +14607,7 @@
   variants: function
   dispatch:
     CompositeExplicitAutograd: split_with_sizes_copy_out
+    CUDA: split_with_sizes_copy_out_cuda

 - func: view_copy(Tensor self, SymInt[] size) -> Tensor
   variants: function
@@ -14428,6 +14644,16 @@
     NestedTensorCUDA: NestedTensor_to_padded_tensor_cuda
   autogen: to_padded_tensor.out

+- func: _jagged_to_padded_dense_forward(Tensor values, Tensor[] offsets, SymInt[] max_lengths, float padding_value=0.0) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _fbgemm_jagged_to_padded_dense_forward
+
+- func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
+  variants: function
+  dispatch:
+    CUDA: _fbgemm_dense_to_jagged_forward_symint
+
 - func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
   dispatch:
     NestedTensorCPU: NestedTensor_softmax_dropout
@@ -14468,19 +14694,28 @@

 - func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_cpu
     CUDA: _scaled_dot_product_flash_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
   tags: nondeterministic_seeded

+- func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu
+  tags: nondeterministic_seeded
+
 - func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
   device_check: NoCheck
   variants: function
   dispatch:
-    CPU: _scaled_dot_product_flash_attention_backward_cpu
     CUDA: _scaled_dot_product_flash_attention_backward_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested

+- func: _scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _scaled_dot_product_flash_attention_cpu_backward
+
 - func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
   dispatch:
     CUDA: _scaled_dot_product_efficient_attention_cuda
@@ -14493,26 +14728,36 @@
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
   tags: nondeterministic_seeded

-- func:
+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_cuda
+  tags: nondeterministic_seeded
+
+- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
+  tags: nondeterministic_seeded
+
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
     CUDA: _flash_attention_forward
   tags: nondeterministic_seeded

-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None) -> (Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
     CUDA: _flash_attention_backward

-# Returns
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k,
+# Returns output, logsumexp if compute_logsumexp
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt? max_seqlen_q, SymInt? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? seqlen_k=None, int? window_size=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
   variants: function
   dispatch:
     CUDA: _efficient_attention_forward
   tags: nondeterministic_seeded

-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None, int? window_size=None, bool shared_storage_dqdkdv=False) -> (Tensor, Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch:
@@ -15312,11 +15557,11 @@
     CPU: foobar
   autogen: _foobar.out

-# Fused Optimizer CUDA kernels.
 - func: _fused_adam_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
   variants: function
   dispatch:
+    CPU: _fused_adam_kernel_cpu_
     CUDA: _fused_adam_kernel_cuda_
   autogen: _fused_adam, _fused_adam.out

@@ -15326,6 +15571,7 @@
   device_check: NoCheck
   variants: function
   dispatch:
+    CPU: _fused_adam_kernel_cpu_
     CUDA: _fused_adam_kernel_cuda_
   autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out

@@ -15333,6 +15579,7 @@
   # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
   variants: function
   dispatch:
+    CPU: _fused_adamw_kernel_cpu_
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw, _fused_adamw.out

@@ -15342,9 +15589,34 @@
   device_check: NoCheck
   variants: function
   dispatch:
+    CPU: _fused_adamw_kernel_cpu_
     CUDA: _fused_adamw_kernel_cuda_
   autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out

+- func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  variants: function
+  dispatch:
+    CPU: _fused_sgd_kernel_cpu_
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd, _fused_sgd.out
+
+- func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  # Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
+  # but still skip the device check as the Tensor LR can be on CPU
+  device_check: NoCheck
+  variants: function
+  dispatch:
+    CPU: _fused_sgd_kernel_cpu_
+    CUDA: _fused_sgd_kernel_cuda_
+  autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
+
+- func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
+  variants: function
+  dispatch:
+    CPU: _fused_adagrad_kernel_cpu_
+  autogen: _fused_adagrad, _fused_adagrad.out
+
 # This op is ONLY used by pytorch/XLA in functionalization, and should never show up in vanilla eager mode or in any pytorch tracing contexts.
 - func: _propagate_xla_data(Tensor input, Tensor output) -> ()
   variants: function